mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
resolve merge conflict
This commit is contained in:
commit
54864bf102
93 changed files with 3423 additions and 690 deletions
|
|
@ -10,7 +10,7 @@ ARG DEBIAN_FRONTEND=noninteractive
|
|||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
sudo jq vim curl git ca-certificates wget \
|
||||
build-essential pkg-config automake autoconf libtool \
|
||||
ripgrep fd-find make unzip \
|
||||
ripgrep fd-find make unzip bash-completion \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Neovim latest (LazyVim needs >=0.9)
|
||||
|
|
@ -53,8 +53,8 @@ RUN echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] \
|
|||
https://apt.releases.hashicorp.com $(lsb_release -cs) main" | \
|
||||
tee /etc/apt/sources.list.d/hashicorp.list
|
||||
RUN apt update
|
||||
RUN apt-get install terraform
|
||||
RUN terraform -install-autocomplete
|
||||
RUN apt-get install -y terraform
|
||||
RUN terraform -install-autocomplete || true
|
||||
|
||||
# Install postgres
|
||||
RUN apt install -y wget gnupg2 lsb-release
|
||||
|
|
|
|||
|
|
@ -4,13 +4,7 @@
|
|||
"service": "model-backend",
|
||||
"remoteUser": "vscode",
|
||||
"workspaceFolder": "/workspaces/model",
|
||||
|
||||
// Host preflight: ensure GitHub auth exists before we try to build.
|
||||
// Either ~/.config/gh (from `gh auth login`) or a GITHUB_TOKEN env var.
|
||||
"initializeCommand": "test -d \"$HOME/.config/gh\" || test -n \"$GITHUB_TOKEN\" || { echo >&2 'error: no GitHub auth found. Run `gh auth login && gh auth setup-git` on the host, or export GITHUB_TOKEN, then retry.'; exit 1; }",
|
||||
|
||||
// Install Domna's curated skill set (pinned to 0.0.5) into this workspace.
|
||||
// `gh repo clone` handles private-repo auth using the mounted host ~/.config/gh.
|
||||
"initializeCommand": "docker network create shared-dev 2>/dev/null || true; test -d \"$HOME/.config/gh\" || test -n \"$GITHUB_TOKEN\" || { echo >&2 'error: no GitHub auth found. Run `gh auth login && gh auth setup-git` on the host, or export GITHUB_TOKEN, then retry.'; exit 1; }",
|
||||
"postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.5 --depth 1 && bash /tmp/agentic-toolkit/setup.sh",
|
||||
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
|
||||
"mounts": [
|
||||
|
|
@ -24,7 +18,6 @@
|
|||
"ms-toolsai.jupyter",
|
||||
"mechatroner.rainbow-csv",
|
||||
"ms-toolsai.datawrangler",
|
||||
"lindacong.vscode-book-reader",
|
||||
"4ops.terraform",
|
||||
"fabiospampinato.vscode-todo-plus",
|
||||
"jgclark.vscode-todo-highlight",
|
||||
|
|
@ -33,9 +26,6 @@
|
|||
"ms-python.black-formatter",
|
||||
"waderyan.gitblame",
|
||||
"GrapeCity.gc-excelviewer",
|
||||
"jakobhoeg.vscode-pokemon",
|
||||
"github.vscode-github-actions",
|
||||
"me-dutour-mathieu.vscode-github-actions",
|
||||
"anthropic.claude-code",
|
||||
"eamodio.gitlens"
|
||||
],
|
||||
|
|
|
|||
10
.github/workflows/_deploy_lambda.yml
vendored
10
.github/workflows/_deploy_lambda.yml
vendored
|
|
@ -82,6 +82,12 @@ on:
|
|||
required: false
|
||||
TF_VAR_hubspot_api_key:
|
||||
required: false
|
||||
|
||||
TF_VAR_magicplan_customer_id:
|
||||
required: false
|
||||
|
||||
TF_VAR_magicplan_api_key:
|
||||
required: false
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
|
|
@ -149,6 +155,8 @@ jobs:
|
|||
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
|
||||
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
|
||||
TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
|
||||
TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
|
||||
TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
|
||||
run: |
|
||||
ECR_REPO_URL_VAR=""
|
||||
if [[ -n "${{ inputs.ecr_repo }}" ]]; then
|
||||
|
|
@ -195,6 +203,8 @@ jobs:
|
|||
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
|
||||
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
|
||||
TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
|
||||
TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
|
||||
TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
|
||||
run: |
|
||||
EXTRA_VARS=""
|
||||
if [[ -n "${{ inputs.ecr_repo }}" ]]; then
|
||||
|
|
|
|||
5
.github/workflows/deploy_fastapi_backend.yml
vendored
5
.github/workflows/deploy_fastapi_backend.yml
vendored
|
|
@ -51,6 +51,10 @@ jobs:
|
|||
id: set_auth_token
|
||||
run: echo "::set-output name=auth_token::${{ secrets[format('{0}_EPC_AUTH_TOKEN', github.ref_name)] }}"
|
||||
|
||||
- name: Set Open EPC API token
|
||||
id: set_open_epc_token
|
||||
run: echo "::set-output name=open_epc_token::${{ secrets[format('{0}_OPEN_EPC_API_TOKEN', github.ref_name)] }}"
|
||||
|
||||
# Store port, name and host in github secrets
|
||||
- name: Set DB credentials
|
||||
id: set_db_credentials
|
||||
|
|
@ -127,6 +131,7 @@ jobs:
|
|||
GOOGLE_SOLAR_API_KEY: ${{ steps.set_api_secrets.outputs.google_solar_api_key }}
|
||||
DOMAIN_NAME: ${{ steps.set_domain.outputs.domain }}
|
||||
EPC_AUTH_TOKEN: ${{ steps.set_auth_token.outputs.auth_token }}
|
||||
OPEN_EPC_API_TOKEN: ${{ steps.set_open_epc_token.outputs.open_epc_token }}
|
||||
DB_HOST: ${{ steps.set_db_credentials.outputs.db_host }}
|
||||
DB_PORT: ${{ steps.set_db_credentials.outputs.db_port }}
|
||||
DB_NAME: ${{ steps.set_db_credentials.outputs.db_name }}
|
||||
|
|
|
|||
40
.github/workflows/deploy_terraform.yml
vendored
40
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -537,11 +537,49 @@ jobs:
|
|||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
# ============================================================
|
||||
# Build MagicPlan Lambda image
|
||||
# ============================================================
|
||||
magic_plan_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: magic-plan-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/magic_plan/handler/Dockerfile
|
||||
build_context: .
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
# ============================================================
|
||||
# Deploy MagicPlan Lambda
|
||||
# ============================================================
|
||||
magic_plan_lambda:
|
||||
needs: [magic_plan_image, determine_stage]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: magic_plan
|
||||
lambda_path: infrastructure/terraform/lambda/magic_plan
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: magic-plan-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.magic_plan_image.outputs.image_digest }}
|
||||
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }}
|
||||
TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }}
|
||||
TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }}
|
||||
TF_VAR_magicplan_customer_id: ${{ secrets.MAGICPLAN_CUSTOMER_ID }}
|
||||
TF_VAR_magicplan_api_key: ${{ secrets.MAGICPLAN_API_KEY }}
|
||||
|
||||
# ============================================================
|
||||
# Deploy Hubspot ETL Lambda
|
||||
# ============================================================
|
||||
hubspot_etl_lambda:
|
||||
needs: [hubspot_etl_image, determine_stage, pashub_to_ara_lambda]
|
||||
needs: [hubspot_etl_image, determine_stage, pashub_to_ara_lambda, magic_plan_lambda]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: hubspot-etl-to-ara
|
||||
|
|
|
|||
17
.github/workflows/protect_releases.yml
vendored
Normal file
17
.github/workflows/protect_releases.yml
vendored
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
name: Restrict PR source
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- dev
|
||||
|
||||
jobs:
|
||||
check-source-branch:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Fail if PR is not from main
|
||||
run: |
|
||||
if [[ "${{ github.head_ref }}" != "main" ]]; then
|
||||
echo "Only PRs from main are allowed into dev"
|
||||
exit 1
|
||||
fi
|
||||
4
.github/workflows/unit_tests.yml
vendored
4
.github/workflows/unit_tests.yml
vendored
|
|
@ -49,7 +49,11 @@ jobs:
|
|||
docker run --rm \
|
||||
--network host \
|
||||
-e EPC_AUTH_TOKEN=${{ secrets.DEV_EPC_AUTH_TOKEN }} \
|
||||
-e OPEN_EPC_API_TOKEN=${{ secrets.DEV_OPEN_EPC_API_TOKEN }} \
|
||||
-e HUBSPOT_API_KEY=${{ secrets.HUBSPOT_API_KEY }} \
|
||||
-e AWS_ACCESS_KEY_ID=${{ secrets.DEV_AWS_ACCESS_KEY_ID }} \
|
||||
-e AWS_SECRET_ACCESS_KEY=${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} \
|
||||
-e AWS_DEFAULT_REGION=${{ secrets.DEV_AWS_REGION }} \
|
||||
-e DB_HOST=localhost \
|
||||
-e DB_NAME=test \
|
||||
-e DB_USERNAME=test \
|
||||
|
|
|
|||
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -292,3 +292,6 @@ pyrightconfig.json
|
|||
# playwright output
|
||||
*/pashub_fetcher/videos/*
|
||||
backlog/*
|
||||
|
||||
# Local Claude config files
|
||||
.claude/*
|
||||
38
CLAUDE.md
38
CLAUDE.md
|
|
@ -28,3 +28,41 @@ You MUST read the overview resource to understand the complete workflow. The inf
|
|||
|
||||
<!-- BACKLOG.MD MCP GUIDELINES END -->
|
||||
|
||||
## Available Skills
|
||||
|
||||
Five Claude Code skills are installed in this repo's dev container. Each maps to a phase of the feature lifecycle.
|
||||
|
||||
| Skill | Invoke | When to use |
|
||||
|-------|--------|-------------|
|
||||
| **grill-me** | `/grill-me` | Before implementing — stress-tests a design through sequential questioning |
|
||||
| **to-prd** | `/to-prd` | After a planning conversation — formalises context into a GitHub issue PRD |
|
||||
| **ubiquitous-language** | `/ubiquitous-language` | When domain terms are drifting or ambiguous — builds/updates `UBIQUITOUS_LANGUAGE.md` |
|
||||
| **tdd** | `/tdd` | During implementation — enforces vertical-slice TDD (one test → one impl → repeat) |
|
||||
| **improve-codebase-architecture** | `/improve-codebase-architecture` | During refactoring — surfaces shallow modules and proposes deepening opportunities |
|
||||
|
||||
### Typical session chains
|
||||
|
||||
**Feature planning:**
|
||||
`/grill-me` → `/to-prd` → `/ubiquitous-language`
|
||||
|
||||
**Implementation:**
|
||||
`/tdd` (+ `/grill-me` if a design fork appears mid-session)
|
||||
|
||||
**Refactoring:**
|
||||
`/improve-codebase-architecture` → `/grill-me` → `/tdd` → `/ubiquitous-language`
|
||||
|
||||
### First time setting up?
|
||||
|
||||
New containers install all skills automatically via the Dockerfile. If you're in an existing container, run:
|
||||
|
||||
```bash
|
||||
bash .devcontainer/backend/install-claude-skills.sh
|
||||
```
|
||||
|
||||
## Type Safety
|
||||
|
||||
All new code must pass `pyright` with zero errors under `typeCheckingMode = strict`.
|
||||
Use `Optional[X]` rather than `X | None` for optional types.
|
||||
Annotate all function return types. Use `dict[str, Any]` for untyped external API
|
||||
payloads — never bare `dict`. Add `pandas-stubs` when introducing pandas to a module.
|
||||
|
||||
|
|
|
|||
10
Makefile
10
Makefile
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
PYTHON = python
|
||||
|
||||
.PHONY: setup test lint typecheck check clean
|
||||
.PHONY: setup test lint typecheck check clean network-setup dev-setup
|
||||
|
||||
# Install dev dependencies + tox
|
||||
setup:
|
||||
|
|
@ -28,3 +28,11 @@ check: lint typecheck test
|
|||
# Clean up tox environments
|
||||
clean:
|
||||
rm -rf .tox
|
||||
|
||||
# Create shared Docker network required by dev container (idempotent)
|
||||
network-setup:
|
||||
docker network create shared-dev 2>/dev/null || true
|
||||
|
||||
# First-time dev environment setup
|
||||
dev-setup: network-setup
|
||||
@echo "Dev environment ready. Open the repo in VS Code and select 'Reopen in Container'."
|
||||
|
|
|
|||
21
README.md
21
README.md
|
|
@ -8,6 +8,27 @@ The different folders in this repository relate to services
|
|||
that can be used independently, or can be imported and used as
|
||||
part of a larger application
|
||||
|
||||
# Getting Started
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- [Docker Desktop](https://www.docker.com/products/docker-desktop/)
|
||||
- [VS Code](https://code.visualstudio.com/) with the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers)
|
||||
|
||||
## Dev Container Setup
|
||||
|
||||
This repo uses a Docker Compose-based dev container. The `model-backend` service joins a `shared-dev` Docker network so it can communicate with other local services (e.g. a frontend container) running on your machine.
|
||||
|
||||
**VS Code users:** The `initializeCommand` in `devcontainer.json` creates the `shared-dev` network automatically before the container starts. No manual step required — just open the repo and select **Reopen in Container**.
|
||||
|
||||
**Non-VS Code / CI workflows:** Run the following once before starting the container:
|
||||
|
||||
```commandline
|
||||
make dev-setup
|
||||
```
|
||||
|
||||
This is idempotent and safe to re-run if the network already exists.
|
||||
|
||||
# Folders
|
||||
|
||||
### backend/
|
||||
|
|
|
|||
78
UBIQUITOUS_LANGUAGE.md
Normal file
78
UBIQUITOUS_LANGUAGE.md
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
# Ubiquitous Language
|
||||
|
||||
Domain terminology glossary for this project. Generated and maintained by the `/ubiquitous-language` Claude Code skill.
|
||||
|
||||
Invoke `/ubiquitous-language` in any session to extract new terms from the conversation, flag ambiguities, and update this file with canonical definitions.
|
||||
|
||||
---
|
||||
|
||||
## Energy Performance Certificates
|
||||
|
||||
| Term | Definition | Aliases to avoid |
|
||||
|------|------------|------------------|
|
||||
| **EPC** | An Energy Performance Certificate — a government-issued document rating a dwelling's energy efficiency from A (best) to G (worst). | "energy certificate", "energy report" |
|
||||
| **Certificate Number** | The unique identifier assigned to an EPC by the government registry. | "cert number", "EPC ID" |
|
||||
| **Registration Date** | The date an EPC was lodged with the government register; used to identify the most recent certificate for a property. | "assessment date", "submission date" |
|
||||
| **EPC Band** | A single letter A–G representing a property's current or potential energy efficiency rating. | "energy rating", "EPC grade", "EPC score" |
|
||||
| **Schema Type** | The versioned RdSAP or SAP schema that describes the structure of a certificate's raw data (e.g. `RdSAP-Schema-21.0.1`). | "schema version", "EPC format" |
|
||||
| **Domestic Certificate** | An EPC issued for a residential dwelling, as opposed to a commercial one. | "residential EPC", "home EPC" |
|
||||
|
||||
## Properties and Addresses
|
||||
|
||||
| Term | Definition | Aliases to avoid |
|
||||
|------|------------|------------------|
|
||||
| **UPRN** | Unique Property Reference Number — the government-issued permanent identifier for a physical address in the UK. | "property ID", "address ID", "code" |
|
||||
| **Postcode** | A UK postal code used to group nearby addresses; the primary search key for finding EPC records. | "zip code", "postal code" |
|
||||
| **User Address** | A free-text address string provided by a user or imported from a customer dataset, before any normalisation or matching. | "user input", "raw address", "user_inputed_address" |
|
||||
| **Dwelling** | A single residential unit that can hold an EPC — a house, flat, or maisonette. | "property", "unit", "home" |
|
||||
|
||||
## Address Matching
|
||||
|
||||
| Term | Definition | Aliases to avoid |
|
||||
|------|------------|------------------|
|
||||
| **Lexiscore** | A similarity score in [0, 1] between a user address and a candidate EPC address; combines token overlap and character-level similarity. | "score", "match score", "similarity" |
|
||||
| **Lexirank** | Dense rank of candidates sorted by lexiscore descending; rank 1 = best match. | "rank", "position" |
|
||||
| **UPRN Candidate** | An EPC search result that is a plausible match for a given user address, before scoring decides the winner. | "match candidate", "result" |
|
||||
| **Score Threshold** | The minimum lexiscore (currently 0.6) a candidate must reach to be returned; below it, no match is returned even if a candidate exists. | "minimum score", "cutoff" |
|
||||
| **Ambiguous Match** | A matching outcome where two or more candidates share lexirank 1, making it impossible to select a unique winner. | "tie", "draw", "duplicate" |
|
||||
| **Best Match** | The single UPRN candidate with lexirank 1 that meets or exceeds the score threshold. | "winner", "top result" |
|
||||
|
||||
## API and Integration
|
||||
|
||||
| Term | Definition | Aliases to avoid |
|
||||
|------|------------|------------------|
|
||||
| **EPC Search Result** | A lightweight record returned by the government domestic search endpoint — contains address lines, postcode, UPRN, band, and certificate number but not the full certificate data. | "search row", "EPC row", "result" |
|
||||
| **EPC Property Data** | The fully mapped domain object produced after fetching and parsing a complete EPC certificate. | "EPC data", "certificate data", "parsed EPC" |
|
||||
| **Old EPC API** | The retired government API (`epc.opendatacommunities.org`) using HTTP Basic auth; decommissioned May 2026. | "legacy API" |
|
||||
| **New EPC API** | The replacement government API (`api.get-energy-performance-data.communities.gov.uk`) using Bearer token auth. | "new API", "current API" |
|
||||
| **Bearer Token** | The auth credential required by the new EPC API; stored in the `EPC_AUTH_TOKEN` environment variable. | "API key", "auth token", "secret" |
|
||||
|
||||
## Relationships
|
||||
|
||||
- An **EPC** belongs to exactly one **Dwelling** and has one **Certificate Number**.
|
||||
- A **Dwelling** may have multiple **EPCs** across time; the one with the most recent **Registration Date** is the current one.
|
||||
- A **UPRN** identifies a **Dwelling** permanently; it does not change when the property changes owner.
|
||||
- An **EPC Search Result** is a summary; it points to a full **EPC** via its **Certificate Number**.
|
||||
- **Address Matching** uses a **User Address** and **Postcode** to find a **UPRN** by scoring **UPRN Candidates** from an EPC search.
|
||||
- A **Lexirank** of 1 with no **Ambiguous Match** and a **Lexiscore** ≥ the **Score Threshold** produces a **Best Match**.
|
||||
|
||||
## Example dialogue
|
||||
|
||||
> **Dev:** "We have a user address and postcode. How do we find the UPRN?"
|
||||
|
||||
> **Domain expert:** "Search the **New EPC API** by **Postcode** — you get back a list of **EPC Search Results** for that area. Each one has an address and a **UPRN**. Score each against the **User Address** using the **Lexiscore**. If the top **UPRN Candidate** scores at or above the **Score Threshold** and there's no **Ambiguous Match**, that's your **Best Match**."
|
||||
|
||||
> **Dev:** "What if two results share the same address line 1?"
|
||||
|
||||
> **Domain expert:** "That's an **Ambiguous Match** — two candidates at **Lexirank** 1. Fall back to scoring on the full address using all address lines joined together. If that still ties, return nothing."
|
||||
|
||||
> **Dev:** "Once we have the best match, do we use the UPRN or fetch the full EPC?"
|
||||
|
||||
> **Domain expert:** "Depends on what you need. The **EPC Search Result** gives you the **EPC Band** and **Certificate Number**. If you need energy efficiency detail, use the **Certificate Number** to fetch the full **EPC Property Data**."
|
||||
|
||||
## Flagged ambiguities
|
||||
|
||||
- **"address"** appears as both the raw **User Address** (free-text from customer data) and a structured field on an **EPC Search Result** (normalised address lines). Always qualify: "user address" vs "EPC address" or "address line 1".
|
||||
- **"score"** is used for the `AddressMatch.score()` function output, the `lexiscore` DataFrame column, and informally in conversation. Prefer **Lexiscore** in domain discussions; reserve "score" for method-level code comments.
|
||||
- **"user_inputed_address"** in `backend/address2UPRN/main.py` is a misspelled synonym for **User Address**, which is the canonical term. New code should use `user_address`.
|
||||
- **"EPC"** is overloaded as both the document (an Energy Performance Certificate) and the rating band letter. Use **EPC** for the document and **EPC Band** for the letter.
|
||||
|
|
@ -31,17 +31,19 @@ from recommendations.recommendation_utils import (
|
|||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
|
||||
|
||||
from dotenv import load_dotenv
|
||||
# from dotenv import load_dotenv
|
||||
|
||||
logger = setup_logger()
|
||||
load_dotenv(dotenv_path="../backend/.env")
|
||||
# load_dotenv(dotenv_path="../backend/.env")
|
||||
|
||||
# OpenAI API Key (set this in your environment variables for security)
|
||||
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
# OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
class DataRemapper:
|
||||
def __init__(self, standard_values, standard_map=None, max_tokens=1000):
|
||||
def __init__(
|
||||
self, standard_values, standard_map=None, max_tokens=1000, api_key=None
|
||||
):
|
||||
"""
|
||||
Initialize the remapper with standard values and a predefined mapping.
|
||||
|
||||
|
|
@ -75,7 +77,8 @@ class DataRemapper:
|
|||
"gpt-3.5-turbo": {"input": 0.0015 / 1000, "output": 0.002 / 1000},
|
||||
}
|
||||
|
||||
self.openai_client = OpenAI(api_key=OPENAI_API_KEY)
|
||||
print(f"DATA REMAPPER api key is {api_key}")
|
||||
self.openai_client = OpenAI(api_key=api_key)
|
||||
|
||||
@staticmethod
|
||||
def clean_string(text):
|
||||
|
|
@ -136,12 +139,20 @@ class DataRemapper:
|
|||
raise ValueError("Input tokens exceed the maximum limit.")
|
||||
|
||||
logger.info("Calling OpenAI API for standardization...")
|
||||
response = self.openai_client.chat.completions.create(
|
||||
model=self.ai_model,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
max_tokens=self.max_tokens,
|
||||
temperature=0.1,
|
||||
)
|
||||
|
||||
try:
|
||||
response = self.openai_client.chat.completions.create(
|
||||
model=self.ai_model,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
max_tokens=self.max_tokens,
|
||||
temperature=0.1,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[debug] OpenAI call failed. type={type(e).__name__}")
|
||||
print(f"[debug] status={getattr(e, 'status_code', None)}")
|
||||
print(f"[debug] body={getattr(e, 'response', None) and e.response.text}")
|
||||
print(f"[debug] model={self.ai_model}")
|
||||
raise
|
||||
|
||||
output_text = response.choices[0].message.content.strip()
|
||||
output_tokens = self.count_tokens(output_text) # Count output tokens
|
||||
|
|
@ -504,6 +515,7 @@ class AssetList:
|
|||
landlord_block_reference=None,
|
||||
phase=False,
|
||||
header=0,
|
||||
openai_api_key=None,
|
||||
):
|
||||
self.local_filepath = local_filepath
|
||||
self.sheet_name = sheet_name
|
||||
|
|
@ -529,6 +541,7 @@ class AssetList:
|
|||
self.ecosurv = None
|
||||
self.ecosurv_no_match = pd.DataFrame()
|
||||
self.geographical_areas = pd.DataFrame()
|
||||
self.openai_api_key = openai_api_key
|
||||
|
||||
# When this is True, we intend to break the programme into multiple phases. We may need to review
|
||||
# how this is structured in the future, as depending on how we get future data, we may need to
|
||||
|
|
@ -1107,6 +1120,7 @@ class AssetList:
|
|||
remapper = DataRemapper(
|
||||
standard_values=config["standard_values"],
|
||||
standard_map=config["standard_map"],
|
||||
api_key=self.openai_api_key,
|
||||
)
|
||||
remap_dictionary = remapper.standardize_list(
|
||||
values_to_remap=values_to_remap.tolist()
|
||||
|
|
@ -1296,8 +1310,8 @@ class AssetList:
|
|||
self.standardised_asset_list[
|
||||
self.ATTRIBUTE_HAS_SOLAR
|
||||
] = self.standardised_asset_list[
|
||||
self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]
|
||||
] | ~self.standardised_asset_list[
|
||||
self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]
|
||||
] | ~self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["photo-supply"]
|
||||
].isin(
|
||||
["0.0", 0, None, "", np.nan]
|
||||
|
|
@ -1315,7 +1329,7 @@ class AssetList:
|
|||
property_type=(
|
||||
str(x[self.STANDARD_PROPERTY_TYPE]).title()
|
||||
if str(x[self.STANDARD_PROPERTY_TYPE]).title()
|
||||
in accepted_epc_property_types
|
||||
in accepted_epc_property_types
|
||||
else (
|
||||
x[self.EPC_API_DATA_NAMES["property-type"]]
|
||||
if not pd.isnull(
|
||||
|
|
@ -1373,9 +1387,9 @@ class AssetList:
|
|||
self.standardised_asset_list.apply(
|
||||
lambda x: estimate_perimeter(
|
||||
floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]]
|
||||
/ x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
|
||||
/ x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
|
||||
num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]]
|
||||
/ x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
|
||||
/ x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
|
||||
),
|
||||
axis=1,
|
||||
)
|
||||
|
|
@ -1460,7 +1474,7 @@ class AssetList:
|
|||
year_lower_bound = (
|
||||
2007
|
||||
if x[self.EPC_API_DATA_NAMES["construction-age-band"]]
|
||||
== "England and Wales: 2007 onwards"
|
||||
== "England and Wales: 2007 onwards"
|
||||
else 2012
|
||||
)
|
||||
|
||||
|
|
@ -1515,7 +1529,7 @@ class AssetList:
|
|||
age_band_matches = (
|
||||
"EPC Age Band Matches Year Built"
|
||||
if x[self.STANDARD_YEAR_BUILT]
|
||||
== int(x[self.EPC_API_DATA_NAMES["construction-age-band"]])
|
||||
== int(x[self.EPC_API_DATA_NAMES["construction-age-band"]])
|
||||
else "EPC Age Band is different from Year Built"
|
||||
)
|
||||
|
||||
|
|
@ -1545,7 +1559,7 @@ class AssetList:
|
|||
age_band_matches = (
|
||||
"EPC Age Band Matches Year Built"
|
||||
if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date))
|
||||
and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date))
|
||||
and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date))
|
||||
else (
|
||||
"EPC Age Band is older than Year Built"
|
||||
if x[self.STANDARD_YEAR_BUILT] > float(upper_date)
|
||||
|
|
@ -1717,22 +1731,22 @@ class AssetList:
|
|||
if self.non_intrusives_present:
|
||||
if self.new_format_non_insturives_present_v2:
|
||||
non_intrusives_wall_filter = (
|
||||
self.standardised_asset_list["non-intrusives: Construction"]
|
||||
== "CAVITY"
|
||||
) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
|
||||
self.standardised_asset_list["non-intrusives: Construction"]
|
||||
== "CAVITY"
|
||||
) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
|
||||
["EMPTY", "PARTIAL", "EMPTY CAVITY"]
|
||||
)
|
||||
else:
|
||||
non_intrusives_wall_filter = (
|
||||
self.standardised_asset_list["non-intrusives: Construction"]
|
||||
== "CAVITY"
|
||||
) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
|
||||
self.standardised_asset_list["non-intrusives: Construction"]
|
||||
== "CAVITY"
|
||||
) & self.standardised_asset_list["non-intrusives: Insulated"].isin(
|
||||
["EMPTY", "PARTIAL"]
|
||||
)
|
||||
elif self.old_format_non_intrusives_present:
|
||||
non_intrusives_wall_filter = self.standardised_asset_list[
|
||||
"non-intrusives: WFT Findings"
|
||||
].str.lower().str.strip().isin(
|
||||
"non-intrusives: WFT Findings"
|
||||
].str.lower().str.strip().isin(
|
||||
[
|
||||
"empty cavity",
|
||||
"partial fill",
|
||||
|
|
@ -1742,18 +1756,18 @@ class AssetList:
|
|||
"empty cav",
|
||||
]
|
||||
) | (
|
||||
(
|
||||
self.standardised_asset_list["non-intrusives: WFT Findings"]
|
||||
.str.lower()
|
||||
.str.strip()
|
||||
.str.contains("empty cavity|partial fill")
|
||||
& ~self.standardised_asset_list["non-intrusives: WFT Findings"]
|
||||
.astype(str)
|
||||
.str.lower()
|
||||
.str.strip()
|
||||
.str.contains("major access issues")
|
||||
)
|
||||
)
|
||||
(
|
||||
self.standardised_asset_list["non-intrusives: WFT Findings"]
|
||||
.str.lower()
|
||||
.str.strip()
|
||||
.str.contains("empty cavity|partial fill")
|
||||
& ~self.standardised_asset_list["non-intrusives: WFT Findings"]
|
||||
.astype(str)
|
||||
.str.lower()
|
||||
.str.strip()
|
||||
.str.contains("major access issues")
|
||||
)
|
||||
)
|
||||
else:
|
||||
# We set the filter to False, as we have no non-intrusives
|
||||
non_intrusives_wall_filter = False
|
||||
|
|
@ -1765,12 +1779,12 @@ class AssetList:
|
|||
)
|
||||
else:
|
||||
year_built_filter = (
|
||||
self.standardised_asset_list[self.STANDARD_YEAR_BUILT]
|
||||
<= self.EMPTY_CAVITY_YEAR_THRESHOLD
|
||||
) | (
|
||||
self.standardised_asset_list["epc_year_upper_bound"]
|
||||
<= self.EMPTY_CAVITY_YEAR_THRESHOLD
|
||||
)
|
||||
self.standardised_asset_list[self.STANDARD_YEAR_BUILT]
|
||||
<= self.EMPTY_CAVITY_YEAR_THRESHOLD
|
||||
) | (
|
||||
self.standardised_asset_list["epc_year_upper_bound"]
|
||||
<= self.EMPTY_CAVITY_YEAR_THRESHOLD
|
||||
)
|
||||
|
||||
# Criteria:
|
||||
# The property isn't a bedsit
|
||||
|
|
@ -1811,8 +1825,8 @@ class AssetList:
|
|||
] = (
|
||||
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]
|
||||
& ~self.standardised_asset_list[
|
||||
"non_intrusive_indicates_empty_cavity_has_solar"
|
||||
]
|
||||
"non_intrusive_indicates_empty_cavity_has_solar"
|
||||
]
|
||||
& (
|
||||
~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(
|
||||
["bedsit"]
|
||||
|
|
@ -1888,8 +1902,8 @@ class AssetList:
|
|||
.str.lower()
|
||||
.isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS)
|
||||
| self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(
|
||||
["uninsulated cavity"]
|
||||
)
|
||||
["uninsulated cavity"]
|
||||
)
|
||||
)
|
||||
|
||||
######################################################
|
||||
|
|
@ -1926,8 +1940,8 @@ class AssetList:
|
|||
extraction_wall_filter = (
|
||||
extraction_wall_filter
|
||||
& ~self.standardised_asset_list[
|
||||
"non-intrusives: Eligibility (Red/Yellow/Green)"
|
||||
].isin(["RED"])
|
||||
"non-intrusives: Eligibility (Red/Yellow/Green)"
|
||||
].isin(["RED"])
|
||||
)
|
||||
|
||||
self.standardised_asset_list[
|
||||
|
|
@ -2023,26 +2037,26 @@ class AssetList:
|
|||
self.standardised_asset_list[
|
||||
"solar_epc_data_indicates_correct_heating_system"
|
||||
] = (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheat-description"]
|
||||
]
|
||||
.str.lower()
|
||||
.str.contains(
|
||||
"air source heat pump|ground source heat pump|boiler and radiators, electric"
|
||||
)
|
||||
) | (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheat-description"]
|
||||
]
|
||||
.str.lower()
|
||||
.str.contains("electric storage heaters")
|
||||
& (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheatcont-description"]
|
||||
]
|
||||
== "Controls for high heat retention storage heaters"
|
||||
)
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheat-description"]
|
||||
]
|
||||
.str.lower()
|
||||
.str.contains(
|
||||
"air source heat pump|ground source heat pump|boiler and radiators, electric"
|
||||
)
|
||||
) | (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheat-description"]
|
||||
]
|
||||
.str.lower()
|
||||
.str.contains("electric storage heaters")
|
||||
& (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheatcont-description"]
|
||||
]
|
||||
== "Controls for high heat retention storage heaters"
|
||||
)
|
||||
)
|
||||
|
||||
# If the landlord has given us the heating system, we default to that on heating upgrades. Because of the
|
||||
# poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the
|
||||
|
|
@ -2050,25 +2064,25 @@ class AssetList:
|
|||
self.standardised_asset_list[
|
||||
"solar_epc_data_indicates_requires_heating_upgrade"
|
||||
] = (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheat-description"]
|
||||
]
|
||||
.str.lower()
|
||||
.str.contains("electric storage heaters|room heaters")
|
||||
& (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheat-description"]
|
||||
self.EPC_API_DATA_NAMES["mainheatcont-description"]
|
||||
]
|
||||
.str.lower()
|
||||
.str.contains("electric storage heaters|room heaters")
|
||||
& (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheatcont-description"]
|
||||
]
|
||||
!= "Controls for high heat retention storage heaters"
|
||||
)
|
||||
) & (
|
||||
~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
|
||||
["district heating", "communal heating", "communal gas boiler"]
|
||||
)
|
||||
& ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM]
|
||||
.astype(str)
|
||||
.str.contains("gas ")
|
||||
!= "Controls for high heat retention storage heaters"
|
||||
)
|
||||
) & (
|
||||
~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin(
|
||||
["district heating", "communal heating", "communal gas boiler"]
|
||||
)
|
||||
& ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM]
|
||||
.astype(str)
|
||||
.str.contains("gas ")
|
||||
)
|
||||
|
||||
# Basic check - both of the previous two shouldn't be true simultaneously
|
||||
if (
|
||||
|
|
@ -2148,8 +2162,8 @@ class AssetList:
|
|||
self.standardised_asset_list[
|
||||
"solar_non_intrusives_walls_insulated"
|
||||
] = self.standardised_asset_list[
|
||||
"non-intrusives: WFT Findings"
|
||||
].str.lower().str.strip().isin(
|
||||
"non-intrusives: WFT Findings"
|
||||
].str.lower().str.strip().isin(
|
||||
[
|
||||
"retro drilled",
|
||||
"retro filled",
|
||||
|
|
@ -2158,8 +2172,8 @@ class AssetList:
|
|||
"retro drilled and filled",
|
||||
]
|
||||
) | self.standardised_asset_list[
|
||||
"non-intrusives: WFT Findings"
|
||||
].str.lower().str.strip().str.contains(
|
||||
"non-intrusives: WFT Findings"
|
||||
].str.lower().str.strip().str.contains(
|
||||
"retro drilled"
|
||||
)
|
||||
else:
|
||||
|
|
@ -2176,19 +2190,14 @@ class AssetList:
|
|||
)
|
||||
|
||||
self.standardised_asset_list["solar_epc_walls_insulated"] = (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES[
|
||||
"walls-description"]]
|
||||
.str.lower()
|
||||
.str.contains("|".join(
|
||||
self.EPC_INSULATED_WALLS_SUBSTRINGS))
|
||||
) | (
|
||||
self.standardised_asset_list[
|
||||
"walls_u_value"].apply(
|
||||
lambda x: x <= 0.7 if not pd.isnull(
|
||||
x) else False
|
||||
)
|
||||
)
|
||||
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]]
|
||||
.str.lower()
|
||||
.str.contains("|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS))
|
||||
) | (
|
||||
self.standardised_asset_list["walls_u_value"].apply(
|
||||
lambda x: x <= 0.7 if not pd.isnull(x) else False
|
||||
)
|
||||
)
|
||||
|
||||
roof_data = []
|
||||
for desc in self.standardised_asset_list[
|
||||
|
|
@ -2230,20 +2239,20 @@ class AssetList:
|
|||
self.standardised_asset_list[
|
||||
"solar_epc_loft_needs_topup"
|
||||
] = self.standardised_asset_list[
|
||||
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
|
||||
].apply(
|
||||
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
|
||||
].apply(
|
||||
lambda x: int(x) < 200 if str(x).isdigit() else False
|
||||
) | (
|
||||
(
|
||||
self.standardised_asset_list["is_loft"]
|
||||
| self.standardised_asset_list["is_pitched"]
|
||||
)
|
||||
& (
|
||||
self.standardised_asset_list[
|
||||
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
|
||||
].isin(["below average", "none"])
|
||||
)
|
||||
(
|
||||
self.standardised_asset_list["is_loft"]
|
||||
| self.standardised_asset_list["is_pitched"]
|
||||
)
|
||||
& (
|
||||
self.standardised_asset_list[
|
||||
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS
|
||||
].isin(["below average", "none"])
|
||||
)
|
||||
)
|
||||
|
||||
self.standardised_asset_list["epc_has_floor_recommendation"] = (
|
||||
self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False)
|
||||
|
|
@ -2252,16 +2261,15 @@ class AssetList:
|
|||
# Check if the boiler is electric
|
||||
# We check if it contains both the terms boiler & electric
|
||||
self.standardised_asset_list["has_electric_boiler"] = (
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheat-description"]
|
||||
]
|
||||
.str.lower()
|
||||
.isin(["boiler and radiators, electric"])
|
||||
) | (
|
||||
self.standardised_asset_list[
|
||||
self.STANDARD_HEATING_SYSTEM]
|
||||
== "electric boiler"
|
||||
)
|
||||
self.standardised_asset_list[
|
||||
self.EPC_API_DATA_NAMES["mainheat-description"]
|
||||
]
|
||||
.str.lower()
|
||||
.isin(["boiler and radiators, electric"])
|
||||
) | (
|
||||
self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM]
|
||||
== "electric boiler"
|
||||
)
|
||||
|
||||
####################################
|
||||
# Check solar eligibility
|
||||
|
|
@ -2399,11 +2407,11 @@ class AssetList:
|
|||
|
||||
empty_cavity_map = {
|
||||
"non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE
|
||||
+ ": ",
|
||||
+ ": ",
|
||||
"non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property "
|
||||
"already has solar: ",
|
||||
"already has solar: ",
|
||||
"non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, "
|
||||
f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ",
|
||||
f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ",
|
||||
}
|
||||
for variable, description in empty_cavity_map.items():
|
||||
self.standardised_asset_list["cavity_reason"] = np.where(
|
||||
|
|
@ -2419,8 +2427,8 @@ class AssetList:
|
|||
(
|
||||
self.standardised_asset_list["epc_indicates_empty_cavity"]
|
||||
& ~self.standardised_asset_list[
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
& (
|
||||
self.standardised_asset_list["non-intrusives: WFT Findings"]
|
||||
.str.lower()
|
||||
|
|
@ -2445,8 +2453,8 @@ class AssetList:
|
|||
(
|
||||
self.standardised_asset_list["epc_indicates_empty_cavity"]
|
||||
& ~self.standardised_asset_list[
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
& self.standardised_asset_list[
|
||||
"non_intrusive_indicates_cavity_extraction"
|
||||
]
|
||||
|
|
@ -2461,8 +2469,8 @@ class AssetList:
|
|||
(
|
||||
self.standardised_asset_list["epc_indicates_empty_cavity"]
|
||||
& ~self.standardised_asset_list[
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
& (
|
||||
self.standardised_asset_list["non-intrusives: Insulated"]
|
||||
== "RETRO DRILLED"
|
||||
|
|
@ -2478,8 +2486,8 @@ class AssetList:
|
|||
(
|
||||
self.standardised_asset_list["epc_indicates_empty_cavity"]
|
||||
& ~self.standardised_asset_list[
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
& (
|
||||
self.standardised_asset_list["non-intrusives: Insulated"]
|
||||
== "FILLED AT BUILD"
|
||||
|
|
@ -2495,8 +2503,8 @@ class AssetList:
|
|||
(
|
||||
self.standardised_asset_list["epc_indicates_empty_cavity"]
|
||||
& ~self.standardised_asset_list[
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
"non_intrusive_indicates_empty_cavity"
|
||||
]
|
||||
& pd.isnull(self.standardised_asset_list["cavity_reason"])
|
||||
),
|
||||
f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"],
|
||||
|
|
@ -2640,7 +2648,7 @@ class AssetList:
|
|||
identified_work = self.standardised_asset_list[
|
||||
~pd.isnull(self.standardised_asset_list["cavity_reason"])
|
||||
| ~pd.isnull(self.standardised_asset_list["solar_reason"])
|
||||
][self.DOMNA_PROPERTY_ID].values
|
||||
][self.DOMNA_PROPERTY_ID].values
|
||||
|
||||
if self.DOMNA_PROPERTY_ID in self.outcomes.columns:
|
||||
self.outcomes_for_output = self.outcomes[
|
||||
|
|
@ -2675,12 +2683,12 @@ class AssetList:
|
|||
blocks_of_flats = self.standardised_asset_list[
|
||||
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
|
||||
== "block of flats"
|
||||
]
|
||||
]
|
||||
|
||||
non_blocks_of_flats = self.standardised_asset_list[
|
||||
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
|
||||
!= "block of flats"
|
||||
]
|
||||
]
|
||||
|
||||
# Produce some aggregate figures
|
||||
self.work_type_figures = {
|
||||
|
|
@ -2723,7 +2731,7 @@ class AssetList:
|
|||
blocks = self.standardised_asset_list[
|
||||
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
|
||||
== "block of flats"
|
||||
].copy()
|
||||
].copy()
|
||||
|
||||
if blocks.empty:
|
||||
return
|
||||
|
|
@ -2860,7 +2868,7 @@ class AssetList:
|
|||
self.standardised_asset_list = self.standardised_asset_list[
|
||||
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE]
|
||||
!= "block of flats"
|
||||
]
|
||||
]
|
||||
|
||||
self.standardised_asset_list = pd.concat(
|
||||
[self.standardised_asset_list, expanded_blocks], ignore_index=True
|
||||
|
|
@ -2940,7 +2948,7 @@ class AssetList:
|
|||
# find any block refs with at least 50% empties
|
||||
viable_empty_blocks = self.block_analysis_df[
|
||||
self.block_analysis_df["Percentage of Empties"] >= 0.50
|
||||
]
|
||||
]
|
||||
|
||||
if not viable_empty_blocks.empty:
|
||||
project_code_lookup = viable_empty_blocks[["Block Reference"]].copy()
|
||||
|
|
@ -3179,7 +3187,7 @@ class AssetList:
|
|||
|
||||
contact_details = pd.read_excel(local_filepath, sheet_name=sheet_name)[
|
||||
[self.contact_detail_fields["landlord_property_id"]] + details_colnames
|
||||
]
|
||||
]
|
||||
contact_details = contact_details[
|
||||
~pd.isnull(
|
||||
contact_details[self.contact_detail_fields["landlord_property_id"]]
|
||||
|
|
@ -3572,13 +3580,10 @@ class AssetList:
|
|||
"Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>": date_of_inspections,
|
||||
"Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>": non_intrusives_construction,
|
||||
"Non-intrusives: Insulation <LISTING non_intrusives__insulation>": non_intrusives_insulated,
|
||||
"Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>":
|
||||
non_intrusives_insulation_material,
|
||||
"Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>":
|
||||
non_intrusives_ciga_check_required,
|
||||
"Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>": non_intrusives_insulation_material,
|
||||
"Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>": non_intrusives_ciga_check_required,
|
||||
"Non-Intrusives: PV Access Issues <LISTING non_intrusives__access_issues>": non_intrusives_pv_access,
|
||||
"Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>":
|
||||
non_intrusives_roof_orientation,
|
||||
"Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>": non_intrusives_roof_orientation,
|
||||
"Non-Intrusives: Surveyor Notes <LISTING non_intrusives__surveyor_notes>": non_intrusives_surveyor_notes,
|
||||
"Non-Intrusives: Surveyor Name <LISTING non_intrusives__surveyor_name>": non_intrusives_surveyor_name,
|
||||
"CIGA: Date Requested <LISTING ciga__date_requested>": None, # TODO: Don't have this for the moment
|
||||
|
|
@ -3755,8 +3760,8 @@ class AssetList:
|
|||
# We compare address line 1 to full address
|
||||
if any(
|
||||
df[self.STANDARD_FULL_ADDRESS]
|
||||
.str.lower()
|
||||
.str.contains(row["Address Line 1"].lower(), na=False)
|
||||
.str.lower()
|
||||
.str.contains(row["Address Line 1"].lower(), na=False)
|
||||
):
|
||||
df = df[
|
||||
df[self.STANDARD_FULL_ADDRESS]
|
||||
|
|
@ -3996,7 +4001,7 @@ class AssetList:
|
|||
|
||||
matched = matched[
|
||||
matched["houseno"].astype(str) == house_no_to_match
|
||||
]
|
||||
]
|
||||
if matched.shape[0] == 1:
|
||||
lookup_i.append(
|
||||
{
|
||||
|
|
@ -4021,7 +4026,7 @@ class AssetList:
|
|||
)[0]
|
||||
matched = matched[
|
||||
matched[self.STANDARD_FULL_ADDRESS] == best_match
|
||||
]
|
||||
]
|
||||
lookup_i.append(
|
||||
{
|
||||
"row_id": x["row_id"],
|
||||
|
|
@ -4332,7 +4337,7 @@ class AssetList:
|
|||
df = self.standardised_asset_list[
|
||||
self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID]
|
||||
== row[master_id_colnames[idx]]
|
||||
]
|
||||
]
|
||||
if df.shape[0] == 1:
|
||||
matched.append(
|
||||
{
|
||||
|
|
@ -4438,7 +4443,7 @@ class AssetList:
|
|||
)[1]
|
||||
)
|
||||
> 90
|
||||
]
|
||||
]
|
||||
|
||||
if df.shape[0] == 0:
|
||||
unmatched.append(row["row_id"])
|
||||
|
|
@ -4446,8 +4451,8 @@ class AssetList:
|
|||
|
||||
if any(
|
||||
df[self.STANDARD_FULL_ADDRESS]
|
||||
.str.lower()
|
||||
.str.contains(
|
||||
.str.lower()
|
||||
.str.contains(
|
||||
" ".join(
|
||||
[row[house_no_col], row["Street / Block Name"]]
|
||||
).lower()
|
||||
|
|
@ -4474,7 +4479,7 @@ class AssetList:
|
|||
row[property_type_col].split(" ")[-1].lower()
|
||||
)
|
||||
& (df[self.STANDARD_PROPERTY_TYPE] != "block of flats")
|
||||
]
|
||||
]
|
||||
|
||||
if df.shape[0] != 1:
|
||||
# We have multiple matches - it's likely because the landlord has a duplicate
|
||||
|
|
|
|||
|
|
@ -21,6 +21,11 @@ EPC_AUTH_TOKEN = os.getenv(
|
|||
OPENAI_API_KEY = os.getenv(
    "OPENAI_API_KEY",
)
# Debug aid: report only whether the key is present and how long it is.
# The key material itself is never printed (stdout usually ends up in logs),
# and the unset case is handled explicitly instead of slicing ``None``, which
# previously raised TypeError at import time.
print(
    f"[debug] OPENAI_API_KEY loaded: "
    f"{'set' if OPENAI_API_KEY else 'NONE'} "
    f"(len={len(OPENAI_API_KEY) if OPENAI_API_KEY else 0})"
)
|
||||
|
||||
|
||||
def extract_address1(
|
||||
|
|
@ -74,23 +79,23 @@ def app():
|
|||
"""
|
||||
|
||||
data_folder = "/workspaces/model/asset_list"
|
||||
data_filename = "2026-04-22T08_22_00.779745_61049fd3.xlsx"
|
||||
sheet_name = "in"
|
||||
postcode_column = "postcode_clean"
|
||||
address1_column = "address2uprn_address"
|
||||
data_filename = "input.xlsx"
|
||||
sheet_name = "Handovers"
|
||||
postcode_column = "POSTCODE"
|
||||
address1_column = "Full Addres"
|
||||
address1_method = None
|
||||
fulladdress_column = "address2uprn_address"
|
||||
fulladdress_column = "Full Addres"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = "address2uprn_uprn"
|
||||
landlord_property_type = "Property Type" # Good to include if landlord gave
|
||||
landlord_built_form = "Built Form" # Good to include if landlord gave
|
||||
landlord_os_uprn = "domna_found_uprn"
|
||||
landlord_property_type = "PROPERTY TYPE" # Good to include if landlord gave
|
||||
landlord_built_form = "Type Description" # Good to include if landlord gave
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_property_id = "PROP REF"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
@ -131,6 +136,7 @@ def app():
|
|||
landlord_sap=landlord_sap,
|
||||
landlord_block_reference=landlord_block_reference,
|
||||
phase=phase,
|
||||
openai_api_key=OPENAI_API_KEY,
|
||||
)
|
||||
asset_list.init_standardise()
|
||||
|
||||
|
|
@ -462,3 +468,9 @@ def app():
|
|||
asset_list.duplicated_addresses.to_excel(
|
||||
writer, sheet_name="Duplicate Properties", index=False
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
||||
# NOTE(review): the stray statements committed here (a loop over the unbound
# builtin `dict.items()` whose body was an undefined placeholder name) could
# only raise at runtime and produced no output, so they have been removed.
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
API_KEY = example-api-key
|
||||
ENVIRONMENT = local
|
||||
SECRET_KEY = YOUR_SECRET_KEY
|
||||
ALGORITHM = HS256
|
||||
ALGORITHM = HS256
|
||||
OPEN_EPC_API_TOKEN = your_token_here
|
||||
|
|
@ -1,8 +1,6 @@
|
|||
from typing import Optional
|
||||
|
||||
from epc_api.client import EpcClient
|
||||
import os
|
||||
from urllib.parse import urlencode
|
||||
import pandas as pd
|
||||
from utils.logger import setup_logger
|
||||
import json
|
||||
|
|
@ -17,141 +15,63 @@ from utils.s3 import (
|
|||
from datetime import datetime
|
||||
|
||||
from backend.utils.addressMatch import AddressMatch
|
||||
from backend.address2UPRN.scoring import all_uprns_match, rank_address_similarity
|
||||
from datatypes.epc.domain.historic_epc_matching import (
|
||||
match_addresses_for_postcode,
|
||||
)
|
||||
from backend.epc_client.epc_client_service import EpcClientService
|
||||
from datatypes.epc.domain.historic_epc_matching import ScoredHistoricEpc
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
)
|
||||
def get_epc_data_with_postcode(postcode: str) -> pd.DataFrame:
|
||||
|
||||
if EPC_AUTH_TOKEN is None:
|
||||
raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
|
||||
token = os.getenv("OPEN_EPC_API_TOKEN")
|
||||
if token is None:
|
||||
raise RuntimeError("OPEN_EPC_API_TOKEN not defined in env")
|
||||
|
||||
|
||||
def score_addresses(
    df: pd.DataFrame,
    user_address: str,
    column: str = "address",
) -> pd.Series:
    """
    Score every value of ``df[column]`` against ``user_address``.

    Each value is passed, together with ``user_address``, to
    ``AddressMatch.score``; the results are returned as a Series aligned
    with ``df``.

    Raises:
        ValueError: if ``column`` is not a column of ``df``.
    """
    if column in df.columns:

        def _score(candidate):
            return AddressMatch.score(user_address, candidate)

        return df[column].apply(_score)

    raise ValueError(f"Missing column: {column}")
|
||||
|
||||
|
||||
def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
    """
    Fetch EPC search results for a postcode as a DataFrame.

    When the number of rows returned equals the requested ``size`` the result
    may have been truncated, so the request is repeated with double the size
    until a response comes back under the limit or ``max_attempts`` requests
    have been made.

    Args:
        postcode: postcode sent as the ``postcode`` query parameter.
        size: page size requested from the search endpoint.
        attempt: number of the first attempt (kept for backward
            compatibility with the earlier recursive form).
        max_attempts: highest attempt number that may be reached.

    Returns:
        A DataFrame built from the response's ``rows`` and ``column-names``,
        or an empty DataFrame when the response is empty or has no ``rows``.
    """
    client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # URLs always use "/" – os.path.join would insert the platform separator
    # (a backslash on Windows), so join the endpoint explicitly.
    base_url = client.domestic.host.rstrip("/") + "/search"

    while True:
        url = base_url
        if size:
            url += "?" + urlencode({"size": size})

        search_resp = client.domestic.call(
            url=url,
            method="get",
            params={"postcode": postcode},
        )
        if not search_resp or "rows" not in search_resp:
            return pd.DataFrame()

        results_df = pd.DataFrame(
            search_resp["rows"], columns=search_resp["column-names"]
        )

        # Fewer (or more) rows than requested: nothing suggests truncation.
        if len(results_df) != size:
            return results_df

        # If we hit the size limit, there *may* be more results
        print(
            f"⚠️ Warning: hit size limit ({size}) for postcode '{postcode}'. "
            f"Attempt {attempt}/{max_attempts}."
        )

        if attempt >= max_attempts:
            print(
                "🚨 Max attempts reached. Results may be truncated. "
                "(Please do a manual review by the tech team.)"
            )
            return results_df

        print(f"🔁 Retrying with size={size * 2}")
        size *= 2
        attempt += 1
|
||||
|
||||
|
||||
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """
    Report whether every non-null value in ``df[column]`` equals ``uprn``.

    Values are compared as whitespace-stripped strings. Returns False when
    the column is missing, when it holds no non-null values, or when more
    than one distinct value is present.
    """
    if column in df.columns:
        # Nulls are ignored; everything else is normalised to a stripped string.
        distinct = set(df[column].dropna().astype(str).str.strip())
        # Exactly one distinct value, and it is the requested UPRN.
        return distinct == {str(uprn)}

    return False
|
||||
|
||||
|
||||
def get_uprn_candidates(
|
||||
df: pd.DataFrame,
|
||||
user_address: str,
|
||||
address_column: str = "address",
|
||||
uprn_column: str = "uprn",
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Annotate EPC results with lexicographical similarity scores and ranks.
|
||||
|
||||
Returns a DataFrame sorted by descending lexiscore.
|
||||
DOES NOT choose or return a UPRN.
|
||||
"""
|
||||
|
||||
if address_column not in df.columns:
|
||||
raise ValueError(f"Missing column: {address_column}")
|
||||
|
||||
if uprn_column not in df.columns:
|
||||
raise ValueError(f"Missing column: {uprn_column}")
|
||||
|
||||
out = df.copy()
|
||||
|
||||
user_norm = AddressMatch.normalise_address(user_address)
|
||||
|
||||
out["lexiscore"] = out[address_column].apply(
|
||||
lambda x: AddressMatch.levenshtein(user_norm, x)
|
||||
service = EpcClientService(auth_token=token)
|
||||
results = service.search_by_postcode(postcode)
|
||||
return pd.DataFrame(
|
||||
[{"address": r.address_line_1, "uprn": r.uprn} for r in results]
|
||||
)
|
||||
|
||||
# Normalise UPRN to string
|
||||
out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
|
||||
|
||||
# Rank: 1 = best match
|
||||
out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int)
|
||||
def get_uprn_from_historic_epc(
    user_inputed_address: str,
    postcode: str,
) -> Optional[tuple[str, str, float]]:
    """Resolve a UPRN via historic EPC S3 data.

    Returns (uprn, address, lexiscore) when the historic dataset agrees on a
    single rank-1 UPRN, None otherwise (missing postcode file, zero score,
    or ambiguous top rank). The score gate is `unambiguous_uprn`'s own
    (score > 0); the 0.7 heuristic used for the new-EPC source isn't applied
    here because historic addresses use a more verbose format that
    systematically depresses lexiscores.
    """

    try:
        result = match_addresses_for_postcode(user_inputed_address, postcode)
    except FileNotFoundError:
        # No historic file for this postcode: treated as "no match".
        return None

    uprn: Optional[str] = result.unambiguous_uprn()
    # "nan" is rejected as well as empty/None: a missing UPRN may arrive
    # already stringified.
    if not uprn or uprn == "nan":
        return None

    top: Optional[ScoredHistoricEpc] = result.top()
    if top is None:
        return None
    return uprn, top.record.address, top.lexiscore
|
||||
|
||||
|
||||
def get_uprn_with_epc_df(
|
||||
user_inputed_address: str,
|
||||
epc_df: pd.DataFrame,
|
||||
verbose: bool = False,
|
||||
):
|
||||
) -> Optional[str | tuple[str, str, float]]:
|
||||
"""
|
||||
Return uprn (str) using a pre-fetched EPC dataframe.
|
||||
This avoids calling the API multiple times for the same postcode.
|
||||
|
|
@ -159,7 +79,7 @@ def get_uprn_with_epc_df(
|
|||
if epc_df.empty:
|
||||
return None
|
||||
|
||||
scored_df = get_uprn_candidates(
|
||||
scored_df = rank_address_similarity(
|
||||
epc_df,
|
||||
user_address=user_inputed_address,
|
||||
)
|
||||
|
|
@ -168,14 +88,14 @@ def get_uprn_with_epc_df(
|
|||
best_score = scored_df.iloc[0]["lexiscore"]
|
||||
|
||||
# # Return None if score is below threshold
|
||||
# if best_score < 0.7:
|
||||
# return None
|
||||
if best_score < 0.7:
|
||||
return None
|
||||
|
||||
# All rank-1 rows (possible draw)
|
||||
top_rank_df = scored_df[scored_df["lexirank"] == 1]
|
||||
|
||||
# If rank-1 rows do not agree on a single UPRN → ambiguous
|
||||
if not df_has_single_uprn(top_rank_df, uprn=top_rank_df.iloc[0]["uprn"]):
|
||||
if not all_uprns_match(top_rank_df, target_uprn=top_rank_df.iloc[0]["uprn"]):
|
||||
return None
|
||||
|
||||
address = top_rank_df["address"].values[0]
|
||||
|
|
@ -185,7 +105,8 @@ def get_uprn_with_epc_df(
|
|||
# Safe to return the agreed UPRN
|
||||
found_uprn = top_rank_df.iloc[0]["uprn"]
|
||||
|
||||
if found_uprn == "":
|
||||
# Handling numeric missingness in new api
|
||||
if found_uprn in ["", "nan"]:
|
||||
return None
|
||||
|
||||
if verbose:
|
||||
|
|
@ -201,20 +122,35 @@ def get_uprn(
|
|||
):
|
||||
"""
|
||||
Return uprn (str)
|
||||
Return False if failed to find a sensible matching epc
|
||||
Return None when epc found but no UPRN
|
||||
Return None when no sensible match is found in either EPC source.
|
||||
|
||||
This function fetches EPC data via API for a single postcode.
|
||||
For processing multiple addresses in the same postcode, use get_uprn_with_epc_df instead.
|
||||
Tries the new EPC API first; if that yields no confident match, falls
|
||||
back to the historic EPC dataset on S3.
|
||||
|
||||
For processing multiple addresses in the same postcode, use
|
||||
get_uprn_with_epc_df instead.
|
||||
"""
|
||||
df = get_epc_data_with_postcode(postcode=postcode)
|
||||
|
||||
return get_uprn_with_epc_df(
|
||||
result: Optional[tuple[str, str, float]] = get_uprn_with_epc_df(
|
||||
user_inputed_address=user_inputed_address,
|
||||
epc_df=df,
|
||||
verbose=verbose,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
if not result:
|
||||
result = get_uprn_from_historic_epc(
|
||||
user_inputed_address=user_inputed_address,
|
||||
postcode=postcode,
|
||||
)
|
||||
if result:
|
||||
logger.info(f"Historic EPC matched {user_inputed_address} in {postcode}")
|
||||
|
||||
if not result:
|
||||
return None
|
||||
|
||||
return result if verbose else result[0]
|
||||
|
||||
|
||||
def resolve_uprns_for_postcode_group(
|
||||
group_df: pd.DataFrame,
|
||||
|
|
@ -235,7 +171,7 @@ def resolve_uprns_for_postcode_group(
|
|||
for _, row in group_df.iterrows():
|
||||
user_address = str(row[address_col]).strip()
|
||||
|
||||
scored_df = get_uprn_candidates(
|
||||
scored_df = rank_address_similarity(
|
||||
epc_df,
|
||||
user_address=user_address,
|
||||
)
|
||||
|
|
@ -268,7 +204,7 @@ def resolve_uprns_for_postcode_group(
|
|||
|
||||
top_rank_df = scored_df[scored_df["lexirank"] == 1]
|
||||
|
||||
if not df_has_single_uprn(top_rank_df, top_rank_df.iloc[0]["uprn"]):
|
||||
if not all_uprns_match(top_rank_df, top_rank_df.iloc[0]["uprn"]):
|
||||
results.append(
|
||||
{
|
||||
"found_uprn": None,
|
||||
|
|
@ -504,12 +440,29 @@ def handler(event, context, local=False):
|
|||
continue
|
||||
|
||||
# Get UPRN using the pre-fetched EPC data with all return options
|
||||
result = get_uprn_with_epc_df(
|
||||
result: Optional[tuple[str, str, float]] = get_uprn_with_epc_df(
|
||||
user_inputed_address=address2uprn_user_input,
|
||||
epc_df=epc_df,
|
||||
verbose=True,
|
||||
)
|
||||
|
||||
# Fallback to historic EPC if new EPC produced no match
|
||||
if not result:
|
||||
try:
|
||||
result = get_uprn_from_historic_epc(
|
||||
user_inputed_address=address2uprn_user_input,
|
||||
postcode=postcode,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Historic EPC lookup failed for {address2uprn_user_input} in {postcode}: {e}"
|
||||
)
|
||||
result = None
|
||||
if result:
|
||||
logger.info(
|
||||
f"Historic EPC matched {address2uprn_user_input} in {postcode}"
|
||||
)
|
||||
|
||||
# Parse result tuple if successful
|
||||
if result:
|
||||
uprn, found_address, score = result
|
||||
|
|
|
|||
56
backend/address2UPRN/scoring.py
Normal file
56
backend/address2UPRN/scoring.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import pandas as pd
|
||||
|
||||
from backend.utils.addressMatch import AddressMatch
|
||||
|
||||
|
||||
def all_uprns_match(
    df: pd.DataFrame,
    target_uprn: str,
    column: str = "uprn",
) -> bool:
    """
    Return True when all non-null values of ``df[column]`` equal ``target_uprn``.

    Values are compared as whitespace-stripped strings. The result is False
    if ``column`` is absent, if no non-null value exists, or if the column
    holds more than one distinct value.
    """
    if column not in df.columns:
        return False

    cleaned = df[column].dropna().astype(str).str.strip()

    # One distinct value means every entry equals the first one, so checking
    # the first entry against the target is sufficient.
    return cleaned.nunique() == 1 and cleaned.iloc[0] == str(target_uprn)
|
||||
|
||||
|
||||
def rank_address_similarity(
    address_list_df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Score and rank candidate addresses against a user-supplied address.

    Works on a copy of ``address_list_df`` and adds two columns:

    * ``lexiscore`` – ``AddressMatch.levenshtein`` of the normalised user
      address against each value of ``address_column``;
    * ``lexirank`` – dense rank of ``lexiscore``, 1 being the highest score.

    ``uprn_column`` is converted to strings with any trailing ``.0`` removed.
    The result is ordered best match first. No UPRN is selected here.

    Raises:
        ValueError: if ``address_column`` or ``uprn_column`` is missing.
    """
    # Validate in the same order as the parameters so the first missing
    # column is the one reported.
    for required in (address_column, uprn_column):
        if required not in address_list_df.columns:
            raise ValueError(f"Missing column: {required}")

    ranked = address_list_df.copy()
    normalised_input = AddressMatch.normalise_address(user_address)

    def _similarity(candidate):
        return AddressMatch.levenshtein(normalised_input, candidate)

    ranked["lexiscore"] = ranked[address_column].apply(_similarity)

    # UPRNs read as floats come through as e.g. "123.0"; keep them as plain
    # digit strings.
    uprn_text = ranked[uprn_column].astype(str)
    ranked[uprn_column] = uprn_text.str.replace(r"\.0$", "", regex=True)

    dense_rank = ranked["lexiscore"].rank(method="dense", ascending=False)
    ranked["lexirank"] = dense_rank.astype(int)

    return ranked.sort_values(
        ["lexirank", "lexiscore"],
        ascending=[True, False],
    )
|
||||
81
backend/address2UPRN/tests/populate_lodgement_dates.py
Normal file
81
backend/address2UPRN/tests/populate_lodgement_dates.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
import csv
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import pandas as pd
|
||||
from epc_api.client import EpcClient
|
||||
|
||||
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
|
||||
SIDECAR_PATH = Path(__file__).parent / "test_lodgement_dates.json"
|
||||
|
||||
|
||||
def fetch_postcode_records(
    client: EpcClient, postcode: str, size: int = 500
) -> pd.DataFrame:
    """Fetch the EPC search results for one postcode.

    Args:
        client: EPC client whose ``domestic`` endpoint is queried.
        postcode: postcode sent as the ``postcode`` query parameter.
        size: page size requested from the search endpoint.

    Returns:
        A DataFrame built from the response's ``rows`` and ``column-names``,
        or an empty DataFrame when the response is empty or has no ``rows``.
    """
    # URLs always use "/" – os.path.join would insert the platform separator
    # (a backslash on Windows), so join the endpoint explicitly.
    url = client.domestic.host.rstrip("/") + "/search"
    url += "?" + urlencode({"size": size})
    resp = client.domestic.call(url=url, method="get", params={"postcode": postcode})
    if not resp or "rows" not in resp:
        return pd.DataFrame()
    return pd.DataFrame(resp["rows"], columns=resp["column-names"])
|
||||
|
||||
|
||||
def main():
    """Populate the lodgement-date sidecar JSON for the UPRN test fixture.

    Reads the fixture CSV, groups rows that have a manual UPRN by postcode,
    fetches the EPC records for each postcode, and records for every row
    whether its UPRN was found and, if so, its ``lodgement-date``. Entries
    already present in the sidecar file are left untouched.

    Raises:
        RuntimeError: if ``EPC_AUTH_TOKEN`` is not set in the environment.
    """
    auth_token = os.getenv("EPC_AUTH_TOKEN")
    if not auth_token:
        raise RuntimeError("EPC_AUTH_TOKEN not set")

    client = EpcClient(auth_token=auth_token)

    # Start from the existing sidecar so earlier results are preserved.
    sidecar = {}
    if SIDECAR_PATH.exists():
        sidecar = json.loads(SIDECAR_PATH.read_text())

    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))

    # Group by postcode so each postcode is fetched only once.
    by_postcode: dict[str, list[dict]] = {}
    for row in rows:
        # Rows whose expected UPRN is the literal string "None" are skipped.
        if row["Manual UPRN Code"] == "None":
            continue
        by_postcode.setdefault(row["Postcode"], []).append(row)

    for postcode, postcode_rows in by_postcode.items():
        print(f"Fetching {postcode} ({len(postcode_rows)} rows)...")
        try:
            epc_df = fetch_postcode_records(client, postcode)
        except Exception as e:
            # Best effort: report the failure and move on to the next postcode.
            print(f" ERROR: {e}")
            continue

        if epc_df.empty:
            print(f" No results from old API for {postcode}")
            continue

        # Normalise UPRNs to strings without a trailing ".0".
        epc_df["uprn"] = epc_df["uprn"].astype(str).str.replace(r"\.0$", "", regex=True)

        for row in postcode_rows:
            key = f"{row['User Input']}|{row['Postcode']}"
            if key in sidecar:
                continue

            expected_uprn = str(row["Manual UPRN Code"]).strip()
            match = epc_df[epc_df["uprn"] == expected_uprn]

            if match.empty:
                print(f" WARN: UPRN {expected_uprn} not found in old API for {postcode}")
                sidecar[key] = {"lodgement_date": None, "found_in_old_api": False}
            else:
                # Only the first matching record is used.
                lodgement_date = match.iloc[0].get("lodgement-date")
                sidecar[key] = {
                    "lodgement_date": str(lodgement_date) if lodgement_date else None,
                    "found_in_old_api": True,
                }
                print(f" {row['User Input']}: {lodgement_date}")

    # NOTE(review): assumed to run once after all postcodes are processed —
    # the original indentation is not visible here; confirm.
    SIDECAR_PATH.write_text(json.dumps(sidecar, indent=2))
    print(f"\nWritten to {SIDECAR_PATH}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -1,12 +1,24 @@
|
|||
# tests/test_address_to_uprn_csv.py
|
||||
|
||||
import csv
|
||||
import time
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from backend.address2UPRN.main import get_uprn
|
||||
|
||||
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
|
||||
|
||||
# Delay between live EPC API calls to stay under the (undocumented) rate limit.
|
||||
# Each parametrized case fires at least one EPC request; without throttling,
|
||||
# GitHub-hosted runners burst fast enough to hit 429s.
|
||||
EPC_THROTTLE_SECONDS = 1.0
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _throttle_epc_requests():
|
||||
yield
|
||||
time.sleep(EPC_THROTTLE_SECONDS)
|
||||
|
||||
|
||||
def load_test_cases():
|
||||
with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
|
||||
|
|
|
|||
|
|
@ -168,8 +168,8 @@ FLAT 8 599 HARROW ROAD,W10 4RA,None
|
|||
"Apartment 18 Block D, 32, Hornsey Road",N7 7AT,10012792383
|
||||
24b Honley Road,SE6 2HZ,None
|
||||
FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
|
||||
2 COLLEGE HOUSE,CM7 1JS,100091449870
|
||||
3 COLLEGE HOUSE,CM7 1JS,100091449871
|
||||
2 COLLEGE HOUSE,CM7 1JS,None
|
||||
3 COLLEGE HOUSE,CM7 1JS,None
|
||||
1 Anita Street,M4 5DU,None
|
||||
2 Anita Street,M4 5DU,77123061
|
||||
5 Anita Street,M4 5DU,77123081
|
||||
|
|
@ -279,6 +279,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
|
|||
80a Victoria Square,M4 5DZ,77211231
|
||||
81a Victoria Square,M4 5DZ,77211232
|
||||
82 Victoria Square,M4 5DZ,None
|
||||
82a Victoria Square,M4 5DZ,77211233
|
||||
83a Victoria Square,M4 5DZ,77211234
|
||||
84a Victoria Square,M4 5DZ,None
|
||||
85a Victoria Square,M4 5DZ,77211236
|
||||
|
|
|
|||
|
|
|
@ -39,11 +39,13 @@ class Settings(BaseSettings):
|
|||
ENGINE_SQS_URL: str = "changeme"
|
||||
CATEGORISATION_SQS_URL: str = "changeme"
|
||||
PASHUB_TO_ARA_SQS_URL: str = "changeme"
|
||||
MAGICPLAN_SQS_URL: str = "changeme"
|
||||
POSTCODE_SPLITTER_SQS_URL: str = "changeme"
|
||||
COMBINER_SQS_URL: str = "changeme"
|
||||
|
||||
# Third parties
|
||||
EPC_AUTH_TOKEN: str = "changeme"
|
||||
OPEN_EPC_API_TOKEN: str = "changeme"
|
||||
GOOGLE_SOLAR_API_KEY: str = "changeme"
|
||||
MAGICPLAN_CUSTOMER_ID: str = "changeme"
|
||||
MAGICPLAN_API_KEY: str = "changeme"
|
||||
|
|
@ -79,6 +81,7 @@ class Settings(BaseSettings):
|
|||
OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None
|
||||
PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None
|
||||
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None
|
||||
OPENAI_API_KEY: Optional[str] = None
|
||||
|
||||
# Pas Hub
|
||||
PASHUB_EMAIL: Optional[str] = None
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from sqlmodel import SQLModel, Field, Relationship
|
|||
|
||||
class SourceEnum(enum.Enum): # TODO: move to domain?
|
||||
PORTFOLIO = "portfolio_id"
|
||||
HUBSPOT_DEAL = "hubspot_deal_id"
|
||||
|
||||
|
||||
class Task(SQLModel, table=True):
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ class FileTypeEnum(enum.Enum):
|
|||
ECMK_SITE_NOTE = "ecmk_site_note"
|
||||
ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
|
||||
ECMK_SURVEY_XML = "ecmk_survey_xml"
|
||||
MAGIC_PLAN_JSON = "magic_plan_json"
|
||||
|
||||
|
||||
class FileSourceEnum(enum.Enum):
|
||||
|
|
@ -24,6 +25,7 @@ class FileSourceEnum(enum.Enum):
|
|||
SHAREPOINT = "sharepoint"
|
||||
HUBSPOT = "hubspot"
|
||||
ECMK = "ecmk"
|
||||
MAGIC_PLAN = "magic_plan"
|
||||
|
||||
|
||||
class UploadedFile(Base):
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@ from fastapi import APIRouter, HTTPException, status
|
|||
from jose import jwt, jwe
|
||||
import json
|
||||
import datetime
|
||||
from app.config import get_settings
|
||||
from app.dependencies import get_derived_encryption_key
|
||||
from backend.app.config import get_settings
|
||||
from backend.app.dependencies import get_derived_encryption_key
|
||||
|
||||
router = APIRouter(
|
||||
prefix="/local",
|
||||
|
|
@ -27,7 +27,12 @@ def create_dummy_token(secret: str) -> str:
|
|||
"dbId": "known_id",
|
||||
}
|
||||
|
||||
token = jwe.encrypt(json.dumps(claims), get_derived_encryption_key(secret), algorithm="dir", encryption="A256GCM")
|
||||
token = jwe.encrypt(
|
||||
json.dumps(claims),
|
||||
get_derived_encryption_key(secret),
|
||||
algorithm="dir",
|
||||
encryption="A256GCM",
|
||||
)
|
||||
return token
|
||||
|
||||
|
||||
|
|
@ -40,6 +45,8 @@ async def dummy_token():
|
|||
async def dummy_token():
|
||||
settings = get_settings()
|
||||
if settings.ENVIRONMENT != "local":
|
||||
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Dummy token can only be generated in local environment")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_403_FORBIDDEN,
|
||||
detail="Dummy token can only be generated in local environment",
|
||||
)
|
||||
return {"dummy_token": create_dummy_token(settings.SECRET_KEY)}
|
||||
|
|
|
|||
|
|
@ -30,10 +30,7 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
|
|||
logger.error(f"Validation Errors: {exc.errors()}")
|
||||
return JSONResponse(
|
||||
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
|
||||
content=jsonable_encoder({
|
||||
"detail": exc.errors(),
|
||||
"body": exc.body
|
||||
}),
|
||||
content=jsonable_encoder({"detail": exc.errors(), "body": exc.body}),
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -66,7 +63,8 @@ app.include_router(tasks_router.router, prefix="/v1")
|
|||
app.include_router(bulk_uploads_router.router, prefix="/v1")
|
||||
|
||||
if get_settings().ENVIRONMENT == "local":
|
||||
from app.local import router as local_router
|
||||
from backend.app.local import router as local_router
|
||||
|
||||
app.include_router(local_router.router)
|
||||
|
||||
handler = Mangum(app)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import ast
|
||||
import os
|
||||
from typing import Optional
|
||||
import msgpack
|
||||
from uuid import UUID
|
||||
|
|
@ -8,6 +7,7 @@ from backend.addresses.Address import Address
|
|||
from backend.app.config import get_settings
|
||||
from backend.app.plan.data_classes import PropertyRequestData
|
||||
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
|
||||
from backend.utils.cloudwatch import build_cloudwatch_log_url
|
||||
from starlette.responses import Response
|
||||
from utils.logger import setup_logger
|
||||
|
||||
|
|
@ -241,33 +241,6 @@ def parse_eco_packages(
|
|||
return measures, mapped["target_sap"], mapped["plan_type"], already_installed
|
||||
|
||||
|
||||
def build_cloudwatch_log_url(start_ms: Optional[int]) -> str:
|
||||
"""
|
||||
Build a CloudWatch Logs URL for the current Lambda invocation,
|
||||
including timestamp window from start_ms to end_ms (epoch ms).
|
||||
"""
|
||||
logger.info("Building cloudwatch logs URL")
|
||||
region = os.environ["AWS_REGION"]
|
||||
logger.info("Building cloudwatch logs URL: Got AWS region")
|
||||
log_group = os.environ["AWS_LAMBDA_LOG_GROUP_NAME"]
|
||||
logger.info("Building cloudwatch logs URL: Got lambda log group name")
|
||||
log_stream = os.environ["AWS_LAMBDA_LOG_STREAM_NAME"]
|
||||
logger.info("Building cloudwatch logs URL: Got lambda log stream name")
|
||||
|
||||
# CloudWatch console requires / encoded as $252F
|
||||
encoded_group = log_group.replace("/", "$252F")
|
||||
encoded_stream = log_stream.replace("/", "$252F")
|
||||
|
||||
# Return the full URL with time range
|
||||
return (
|
||||
f"https://console.aws.amazon.com/cloudwatch/home?"
|
||||
f"region={region}"
|
||||
f"#logsV2:log-groups/log-group/{encoded_group}"
|
||||
f"/log-events/{encoded_stream}"
|
||||
f"$3Fstart={start_ms}"
|
||||
)
|
||||
|
||||
|
||||
def handle_error(
|
||||
msg: str,
|
||||
exception: Exception,
|
||||
|
|
|
|||
|
|
@ -13,4 +13,9 @@ boto3==1.35.44
|
|||
openpyxl==3.1.5
|
||||
# Basic
|
||||
pytz
|
||||
sqlmodel
|
||||
sqlmodel
|
||||
# HTTP client
|
||||
httpx==0.28.1
|
||||
# Data
|
||||
pandas
|
||||
pandas-stubs
|
||||
|
|
@ -3,7 +3,7 @@ import time
|
|||
from typing import Any, Mapping
|
||||
|
||||
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
|
||||
from backend.app.plan.utils import build_cloudwatch_log_url
|
||||
from backend.utils.cloudwatch import build_cloudwatch_log_url
|
||||
from backend.categorisation.categorisation_trigger_request import (
|
||||
CategorisationTriggerRequest,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,8 @@ from backend.app.db.functions.tasks.Tasks import SubTaskInterface
|
|||
from backend.app.db.models.recommendations import PlanModel, ScenarioModel
|
||||
from backend.app.domain.classes.plan import Plan
|
||||
from backend.app.domain.classes.scenario import Scenario
|
||||
from backend.app.plan.utils import build_cloudwatch_log_url, handle_error
|
||||
from backend.app.plan.utils import handle_error
|
||||
from backend.utils.cloudwatch import build_cloudwatch_log_url
|
||||
from backend.categorisation.categorisation_trigger_request import (
|
||||
CategorisationTriggerRequest,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -23,8 +23,9 @@ from backend.app.db.functions.tasks.Tasks import SubTaskInterface
|
|||
|
||||
from backend.app.plan.schemas import PlanTriggerRequest
|
||||
from backend.app.plan.utils import (
|
||||
get_cleaned, patch_epc, extract_property_request_data, handle_error, build_cloudwatch_log_url
|
||||
get_cleaned, patch_epc, extract_property_request_data, handle_error
|
||||
)
|
||||
from backend.utils.cloudwatch import build_cloudwatch_log_url
|
||||
from backend.app.utils import sap_to_epc
|
||||
import backend.app.assumptions as assumptions
|
||||
|
||||
|
|
|
|||
|
|
@ -23,4 +23,6 @@ pyarrow==17.0.0
|
|||
fastparquet==2024.5.0
|
||||
aiohttp==3.10.10
|
||||
# find my epc
|
||||
beautifulsoup4
|
||||
beautifulsoup4
|
||||
# HTTP client (epc_client module)
|
||||
httpx==0.28.1
|
||||
3
backend/epc_client/__init__.py
Normal file
3
backend/epc_client/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from backend.epc_client.epc_client_service import EpcClientService
|
||||
|
||||
__all__ = ["EpcClientService"]
|
||||
28
backend/epc_client/_retry.py
Normal file
28
backend/epc_client/_retry.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
import time
|
||||
from typing import Callable, TypeVar
|
||||
|
||||
from backend.epc_client.exceptions import EpcRateLimitError
|
||||
|
||||
T = TypeVar("T")


def call_with_retry(
    fn: Callable[[], T],
    max_retries: int = 5,
    backoff_base: float = 1.0,
    backoff_multiplier: float = 2.0,
    max_backoff: float = 60.0,
) -> T:
    """Call ``fn`` and retry it while it raises ``EpcRateLimitError``.

    Args:
        fn: Zero-argument callable to invoke.
        max_retries: Number of retries after the first attempt, so ``fn`` is
            called at most ``max_retries + 1`` times. Must be >= 0.
        backoff_base: Delay used before the first retry when the error
            carries no ``retry_after`` value.
        backoff_multiplier: Factor applied to the delay for each further retry.
        max_backoff: Upper bound on any single sleep, including delays
            requested through ``retry_after``.

    Returns:
        Whatever ``fn`` returns on its first successful call.

    Raises:
        ValueError: If ``max_retries`` is negative.
        EpcRateLimitError: The last rate-limit error once retries run out.
    """
    if max_retries < 0:
        # Previously this fell through to ``raise None`` (a TypeError).
        raise ValueError(f"max_retries must be >= 0, got {max_retries}")

    for attempt in range(max_retries + 1):
        try:
            return fn()
        except EpcRateLimitError as exc:
            if attempt >= max_retries:
                raise
            if exc.retry_after is not None:
                delay = exc.retry_after
            else:
                delay = backoff_base * (backoff_multiplier ** attempt)
            # Clamp to [0, max_backoff]: time.sleep() raises ValueError on a
            # negative or NaN delay, which a bad Retry-After could produce.
            time.sleep(max(0.0, min(delay, max_backoff)))

    # Unreachable: the final attempt either returns or re-raises above.
    raise AssertionError("call_with_retry exhausted without a result")
|
||||
118
backend/epc_client/epc_client_service.py
Normal file
118
backend/epc_client/epc_client_service.py
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from backend.epc_client.exceptions import (
|
||||
EpcApiError,
|
||||
EpcNotFoundError,
|
||||
EpcRateLimitError,
|
||||
)
|
||||
from backend.epc_client._retry import call_with_retry
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from datatypes.epc.search import EpcSearchResult
|
||||
|
||||
|
||||
class EpcClientService:
    """Synchronous client for the EPC data API rooted at ``BASE_URL``.

    Each public method wraps its request in ``call_with_retry``, so a 429
    response (raised internally as ``EpcRateLimitError``) is retried before
    anything reaches the caller.
    """

    BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk"
    # Timeout handed to ``httpx.get`` for every request.
    REQUEST_TIMEOUT = 10.0

    def __init__(self, auth_token: str) -> None:
        """Build the headers (bearer token, JSON accept) sent with each request."""
        self._headers = {
            "Authorization": f"Bearer {auth_token}",
            "Accept": "application/json",
        }

    @staticmethod
    def _parse_retry_after(resp: httpx.Response) -> Optional[float]:
        """Return the ``Retry-After`` header as seconds, or ``None``.

        ``None`` is returned when the header is missing, is not a plain
        number (for example an HTTP-date), or is negative / NaN / infinite.
        The caller then falls back to its own backoff schedule.
        """
        header = resp.headers.get("Retry-After")
        if header is None:
            return None
        try:
            seconds = float(header)
        except (TypeError, ValueError):
            return None
        # The chained comparison is False for NaN, negatives and +inf alike.
        if not 0.0 <= seconds < float("inf"):
            return None
        return seconds

    def get_by_certificate_number(self, cert_num: str) -> EpcPropertyData:
        """Fetch one certificate and map it to ``EpcPropertyData``.

        Raises:
            EpcNotFoundError: The API answered 404 for ``cert_num``.
            EpcRateLimitError: Still rate limited after all retries.
            EpcApiError: Any other non-success response.
        """
        raw = call_with_retry(lambda: self._fetch_certificate(cert_num))
        return EpcPropertyDataMapper.from_api_response(raw)

    def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]:
        """Return the most recently registered certificate for ``uprn``.

        Returns ``None`` when the search yields no rows.
        """
        results = call_with_retry(lambda: self._search(uprn=uprn))
        if not results:
            return None
        # registration_date is compared exactly as the API returned it.
        # NOTE(review): presumably ISO dates, which order correctly as
        # strings — confirm against the API spec.
        latest = max(results, key=lambda r: r.registration_date)
        return self.get_by_certificate_number(latest.certificate_number)

    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
        """Return every search result for ``postcode`` (empty list on 404)."""
        return call_with_retry(lambda: self._search(postcode=postcode))

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _get(self, path: str, params: dict[str, Any]) -> httpx.Response:
        """Issue a GET against ``BASE_URL + path`` with the shared headers."""
        return httpx.get(
            f"{self.BASE_URL}{path}",
            params=params,
            headers=self._headers,
            timeout=self.REQUEST_TIMEOUT,
        )

    @classmethod
    def _raise_for_error(cls, resp: httpx.Response) -> None:
        """Raise the matching client error for a 429 or other non-2xx response.

        404 is handled by each caller first because its meaning differs per
        endpoint (an error for a certificate, an empty result for a search).
        """
        if resp.status_code == 429:
            raise EpcRateLimitError(
                "Rate limited by EPC API",
                retry_after=cls._parse_retry_after(resp),
            )
        if not resp.is_success:
            raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")

    def _fetch_certificate(self, cert_num: str) -> dict[str, Any]:
        """Return the ``data`` payload of the certificate endpoint."""
        resp = self._get("/api/certificate", {"certificate_number": cert_num})
        if resp.status_code == 404:
            raise EpcNotFoundError(cert_num)
        self._raise_for_error(resp)
        return resp.json()["data"]

    def _search(
        self,
        postcode: Optional[str] = None,
        uprn: Optional[int] = None,
    ) -> list[EpcSearchResult]:
        """Run a domestic search filtered by postcode and/or UPRN."""
        params: dict[str, str | int] = {}
        if postcode:
            params["postcode"] = postcode
        if uprn is not None:
            params["uprn"] = uprn

        resp = self._get("/api/domestic/search", params)
        if resp.status_code == 404:
            return []
        self._raise_for_error(resp)

        rows = resp.json().get("data", [])
        return [self._parse_search_result(r) for r in rows]

    @staticmethod
    def _parse_search_result(row: dict[str, Any]) -> EpcSearchResult:
        """Convert one camelCase search row into an ``EpcSearchResult``.

        Address lines 2-4 and ``uprn`` are optional; a missing required key
        raises ``KeyError``.
        """
        return EpcSearchResult(
            certificate_number=row["certificateNumber"],
            address_line_1=row["addressLine1"],
            address_line_2=row.get("addressLine2"),
            address_line_3=row.get("addressLine3"),
            address_line_4=row.get("addressLine4"),
            postcode=row["postcode"],
            post_town=row["postTown"],
            uprn=row.get("uprn"),
            current_energy_efficiency_band=row["currentEnergyEfficiencyBand"],
            registration_date=row["registrationDate"],
        )
|
||||
17
backend/epc_client/exceptions.py
Normal file
17
backend/epc_client/exceptions.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
from typing import Optional
|
||||
|
||||
|
||||
class EpcApiError(Exception):
    """Common ancestor of every error raised by the EPC client."""


class EpcNotFoundError(EpcApiError):
    """Signals that the API answered a request with HTTP 404."""


class EpcRateLimitError(EpcApiError):
    """Signals an HTTP 429 from the API.

    ``retry_after`` holds the wait the server asked for, when it gave one.
    """

    def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
        # None means the server gave no usable hint.
        self.retry_after = retry_after
        super().__init__(message)
|
||||
0
backend/epc_client/tests/__init__.py
Normal file
0
backend/epc_client/tests/__init__.py
Normal file
48
backend/epc_client/tests/conftest.py
Normal file
48
backend/epc_client/tests/conftest.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
import json
|
||||
import pathlib
|
||||
import pytest
|
||||
|
||||
from backend.epc_client.epc_client_service import EpcClientService
|
||||
|
||||
SAMPLES_DIR = pathlib.Path("backend/epc_api/json_samples")
|
||||
|
||||
|
||||
@pytest.fixture
def rdsap_21_0_0_cert():
    """Parsed JSON sample certificate for RdSAP-Schema-21.0.0."""
    sample_path = SAMPLES_DIR / "RdSAP-Schema-21.0.0/epc.json"
    raw_text = sample_path.read_text()
    return json.loads(raw_text)
|
||||
|
||||
|
||||
@pytest.fixture
def rdsap_21_0_1_cert():
    """Parsed JSON sample certificate for RdSAP-Schema-21.0.1."""
    sample_path = SAMPLES_DIR / "RdSAP-Schema-21.0.1/epc.json"
    raw_text = sample_path.read_text()
    return json.loads(raw_text)
|
||||
|
||||
|
||||
@pytest.fixture
def epc_service():
    """Client instance built with a placeholder auth token."""
    service = EpcClientService(auth_token="test-token")
    return service
|
||||
|
||||
|
||||
def make_search_row(
    cert_num="CERT-001",
    address_line_1="1 Test Street",
    postcode="SW1A 1AA",
    post_town="London",
    uprn=100023336956,
    band="D",
    registration_date="2024-01-01",
    address_line_2=None,
    address_line_3=None,
    address_line_4=None,
):
    """Build one search-result row using the API's camelCase keys.

    Every argument has a default, so a test only overrides the fields it
    cares about.
    """
    return dict(
        certificateNumber=cert_num,
        addressLine1=address_line_1,
        addressLine2=address_line_2,
        addressLine3=address_line_3,
        addressLine4=address_line_4,
        postcode=postcode,
        postTown=post_town,
        uprn=uprn,
        currentEnergyEfficiencyBand=band,
        registrationDate=registration_date,
    )
|
||||
217
backend/epc_client/tests/test_client.py
Normal file
217
backend/epc_client/tests/test_client.py
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
from unittest.mock import MagicMock, patch, call
|
||||
import pytest
|
||||
|
||||
from backend.epc_client.epc_client_service import EpcClientService
|
||||
from datatypes.epc.search import EpcSearchResult
|
||||
from backend.epc_client.exceptions import EpcNotFoundError, EpcRateLimitError
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
from backend.epc_client.tests.conftest import make_search_row
|
||||
|
||||
|
||||
def _mock_response(status_code=200, json_data=None, headers=None):
|
||||
resp = MagicMock()
|
||||
resp.status_code = status_code
|
||||
resp.is_success = 200 <= status_code < 300
|
||||
resp.json.return_value = json_data or {}
|
||||
resp.text = str(json_data)
|
||||
resp.headers = headers or {}
|
||||
return resp
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: get_by_certificate_number happy path
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_get_by_certificate_number_returns_epc_property_data(
    epc_service, rdsap_21_0_1_cert
):
    """A 200 response wrapping a certificate is mapped to ``EpcPropertyData``."""
    ok_response = _mock_response(200, {"data": rdsap_21_0_1_cert})
    with patch("httpx.get", return_value=ok_response):
        fetched = epc_service.get_by_certificate_number("CERT-001")

    assert isinstance(fetched, EpcPropertyData)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: get_by_certificate_number 404 → EpcNotFoundError
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_get_by_certificate_number_404_raises_not_found(epc_service):
    """A 404 from the certificate endpoint surfaces as ``EpcNotFoundError``."""
    not_found = _mock_response(404)
    with patch("httpx.get", return_value=not_found), pytest.raises(EpcNotFoundError):
        epc_service.get_by_certificate_number("BAD-CERT")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: 429 retried, succeeds on 3rd attempt
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_get_by_certificate_number_retries_on_429_and_succeeds(
    epc_service, rdsap_21_0_1_cert
):
    """Two 429 responses followed by a 200 still yield a mapped certificate."""
    payload = {"data": rdsap_21_0_1_cert}
    sequence = [_mock_response(429), _mock_response(429), _mock_response(200, payload)]
    with patch("httpx.get", side_effect=sequence):
        # Sleeping is stubbed out so the retries do not slow the test down.
        with patch("time.sleep"):
            outcome = epc_service.get_by_certificate_number("CERT-001")

    assert isinstance(outcome, EpcPropertyData)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3b: 429 with Retry-After header → sleeps for that value
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_429_retry_after_header_drives_sleep_duration(
    epc_service, rdsap_21_0_1_cert
):
    """A numeric ``Retry-After`` header decides how long the retry sleeps."""
    rate_limited = _mock_response(429, headers={"Retry-After": "7"})
    success = _mock_response(200, {"data": rdsap_21_0_1_cert})
    with patch("httpx.get", side_effect=[rate_limited, success]):
        with patch("backend.epc_client._retry.time.sleep") as mock_sleep:
            epc_service.get_by_certificate_number("CERT-001")

    mock_sleep.assert_called_once_with(7.0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3c: 429 without Retry-After → falls back to exponential backoff
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_429_without_retry_after_uses_exponential_backoff(
    epc_service, rdsap_21_0_1_cert
):
    """Without a ``Retry-After`` header the sleeps follow 1.0 then 2.0."""
    payload = {"data": rdsap_21_0_1_cert}
    sequence = [_mock_response(429), _mock_response(429), _mock_response(200, payload)]
    with patch("httpx.get", side_effect=sequence):
        with patch("backend.epc_client._retry.time.sleep") as mock_sleep:
            epc_service.get_by_certificate_number("CERT-001")

    expected_sleeps = [call(1.0), call(2.0)]
    assert mock_sleep.call_args_list == expected_sleeps
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3d: malformed Retry-After header → falls back to exponential backoff
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_429_malformed_retry_after_falls_back_to_backoff(
    epc_service, rdsap_21_0_1_cert
):
    """A non-numeric ``Retry-After`` value is ignored in favour of backoff."""
    rate_limited = _mock_response(
        429, headers={"Retry-After": "Wed, 21 Oct 2026 07:28:00 GMT"}
    )
    success = _mock_response(200, {"data": rdsap_21_0_1_cert})
    with patch("httpx.get", side_effect=[rate_limited, success]):
        with patch("backend.epc_client._retry.time.sleep") as mock_sleep:
            epc_service.get_by_certificate_number("CERT-001")

    mock_sleep.assert_called_once_with(1.0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3e: Retry-After capped by max_backoff to avoid hostile/buggy values
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_429_retry_after_capped_by_max_backoff(epc_service, rdsap_21_0_1_cert):
    """An oversized ``Retry-After`` value is capped at the 60 s maximum."""
    rate_limited = _mock_response(429, headers={"Retry-After": "9999"})
    success = _mock_response(200, {"data": rdsap_21_0_1_cert})
    with patch("httpx.get", side_effect=[rate_limited, success]):
        with patch("backend.epc_client._retry.time.sleep") as mock_sleep:
            epc_service.get_by_certificate_number("CERT-001")

    mock_sleep.assert_called_once_with(60.0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 4: get_by_uprn empty search → None
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_get_by_uprn_returns_none_when_no_results(epc_service):
    """An empty search payload makes ``get_by_uprn`` return ``None``."""
    empty_search = _mock_response(200, {"data": []})
    with patch("httpx.get", return_value=empty_search):
        found = epc_service.get_by_uprn(100023336956)

    assert found is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 5: get_by_uprn multiple results → fetches latest by registration_date
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_get_by_uprn_picks_most_recent_certificate(epc_service, rdsap_21_0_1_cert):
    """With several search hits, the newest registration date is fetched."""
    dated_certs = [
        ("CERT-OLD", "2022-01-01"),
        ("CERT-NEW", "2024-06-01"),
        ("CERT-MID", "2023-03-15"),
    ]
    search_payload = {
        "data": [
            make_search_row(cert_num=number, registration_date=registered)
            for number, registered in dated_certs
        ]
    }
    cert_payload = {"data": rdsap_21_0_1_cert}

    def fake_get(url, params=None, **kwargs):
        # The search and certificate endpoints are told apart by URL.
        payload = search_payload if "search" in url else cert_payload
        return _mock_response(200, payload)

    with patch("httpx.get", side_effect=fake_get) as mock_get:
        result = epc_service.get_by_uprn(100023336956)

    assert isinstance(result, EpcPropertyData)
    # The second HTTP call is the certificate fetch for the newest entry.
    second_call = mock_get.call_args_list[1]
    assert second_call.kwargs["params"]["certificate_number"] == "CERT-NEW"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 6: search_by_postcode returns list[EpcSearchResult]
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_search_by_postcode_returns_results(epc_service):
    """Each row in the search payload becomes one ``EpcSearchResult``."""
    payload = {
        "data": [
            make_search_row(cert_num="CERT-A", address_line_1="1 High Street"),
            make_search_row(cert_num="CERT-B", address_line_1="2 High Street"),
        ]
    }
    with patch("httpx.get", return_value=_mock_response(200, payload)):
        results = epc_service.search_by_postcode("SW1A 1AA")

    assert len(results) == 2
    assert all(isinstance(r, EpcSearchResult) for r in results)
    first, second = results
    assert first.certificate_number == "CERT-A"
    assert second.address_line_1 == "2 High Street"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 7: search_by_postcode 404 → empty list
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_search_by_postcode_404_returns_empty_list(epc_service):
    """A 404 from the search endpoint is treated as "no results"."""
    not_found = _mock_response(404)
    with patch("httpx.get", return_value=not_found):
        results = epc_service.search_by_postcode("ZZ9 9ZZ")

    assert results == []
|
||||
31
backend/epc_client/tests/test_mapper_dispatcher.py
Normal file
31
backend/epc_client/tests/test_mapper_dispatcher.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
import pytest
|
||||
|
||||
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
|
||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 1: from_api_response with RdSAP-Schema-21.0.0 fixture → EpcPropertyData
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_from_api_response_rdsap_21_0_0(rdsap_21_0_0_cert):
    """The RdSAP-Schema-21.0.0 sample maps to ``EpcPropertyData``."""
    mapped = EpcPropertyDataMapper.from_api_response(rdsap_21_0_0_cert)
    assert isinstance(mapped, EpcPropertyData)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 2: from_api_response with RdSAP-Schema-21.0.1 fixture → EpcPropertyData
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_from_api_response_rdsap_21_0_1(rdsap_21_0_1_cert):
    """The RdSAP-Schema-21.0.1 sample maps to ``EpcPropertyData``."""
    mapped = EpcPropertyDataMapper.from_api_response(rdsap_21_0_1_cert)
    assert isinstance(mapped, EpcPropertyData)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Test 3: unknown schema_type → ValueError
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def test_from_api_response_unknown_schema_raises():
    """An unrecognised ``schema_type`` is rejected with ``ValueError``."""
    unknown_payload = {"schema_type": "RdSAP-Schema-99.0.0"}
    with pytest.raises(ValueError, match="Unsupported EPC schema"):
        EpcPropertyDataMapper.from_api_response(unknown_payload)
|
||||
0
backend/etl/__init__.py
Normal file
0
backend/etl/__init__.py
Normal file
14
backend/etl/etl_opendatacommunities/README.md
Normal file
14
backend/etl/etl_opendatacommunities/README.md
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
The website https://epc.opendatacommunities.org/ closed down on 30th May 2026.
|
||||
|
||||
So we downloaded the data and moved everything to S3 ( s3://retrofit-data-dev/histroical_epc/0_master_backup/ )
|
||||
|
||||
This script assumes the following:
|
||||
|
||||
1) You downloaded the master copy, uncompressed it and set it to a path so we can read the csv
|
||||
|
||||
|
||||
The script does the following:
|
||||
|
||||
1) reads the CSV for all data and separates the rows by postcode
|
||||
2) compresses each postcode's CSV and saves it to the location below
|
||||
3) location: s3://retrofit-data-dev/historical_epc/<postcode>/data.csv.gz (gzip-compressed CSV)
|
||||
0
backend/etl/etl_opendatacommunities/__init__.py
Normal file
0
backend/etl/etl_opendatacommunities/__init__.py
Normal file
133
backend/etl/etl_opendatacommunities/main.py
Normal file
133
backend/etl/etl_opendatacommunities/main.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import boto3
|
||||
import pandas as pd
|
||||
from botocore.config import Config
|
||||
from tqdm import tqdm
|
||||
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
SRC_ROOT = Path("/workspaces/home/epc_data")
|
||||
TMP_ROOT = Path("/tmp/epc_postcodes")
|
||||
S3_BUCKET = "retrofit-data-dev"
|
||||
S3_PREFIX = "historical_epc"
|
||||
|
||||
# This script assumes you have downloaded the zip, unzipped it, and are running it locally
|
||||
|
||||
|
||||
def sanitise(pc: pd.Series) -> pd.Series:
    """Upper-case every postcode and remove its space characters.

    Values are cast to pandas' ``string`` dtype first, so missing entries
    stay missing rather than becoming the text ``"nan"``.
    """
    as_text = pc.astype("string")
    upper = as_text.str.upper()
    return upper.str.replace(" ", "", regex=False)
|
||||
|
||||
|
||||
def shard_la(la_dir: Path) -> None:
    """Split one folder's ``certificates.csv`` into per-postcode shards.

    Rows whose cleaned postcode is missing or empty are dropped, with a
    warning. The rest are appended to ``TMP_ROOT/<postcode>.csv``; the header
    is written only when that shard file does not exist yet.
    """
    frame = pd.read_csv(la_dir / "certificates.csv", low_memory=False)
    frame["POSTCODE_CLEAN"] = sanitise(frame["POSTCODE"])

    total_rows = len(frame)
    usable = frame.dropna(subset=["POSTCODE_CLEAN"])
    usable = usable[usable["POSTCODE_CLEAN"] != ""]
    dropped = total_rows - len(usable)
    if dropped:
        logger.warning(f"{la_dir.name}: dropped {dropped} rows with empty postcode")

    for pc, rows in usable.groupby("POSTCODE_CLEAN", sort=False):
        shard_path = TMP_ROOT / f"{pc}.csv"
        # Decide on the header before to_csv creates the file.
        write_header = not shard_path.exists()
        payload = rows.drop(columns=["POSTCODE_CLEAN"])
        payload.to_csv(shard_path, mode="a", header=write_header, index=False)
|
||||
|
||||
|
||||
def list_existing_keys(s3: Any) -> set[str]:
    """Collect every object key already stored under ``S3_PREFIX/``.

    Pages through ``list_objects_v2`` and returns the keys as a set, so the
    caller can cheaply test whether a shard was uploaded before.
    """
    pages = s3.get_paginator("list_objects_v2").paginate(
        Bucket=S3_BUCKET, Prefix=f"{S3_PREFIX}/"
    )
    existing: set[str] = {
        obj["Key"]
        for page in tqdm(pages, desc="list s3")
        # A page with no matching objects has no "Contents" entry.
        for obj in page.get("Contents", [])
    }
    logger.info(f"Found {len(existing)} existing objects under {S3_PREFIX}/")
    return existing
|
||||
|
||||
|
||||
def upload_postcode(path: Path, s3: Any) -> None:
    """Deduplicate one postcode shard and upload it to S3 as gzipped CSV.

    Fully identical rows are dropped first. An ``LMK_KEY`` that still occurs
    more than once therefore belongs to rows that differ, and aborts the
    upload.

    Raises:
        ValueError: If any ``LMK_KEY`` remains duplicated after deduplication.
    """
    raw = pd.read_csv(path, low_memory=False)
    df = raw.drop_duplicates()

    key_counts = df["LMK_KEY"].value_counts()
    bad = key_counts[key_counts > 1]
    if len(bad) > 0:
        raise ValueError(
            f"Postcode {path.stem}: LMK_KEY appears with conflicting cert data: "
            f"{bad.index.tolist()[:5]}"
        )

    gz_buffer = BytesIO()
    df.to_csv(gz_buffer, index=False, compression="gzip")
    object_key = f"{S3_PREFIX}/{path.stem}/data.csv.gz"
    s3.put_object(
        Bucket=S3_BUCKET,
        Key=object_key,
        Body=gz_buffer.getvalue(),
        ContentType="text/csv",
        ContentEncoding="gzip",
    )
|
||||
|
||||
|
||||
def main() -> None:
    """Shard every ``domestic-*`` folder by postcode and upload new shards to S3.

    Steps:
      1. Run ``shard_la`` on each ``domestic-*`` directory under ``SRC_ROOT``,
         producing per-postcode CSVs in ``TMP_ROOT``.
      2. List the keys already present in S3 and skip shards whose
         ``<S3_PREFIX>/<postcode>/data.csv.gz`` object exists.
      3. Upload the remaining shards on a thread pool, keeping at most
         ``2 * workers`` uploads submitted at any time.

    Raises:
        Exception: The first upload failure is logged and re-raised. Uploads
            that have not started yet are cancelled; uploads already running
            finish before the exception leaves the executor block.
    """
    TMP_ROOT.mkdir(parents=True, exist_ok=True)
    la_dirs = sorted(
        p for p in SRC_ROOT.iterdir() if p.is_dir() and p.name.startswith("domestic-")
    )
    logger.info(f"Sharding {len(la_dirs)} LA folders -> {TMP_ROOT}")

    for la in tqdm(la_dirs, desc="shard"):
        shard_la(la)

    s3 = boto3.client(
        "s3",
        config=Config(
            max_pool_connections=512, retries={"max_attempts": 5, "mode": "standard"}
        ),
    )
    pc_files = sorted(TMP_ROOT.glob("*.csv"))
    logger.info(f"Found {len(pc_files)} local shards")

    existing = list_existing_keys(s3)
    todo = [p for p in pc_files if f"{S3_PREFIX}/{p.stem}/data.csv.gz" not in existing]
    skipped = len(pc_files) - len(todo)
    logger.info(
        f"Uploading {len(todo)} shards (skipping {skipped} already in S3) -> "
        f"s3://{S3_BUCKET}/{S3_PREFIX}/"
    )

    workers = 256
    todo_iter = iter(todo)
    inflight: dict[Any, Path] = {}
    # The context manager closes the progress bar on the failure path too.
    with tqdm(total=len(todo), desc="upload") as pbar, ThreadPoolExecutor(
        max_workers=workers
    ) as pool:
        # Prime the pool with a bounded window instead of submitting every
        # shard up front.
        for _ in range(workers * 2):
            pc = next(todo_iter, None)
            if pc is None:
                break
            inflight[pool.submit(upload_postcode, pc, s3)] = pc

        while inflight:
            done, _ = wait(inflight.keys(), return_when=FIRST_COMPLETED)
            for fut in done:
                pc = inflight.pop(fut)
                try:
                    fut.result()
                except Exception as e:
                    logger.error(f"{pc.name}: {e}")
                    # Cancel uploads that have not started; otherwise the
                    # executor would run all of them on exit before the
                    # error reaches the caller.
                    for pending in inflight:
                        pending.cancel()
                    raise
                pbar.update(1)
                # Refill the window: one new submission per completed upload.
                nxt = next(todo_iter, None)
                if nxt is not None:
                    inflight[pool.submit(upload_postcode, nxt, s3)] = nxt
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -83,7 +83,7 @@ def process_export(
|
|||
else:
|
||||
scenario_recs = recommendations_df[
|
||||
recommendations_df["scenario_id"] == group_key
|
||||
]
|
||||
]
|
||||
|
||||
if scenario_recs.empty:
|
||||
logger.info(
|
||||
|
|
@ -140,8 +140,8 @@ def handler(
|
|||
body_dict = {
|
||||
"task_id": "test",
|
||||
"subtask_id": "test",
|
||||
"portfolio_id": 682,
|
||||
"scenario_ids": [1210],
|
||||
"portfolio_id": 632,
|
||||
"scenario_ids": [1144],
|
||||
"default_plans_only": False,
|
||||
}
|
||||
:param event: Lambda event containing export request details
|
||||
|
|
|
|||
|
|
@ -5,13 +5,14 @@ from backend.magic_plan.magic_plan_client import MagicPlanClient
|
|||
from backend.magic_plan.magic_plan_service import MagicPlanService
|
||||
from backend.magic_plan.magic_plan_trigger_request import MagicPlanTriggerRequest
|
||||
from datatypes.magicplan.domain.models import Plan
|
||||
from backend.app.db.models.tasks import SourceEnum
|
||||
from backend.utils.subtasks import task_handler
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
@task_handler()
|
||||
@task_handler(task_source="magic_plan", source=SourceEnum.HUBSPOT_DEAL)
|
||||
def handler(body: dict[str, Any], context: Any) -> str:
|
||||
settings = get_settings()
|
||||
payload = MagicPlanTriggerRequest.model_validate(body)
|
||||
|
|
@ -19,7 +20,10 @@ def handler(body: dict[str, Any], context: Any) -> str:
|
|||
customer_id=settings.MAGICPLAN_CUSTOMER_ID,
|
||||
api_key=settings.MAGICPLAN_API_KEY,
|
||||
)
|
||||
plan: Plan = MagicPlanService(client).run(payload.address, payload.uprn)
|
||||
# TODO: read s3_bucket from env var so staging/prod use the correct bucket
|
||||
plan: Plan = MagicPlanService(
|
||||
client, s3_bucket="retrofit-energy-assessments-dev"
|
||||
).run(payload)
|
||||
logger.info("Saved MagicPlan plan uid=%s", plan.uid)
|
||||
return plan.uid
|
||||
|
||||
|
|
@ -28,8 +32,7 @@ if __name__ == "__main__":
|
|||
event = {
|
||||
"Records": [
|
||||
{
|
||||
"body": '{"address": "2 Laburnum Way Bromley BR2 8BZ"}',
|
||||
"messageId": "local-test",
|
||||
"body": '{"address": "2 Laburnum Way Bromley BR2 8BZ", "hubspot_deal_id": "local-test-deal"}',
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
12
backend/magic_plan/handler/Dockerfile
Normal file
12
backend/magic_plan/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
FROM public.ecr.aws/lambda/python:3.11
|
||||
|
||||
WORKDIR /var/task
|
||||
|
||||
COPY backend/magic_plan/handler/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY utils/ utils/
|
||||
COPY backend/ backend/
|
||||
COPY datatypes/ datatypes/
|
||||
|
||||
CMD ["backend.magic_plan.handler.handler"]
|
||||
11
backend/magic_plan/handler/requirements.txt
Normal file
11
backend/magic_plan/handler/requirements.txt
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
awslambdaric
|
||||
requests
|
||||
sqlalchemy==2.0.36
|
||||
sqlmodel
|
||||
psycopg2-binary==2.9.10
|
||||
pydantic-settings==2.6.0
|
||||
boto3==1.35.44
|
||||
|
||||
pytz==2024.2
|
||||
pandas==2.2.2
|
||||
numpy==2.1.2
|
||||
11
backend/magic_plan/local_handler/docker-compose.yml
Normal file
11
backend/magic_plan/local_handler/docker-compose.yml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
version: "3.9"
|
||||
|
||||
services:
|
||||
ecmk-fetcher-lambda:
|
||||
build:
|
||||
context: ../../../
|
||||
dockerfile: backend/magic_plan/handler/Dockerfile
|
||||
ports:
|
||||
- "9000:8080"
|
||||
env_file:
|
||||
- ../../../.env
|
||||
29
backend/magic_plan/local_handler/invoke_local_lambda.py
Normal file
29
backend/magic_plan/local_handler/invoke_local_lambda.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#!/usr/bin/env python3
"""Send a sample SQS-shaped event to the locally running Lambda container.

The event is POSTed to the invocation URL built from HOST and PORT, and the
HTTP status code and response body are printed.
"""
import json
import requests

HOST = "localhost"
PORT = "9000"

LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"

# (connect, read) timeouts in seconds. The short connect timeout fails fast
# when nothing is listening; the read timeout is generous because the
# invocation's duration is unknown to this script.
REQUEST_TIMEOUT = (5, 900)

payload = {
    "Records": [
        {
            "messageId": "test-message-id",
            "body": json.dumps(
                # Alternative body kept for manual testing:
                # {
                #     "address": "2 Laburnum Way, Rombley, BR2 8BZ | Retrofit Assessment",
                #     "hubspot_deal_id": "500262906061",
                # }
                {"address": "33 Wallaby Way, Sydney", "hubspot_deal_id": "123456789"}
            ),
        }
    ]
}


def main() -> None:
    """POST ``payload`` to ``LAMBDA_URL`` and print the status code and body."""
    response = requests.post(LAMBDA_URL, json=payload, timeout=REQUEST_TIMEOUT)

    print("Status code:", response.status_code)
    print("Response:")
    print(response.text)


if __name__ == "__main__":
    main()
|
||||
|
|
@ -1,24 +1,35 @@
|
|||
import requests
|
||||
|
||||
from datatypes.magicplan.api.response import MagicPlanPlan, PlansListResponse
|
||||
from datatypes.magicplan.api.response import MagicPlanPlan, PlanSummary, PlansListResponse
|
||||
|
||||
_BASE_URL = "https://cloud.magicplan.app/api/v2"
|
||||
|
||||
|
||||
class MagicPlanClient:
|
||||
def __init__(self, customer_id: str, api_key: str) -> None:
|
||||
self._api_key = api_key
|
||||
self._session = requests.Session()
|
||||
self._session.headers.update({"customer": customer_id})
|
||||
self._session.headers.update({"customer": customer_id, "key": api_key})
|
||||
|
||||
def get_plans(self) -> PlansListResponse:
|
||||
r = self._session.get(f"{_BASE_URL}/plans", params={"key": self._api_key})
|
||||
r.raise_for_status()
|
||||
return PlansListResponse.model_validate(r.json()["data"])
|
||||
def get_plans(self) -> list[PlanSummary]:
|
||||
all_plans: list[PlanSummary] = []
|
||||
page = 1
|
||||
while True:
|
||||
r = self._session.get(f"{_BASE_URL}/workgroups/plans", params={"page": page})
|
||||
r.raise_for_status()
|
||||
response = PlansListResponse.model_validate(r.json()["data"])
|
||||
all_plans.extend(response.plans)
|
||||
if not response.paging.next_page:
|
||||
break
|
||||
page += 1
|
||||
return all_plans
|
||||
|
||||
def get_plan(self, plan_id: str) -> MagicPlanPlan:
|
||||
r = self._session.get(
|
||||
f"{_BASE_URL}/plans/{plan_id}", params={"key": self._api_key}
|
||||
)
|
||||
return MagicPlanPlan.model_validate(self._fetch_plan(plan_id).json()["data"])
|
||||
|
||||
def get_plan_raw(self, plan_id: str) -> bytes:
|
||||
return self._fetch_plan(plan_id).content
|
||||
|
||||
def _fetch_plan(self, plan_id: str) -> requests.Response:
|
||||
r = self._session.get(f"{_BASE_URL}/plans/get/{plan_id}")
|
||||
r.raise_for_status()
|
||||
return MagicPlanPlan.model_validate(r.json()["data"])
|
||||
return r
|
||||
|
|
|
|||
|
|
@ -1,42 +1,84 @@
|
|||
import gzip
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
|
||||
from datatypes.magicplan.api.response import (
|
||||
MagicPlanPlan,
|
||||
PlanSummary,
|
||||
PlansListResponse,
|
||||
)
|
||||
from datatypes.magicplan.api.response import MagicPlanPlan, PlanSummary
|
||||
from datatypes.magicplan.domain.mapper import map_plan
|
||||
from datatypes.magicplan.domain.models import Plan
|
||||
|
||||
from backend.app.db.connection import db_session
|
||||
from backend.app.db.functions.magic_plan_functions import save_plan
|
||||
from backend.app.db.models.uploaded_file import (
|
||||
FileSourceEnum,
|
||||
FileTypeEnum,
|
||||
UploadedFile,
|
||||
)
|
||||
from backend.magic_plan.address_matcher import find_matching_plan
|
||||
from backend.magic_plan.magic_plan_client import MagicPlanClient
|
||||
from backend.magic_plan.magic_plan_trigger_request import MagicPlanTriggerRequest
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import save_data_to_s3
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class MagicPlanService:
|
||||
def __init__(self, client: MagicPlanClient) -> None:
|
||||
def __init__(self, client: MagicPlanClient, s3_bucket: str) -> None:
|
||||
self._client = client
|
||||
self._s3_bucket = s3_bucket
|
||||
|
||||
def run(self, request: MagicPlanTriggerRequest) -> Plan:
|
||||
address = request.address
|
||||
uprn = request.uprn
|
||||
|
||||
def run(self, address: str, uprn: Optional[str] = None) -> Plan:
|
||||
if uprn is not None:
|
||||
logger.info("MagicPlanService.run uprn=%s", uprn)
|
||||
|
||||
plans_response: PlansListResponse = self._client.get_plans()
|
||||
matched: Optional[PlanSummary] = find_matching_plan(
|
||||
plans_response.plans, address
|
||||
) # TODO: use address2UPRN instead? or create AddressMatch domain class
|
||||
plans: list[PlanSummary] = self._client.get_plans()
|
||||
matched: Optional[PlanSummary] = find_matching_plan(plans, address)
|
||||
|
||||
if matched is None:
|
||||
raise ValueError(f"No MagicPlan found for address: {address!r}")
|
||||
|
||||
magic_plan: MagicPlanPlan = self._client.get_plan(matched.id)
|
||||
raw_bytes: bytes = self._client.get_plan_raw(matched.id)
|
||||
magic_plan: MagicPlanPlan = MagicPlanPlan.model_validate(
|
||||
json.loads(raw_bytes)["data"]
|
||||
)
|
||||
plan: Plan = map_plan(magic_plan)
|
||||
|
||||
uploaded_file: UploadedFile = self._upload_raw_plan_json(
|
||||
plan_id=matched.id,
|
||||
raw_bytes=raw_bytes,
|
||||
uprn=uprn,
|
||||
hubspot_deal_id=request.hubspot_deal_id,
|
||||
)
|
||||
|
||||
with db_session() as session:
|
||||
save_plan(session, plan)
|
||||
session.add(uploaded_file)
|
||||
|
||||
return plan
|
||||
|
||||
def _upload_raw_plan_json(
|
||||
self,
|
||||
plan_id: str,
|
||||
raw_bytes: bytes,
|
||||
uprn: Optional[str],
|
||||
hubspot_deal_id: str,
|
||||
) -> UploadedFile:
|
||||
compressed = gzip.compress(raw_bytes)
|
||||
if uprn is not None:
|
||||
s3_key = f"documents/uprn/{uprn}/magic_plan_{plan_id}.json.gz"
|
||||
else:
|
||||
s3_key = f"documents/hubspot_deal_id/{hubspot_deal_id}/magic_plan_{plan_id}.json.gz"
|
||||
save_data_to_s3(compressed, self._s3_bucket, s3_key)
|
||||
return UploadedFile(
|
||||
s3_file_bucket=self._s3_bucket,
|
||||
s3_file_key=s3_key,
|
||||
s3_upload_timestamp=datetime.now(timezone.utc),
|
||||
uprn=int(uprn) if uprn is not None else None,
|
||||
hubspot_deal_id=hubspot_deal_id,
|
||||
file_source=FileSourceEnum.MAGIC_PLAN.value,
|
||||
file_type=FileTypeEnum.MAGIC_PLAN_JSON.value,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -7,4 +7,5 @@ class MagicPlanTriggerRequest(BaseModel):
|
|||
model_config = ConfigDict(extra="ignore")
|
||||
|
||||
address: str
|
||||
hubspot_deal_id: str
|
||||
uprn: Optional[str] = None
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ def test_handler_raises_on_missing_address(mock_plan: MagicMock) -> None:
|
|||
|
||||
def test_handler_constructs_client_from_settings(mock_service: MagicMock) -> None:
|
||||
# Arrange
|
||||
body = {"address": ADDRESS}
|
||||
body = {"address": ADDRESS, "hubspot_deal_id": "deal-123"}
|
||||
with patch("backend.magic_plan.handler.get_settings", return_value=_make_settings(customer_id="cust-xyz", api_key="key-xyz")), \
|
||||
patch("backend.magic_plan.handler.MagicPlanClient") as MockClient, \
|
||||
patch("backend.magic_plan.handler.MagicPlanService", return_value=mock_service):
|
||||
|
|
@ -69,31 +69,37 @@ def test_handler_constructs_client_from_settings(mock_service: MagicMock) -> Non
|
|||
|
||||
def test_handler_calls_service_run_with_address(mock_service: MagicMock) -> None:
|
||||
# Arrange
|
||||
body = {"address": ADDRESS}
|
||||
body = {"address": ADDRESS, "hubspot_deal_id": "deal-123"}
|
||||
with patch("backend.magic_plan.handler.get_settings", return_value=_make_settings()), \
|
||||
patch("backend.magic_plan.handler.MagicPlanClient"), \
|
||||
patch("backend.magic_plan.handler.MagicPlanService", return_value=mock_service):
|
||||
# Act
|
||||
_call_handler(body)
|
||||
# Assert
|
||||
mock_service.run.assert_called_once_with(ADDRESS, None)
|
||||
mock_service.run.assert_called_once()
|
||||
request = mock_service.run.call_args.args[0]
|
||||
assert request.address == ADDRESS
|
||||
assert request.uprn is None
|
||||
|
||||
|
||||
def test_handler_passes_uprn_to_service(mock_service: MagicMock) -> None:
|
||||
# Arrange
|
||||
body = {"address": ADDRESS, "uprn": "100023336956"}
|
||||
body = {"address": ADDRESS, "uprn": "100023336956", "hubspot_deal_id": "deal-123"}
|
||||
with patch("backend.magic_plan.handler.get_settings", return_value=_make_settings()), \
|
||||
patch("backend.magic_plan.handler.MagicPlanClient"), \
|
||||
patch("backend.magic_plan.handler.MagicPlanService", return_value=mock_service):
|
||||
# Act
|
||||
_call_handler(body)
|
||||
# Assert
|
||||
mock_service.run.assert_called_once_with(ADDRESS, "100023336956")
|
||||
mock_service.run.assert_called_once()
|
||||
request = mock_service.run.call_args.args[0]
|
||||
assert request.address == ADDRESS
|
||||
assert request.uprn == "100023336956"
|
||||
|
||||
|
||||
def test_handler_returns_plan_uid(mock_service: MagicMock) -> None:
|
||||
# Arrange
|
||||
body = {"address": ADDRESS}
|
||||
body = {"address": ADDRESS, "hubspot_deal_id": "deal-123"}
|
||||
with patch("backend.magic_plan.handler.get_settings", return_value=_make_settings()), \
|
||||
patch("backend.magic_plan.handler.MagicPlanClient"), \
|
||||
patch("backend.magic_plan.handler.MagicPlanService", return_value=mock_service):
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import pytest
|
|||
import requests
|
||||
|
||||
from backend.magic_plan.magic_plan_client import MagicPlanClient
|
||||
from datatypes.magicplan.api.response import MagicPlanPlan, PlansListResponse
|
||||
from datatypes.magicplan.api.response import MagicPlanPlan, PlanSummary
|
||||
|
||||
FIXTURE_DIR = Path(__file__).parents[2] / "magic_plan"
|
||||
BASE_URL = "https://cloud.magicplan.app/api/v2"
|
||||
|
|
@ -20,6 +20,7 @@ def _load_fixture(name: str) -> dict[str, Any]:
|
|||
|
||||
|
||||
def _make_client(mock_session: MagicMock) -> MagicPlanClient:
|
||||
mock_session.headers = {}
|
||||
with patch(
|
||||
"backend.magic_plan.magic_plan_client.requests.Session",
|
||||
return_value=mock_session,
|
||||
|
|
@ -44,7 +45,14 @@ def test_customer_header_set_on_session(mock_session: MagicMock) -> None:
|
|||
# Act
|
||||
_make_client(mock_session)
|
||||
# Assert
|
||||
mock_session.headers.update.assert_called_once_with({"customer": CUSTOMER_ID})
|
||||
assert mock_session.headers["customer"] == CUSTOMER_ID
|
||||
|
||||
|
||||
def test_api_key_header_set_on_session(mock_session: MagicMock) -> None:
|
||||
# Act
|
||||
_make_client(mock_session)
|
||||
# Assert
|
||||
assert mock_session.headers["key"] == API_KEY
|
||||
|
||||
|
||||
# --- get_plans ---
|
||||
|
|
@ -63,7 +71,7 @@ def test_get_plans_calls_correct_url(
|
|||
client.get_plans()
|
||||
# Assert
|
||||
mock_session.get.assert_called_once_with(
|
||||
f"{BASE_URL}/plans", params={"key": API_KEY}
|
||||
f"{BASE_URL}/workgroups/plans", params={"page": 1}
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -82,7 +90,7 @@ def test_get_plans_calls_raise_for_status(
|
|||
mock_session.get.return_value.raise_for_status.assert_called_once()
|
||||
|
||||
|
||||
def test_get_plans_returns_plans_list_response(
|
||||
def test_get_plans_returns_list_of_plan_summaries(
|
||||
client: MagicPlanClient, mock_session: MagicMock
|
||||
) -> None:
|
||||
# Arrange
|
||||
|
|
@ -94,8 +102,9 @@ def test_get_plans_returns_plans_list_response(
|
|||
# Act
|
||||
result = client.get_plans()
|
||||
# Assert
|
||||
assert isinstance(result, PlansListResponse)
|
||||
assert len(result.plans) == 1
|
||||
assert isinstance(result, list)
|
||||
assert len(result) == 1
|
||||
assert isinstance(result[0], PlanSummary)
|
||||
|
||||
|
||||
def test_get_plans_propagates_http_error(
|
||||
|
|
@ -110,6 +119,34 @@ def test_get_plans_propagates_http_error(
|
|||
client.get_plans()
|
||||
|
||||
|
||||
def test_get_plans_multi_page_fetches_all_pages(
|
||||
client: MagicPlanClient, mock_session: MagicMock
|
||||
) -> None:
|
||||
# Arrange
|
||||
page1_plan = _load_fixture("magicplan_api_plans_response_example.json")["data"][
|
||||
"plans"
|
||||
][0]
|
||||
page2_plan = {**page1_plan, "id": "page-2-plan-id"}
|
||||
page1_response = MagicMock()
|
||||
page1_response.json.return_value = {
|
||||
"data": {"paging": {"page": 1, "next_page": True, "count": 2}, "plans": [page1_plan]}
|
||||
}
|
||||
page2_response = MagicMock()
|
||||
page2_response.json.return_value = {
|
||||
"data": {"paging": {"page": 2, "next_page": False, "count": 2}, "plans": [page2_plan]}
|
||||
}
|
||||
mock_session.get.side_effect = [page1_response, page2_response]
|
||||
# Act
|
||||
result = client.get_plans()
|
||||
# Assert
|
||||
assert mock_session.get.call_count == 2
|
||||
mock_session.get.assert_any_call(f"{BASE_URL}/workgroups/plans", params={"page": 1})
|
||||
mock_session.get.assert_any_call(f"{BASE_URL}/workgroups/plans", params={"page": 2})
|
||||
assert len(result) == 2
|
||||
assert result[0].id == page1_plan["id"]
|
||||
assert result[1].id == "page-2-plan-id"
|
||||
|
||||
|
||||
# --- get_plan ---
|
||||
|
||||
|
||||
|
|
@ -126,9 +163,7 @@ def test_get_plan_calls_correct_url(
|
|||
# Act
|
||||
client.get_plan(plan_id)
|
||||
# Assert
|
||||
mock_session.get.assert_called_once_with(
|
||||
f"{BASE_URL}/plans/{plan_id}", params={"key": API_KEY}
|
||||
)
|
||||
mock_session.get.assert_called_once_with(f"{BASE_URL}/plans/get/{plan_id}")
|
||||
|
||||
|
||||
def test_get_plan_calls_raise_for_status(
|
||||
|
|
@ -172,3 +207,53 @@ def test_get_plan_propagates_http_error(
|
|||
# Act / Assert
|
||||
with pytest.raises(requests.HTTPError):
|
||||
client.get_plan("some-id")
|
||||
|
||||
|
||||
# --- get_plan_raw ---
|
||||
|
||||
|
||||
def test_get_plan_raw_returns_bytes(
|
||||
client: MagicPlanClient, mock_session: MagicMock
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_session.get.return_value.content = b'{"data": "raw"}'
|
||||
plan_id = "a7285ed1-878d-47eb-8aa6-85ef9e187516"
|
||||
# Act
|
||||
result = client.get_plan_raw(plan_id)
|
||||
# Assert
|
||||
assert isinstance(result, bytes)
|
||||
|
||||
|
||||
def test_get_plan_raw_calls_correct_url(
|
||||
client: MagicPlanClient, mock_session: MagicMock
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_session.get.return_value.content = b"{}"
|
||||
plan_id = "a7285ed1-878d-47eb-8aa6-85ef9e187516"
|
||||
# Act
|
||||
client.get_plan_raw(plan_id)
|
||||
# Assert
|
||||
mock_session.get.assert_called_once_with(f"{BASE_URL}/plans/get/{plan_id}")
|
||||
|
||||
|
||||
def test_get_plan_raw_calls_raise_for_status(
|
||||
client: MagicPlanClient, mock_session: MagicMock
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_session.get.return_value.content = b"{}"
|
||||
# Act
|
||||
client.get_plan_raw("a7285ed1-878d-47eb-8aa6-85ef9e187516")
|
||||
# Assert
|
||||
mock_session.get.return_value.raise_for_status.assert_called_once()
|
||||
|
||||
|
||||
def test_get_plan_raw_propagates_http_error(
|
||||
client: MagicPlanClient, mock_session: MagicMock
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_session.get.return_value.raise_for_status.side_effect = requests.HTTPError(
|
||||
"500"
|
||||
)
|
||||
# Act / Assert
|
||||
with pytest.raises(requests.HTTPError):
|
||||
client.get_plan_raw("some-id")
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import ANY, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -8,11 +8,18 @@ from datatypes.magicplan.api.response import MagicPlanPlan, PlanSummary
|
|||
from datatypes.magicplan.domain.mapper import map_plan
|
||||
from datatypes.magicplan.domain.models import Plan
|
||||
|
||||
from backend.app.db.models.uploaded_file import (
|
||||
FileSourceEnum,
|
||||
FileTypeEnum,
|
||||
UploadedFile,
|
||||
)
|
||||
from backend.magic_plan.magic_plan_client import MagicPlanClient
|
||||
from backend.magic_plan.magic_plan_service import MagicPlanService
|
||||
from backend.magic_plan.magic_plan_trigger_request import MagicPlanTriggerRequest
|
||||
|
||||
FIXTURE_DIR = Path(__file__).parents[2] / "magic_plan"
|
||||
PLAN_ID = "a7285ed1-878d-47eb-8aa6-85ef9e187516"
|
||||
S3_BUCKET = "test-bucket"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
|
|
@ -41,11 +48,25 @@ def plan_summary() -> PlanSummary:
|
|||
|
||||
@pytest.fixture()
|
||||
def mock_client() -> MagicMock:
|
||||
return MagicMock(spec=MagicPlanClient)
|
||||
client = MagicMock(spec=MagicPlanClient)
|
||||
client.get_plan_raw.return_value = (
|
||||
FIXTURE_DIR / "magicplan_api_plan_response_example.json"
|
||||
).read_bytes()
|
||||
return client
|
||||
|
||||
|
||||
def _make_service(mock_client: MagicMock) -> MagicPlanService:
|
||||
return MagicPlanService(client=mock_client)
|
||||
return MagicPlanService(client=mock_client, s3_bucket=S3_BUCKET)
|
||||
|
||||
|
||||
def _make_request(
|
||||
address: str = "2 Laburnum Way Bromley BR2 8BZ",
|
||||
hubspot_deal_id: str = "deal-123",
|
||||
uprn: str | None = None,
|
||||
) -> MagicPlanTriggerRequest:
|
||||
return MagicPlanTriggerRequest(
|
||||
address=address, hubspot_deal_id=hubspot_deal_id, uprn=uprn
|
||||
)
|
||||
|
||||
|
||||
# --- no match ---
|
||||
|
|
@ -57,7 +78,7 @@ def test_run_raises_when_no_plan_found(mock_client: MagicMock) -> None:
|
|||
service = _make_service(mock_client)
|
||||
# Act / Assert
|
||||
with pytest.raises(ValueError, match="No MagicPlan found"):
|
||||
service.run("99 Nowhere Road London SW1A 1AA")
|
||||
service.run(_make_request(address="99 Nowhere Road London SW1A 1AA"))
|
||||
|
||||
|
||||
# --- match found ---
|
||||
|
|
@ -70,7 +91,7 @@ def test_run_fetches_plan_with_matched_id(
|
|||
domain_plan: Plan,
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_client.get_plans.return_value.plans = [plan_summary]
|
||||
mock_client.get_plans.return_value = [plan_summary]
|
||||
mock_client.get_plan.return_value = api_magic_plan
|
||||
service = _make_service(mock_client)
|
||||
with patch(
|
||||
|
|
@ -78,10 +99,12 @@ def test_run_fetches_plan_with_matched_id(
|
|||
return_value=plan_summary,
|
||||
), patch("backend.magic_plan.magic_plan_service.save_plan"), patch(
|
||||
"backend.magic_plan.magic_plan_service.db_session"
|
||||
), patch(
|
||||
"backend.magic_plan.magic_plan_service.save_data_to_s3"
|
||||
):
|
||||
service.run("2 Laburnum Way Bromley BR2 8BZ")
|
||||
service.run(_make_request())
|
||||
# Assert
|
||||
mock_client.get_plan.assert_called_once_with(plan_summary.id)
|
||||
mock_client.get_plan_raw.assert_called_once_with(plan_summary.id)
|
||||
|
||||
|
||||
def test_run_returns_mapped_plan(
|
||||
|
|
@ -91,7 +114,7 @@ def test_run_returns_mapped_plan(
|
|||
domain_plan: Plan,
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_client.get_plans.return_value.plans = [plan_summary]
|
||||
mock_client.get_plans.return_value = [plan_summary]
|
||||
mock_client.get_plan.return_value = api_magic_plan
|
||||
service = _make_service(mock_client)
|
||||
with patch(
|
||||
|
|
@ -99,8 +122,10 @@ def test_run_returns_mapped_plan(
|
|||
return_value=plan_summary,
|
||||
), patch("backend.magic_plan.magic_plan_service.save_plan"), patch(
|
||||
"backend.magic_plan.magic_plan_service.db_session"
|
||||
), patch(
|
||||
"backend.magic_plan.magic_plan_service.save_data_to_s3"
|
||||
):
|
||||
result = service.run("2 Laburnum Way Bromley BR2 8BZ")
|
||||
result = service.run(_make_request())
|
||||
# Assert
|
||||
assert isinstance(result, Plan)
|
||||
assert result.uid == PLAN_ID
|
||||
|
|
@ -112,7 +137,7 @@ def test_run_calls_save_plan_with_mapped_plan(
|
|||
plan_summary: PlanSummary,
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_client.get_plans.return_value.plans = [plan_summary]
|
||||
mock_client.get_plans.return_value = [plan_summary]
|
||||
mock_client.get_plan.return_value = api_magic_plan
|
||||
service = _make_service(mock_client)
|
||||
with patch(
|
||||
|
|
@ -120,8 +145,10 @@ def test_run_calls_save_plan_with_mapped_plan(
|
|||
return_value=plan_summary,
|
||||
), patch("backend.magic_plan.magic_plan_service.save_plan") as mock_save, patch(
|
||||
"backend.magic_plan.magic_plan_service.db_session"
|
||||
), patch(
|
||||
"backend.magic_plan.magic_plan_service.save_data_to_s3"
|
||||
):
|
||||
service.run("2 Laburnum Way Bromley BR2 8BZ")
|
||||
service.run(_make_request())
|
||||
# Assert — save_plan called with a Plan whose uid matches
|
||||
call_args = mock_save.call_args
|
||||
saved_plan: Plan = call_args[0][1]
|
||||
|
|
@ -134,7 +161,7 @@ def test_run_accepts_uprn_without_error(
|
|||
plan_summary: PlanSummary,
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_client.get_plans.return_value.plans = [plan_summary]
|
||||
mock_client.get_plans.return_value = [plan_summary]
|
||||
mock_client.get_plan.return_value = api_magic_plan
|
||||
service = _make_service(mock_client)
|
||||
with patch(
|
||||
|
|
@ -142,5 +169,105 @@ def test_run_accepts_uprn_without_error(
|
|||
return_value=plan_summary,
|
||||
), patch("backend.magic_plan.magic_plan_service.save_plan"), patch(
|
||||
"backend.magic_plan.magic_plan_service.db_session"
|
||||
), patch(
|
||||
"backend.magic_plan.magic_plan_service.save_data_to_s3"
|
||||
):
|
||||
service.run("2 Laburnum Way Bromley BR2 8BZ", uprn="100023336956")
|
||||
service.run(_make_request(uprn="100023336956"))
|
||||
|
||||
|
||||
# --- S3 upload ---
|
||||
|
||||
|
||||
def test_run_uploads_to_s3_with_uprn_key(
|
||||
mock_client: MagicMock,
|
||||
api_magic_plan: MagicPlanPlan,
|
||||
plan_summary: PlanSummary,
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_client.get_plans.return_value = [plan_summary]
|
||||
request = _make_request(uprn="100023336956")
|
||||
service = MagicPlanService(client=mock_client, s3_bucket=S3_BUCKET)
|
||||
with patch(
|
||||
"backend.magic_plan.magic_plan_service.find_matching_plan",
|
||||
return_value=plan_summary,
|
||||
), patch("backend.magic_plan.magic_plan_service.save_plan"), patch(
|
||||
"backend.magic_plan.magic_plan_service.db_session"
|
||||
), patch(
|
||||
"backend.magic_plan.magic_plan_service.save_data_to_s3"
|
||||
) as mock_s3:
|
||||
# Act
|
||||
service.run(request)
|
||||
# Assert
|
||||
mock_s3.assert_called_once_with(
|
||||
ANY,
|
||||
S3_BUCKET,
|
||||
f"documents/uprn/100023336956/magic_plan_{plan_summary.id}.json.gz",
|
||||
)
|
||||
|
||||
|
||||
def test_run_uploads_to_s3_with_deal_id_key_when_uprn_absent(
|
||||
mock_client: MagicMock,
|
||||
api_magic_plan: MagicPlanPlan,
|
||||
plan_summary: PlanSummary,
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_client.get_plans.return_value = [plan_summary]
|
||||
mock_client.get_plan.return_value = api_magic_plan
|
||||
request = _make_request(hubspot_deal_id="deal-456", uprn=None)
|
||||
service = MagicPlanService(client=mock_client, s3_bucket=S3_BUCKET)
|
||||
with patch(
|
||||
"backend.magic_plan.magic_plan_service.find_matching_plan",
|
||||
return_value=plan_summary,
|
||||
), patch("backend.magic_plan.magic_plan_service.save_plan"), patch(
|
||||
"backend.magic_plan.magic_plan_service.db_session"
|
||||
), patch(
|
||||
"backend.magic_plan.magic_plan_service.save_data_to_s3"
|
||||
) as mock_s3:
|
||||
# Act
|
||||
service.run(request)
|
||||
# Assert
|
||||
mock_s3.assert_called_once_with(
|
||||
ANY,
|
||||
S3_BUCKET,
|
||||
f"documents/hubspot_deal_id/deal-456/magic_plan_{plan_summary.id}.json.gz",
|
||||
)
|
||||
|
||||
|
||||
# --- UploadedFile record ---
|
||||
|
||||
|
||||
def test_run_creates_uploaded_file_record(
|
||||
mock_client: MagicMock,
|
||||
api_magic_plan: MagicPlanPlan,
|
||||
plan_summary: PlanSummary,
|
||||
) -> None:
|
||||
# Arrange
|
||||
mock_client.get_plans.return_value = [plan_summary]
|
||||
mock_client.get_plan.return_value = api_magic_plan
|
||||
request = _make_request(hubspot_deal_id="deal-789", uprn="100023336956")
|
||||
service = MagicPlanService(client=mock_client, s3_bucket=S3_BUCKET)
|
||||
mock_session = MagicMock()
|
||||
with patch(
|
||||
"backend.magic_plan.magic_plan_service.find_matching_plan",
|
||||
return_value=plan_summary,
|
||||
), patch("backend.magic_plan.magic_plan_service.save_plan"), patch(
|
||||
"backend.magic_plan.magic_plan_service.db_session"
|
||||
) as mock_db, patch(
|
||||
"backend.magic_plan.magic_plan_service.save_data_to_s3"
|
||||
):
|
||||
mock_db.return_value.__enter__.return_value = mock_session
|
||||
# Act
|
||||
service.run(request)
|
||||
# Assert
|
||||
added_objects = [call.args[0] for call in mock_session.add.call_args_list]
|
||||
uploaded_file = next(
|
||||
(obj for obj in added_objects if isinstance(obj, UploadedFile)), None
|
||||
)
|
||||
assert uploaded_file is not None
|
||||
assert uploaded_file.file_source == FileSourceEnum.MAGIC_PLAN.value
|
||||
assert uploaded_file.file_type == FileTypeEnum.MAGIC_PLAN_JSON.value
|
||||
assert uploaded_file.s3_file_bucket == S3_BUCKET
|
||||
assert uploaded_file.s3_file_key == f"documents/uprn/100023336956/magic_plan_{plan_summary.id}.json.gz"
|
||||
assert uploaded_file.s3_upload_timestamp is not None
|
||||
assert uploaded_file.uprn == 100023336956
|
||||
assert uploaded_file.hubspot_deal_id == "deal-789"
|
||||
|
|
|
|||
|
|
@ -6,17 +6,18 @@ from backend.magic_plan.magic_plan_trigger_request import MagicPlanTriggerReques
|
|||
|
||||
def test_valid_payload_with_address_only() -> None:
|
||||
# Arrange
|
||||
payload = {"address": "123 High St London SW1A 1AA"}
|
||||
payload = {"address": "123 High St London SW1A 1AA", "hubspot_deal_id": "123456789"}
|
||||
# Act
|
||||
req = MagicPlanTriggerRequest.model_validate(payload)
|
||||
# Assert
|
||||
assert req.address == "123 High St London SW1A 1AA"
|
||||
assert req.hubspot_deal_id == "123456789"
|
||||
assert req.uprn is None
|
||||
|
||||
|
||||
def test_valid_payload_with_uprn() -> None:
|
||||
# Arrange
|
||||
payload = {"address": "123 High St London SW1A 1AA", "uprn": "100023336956"}
|
||||
payload = {"address": "123 High St London SW1A 1AA", "hubspot_deal_id": "123456789", "uprn": "100023336956"}
|
||||
# Act
|
||||
req = MagicPlanTriggerRequest.model_validate(payload)
|
||||
# Assert
|
||||
|
|
@ -25,7 +26,7 @@ def test_valid_payload_with_uprn() -> None:
|
|||
|
||||
def test_missing_address_raises() -> None:
|
||||
# Arrange
|
||||
payload = {"uprn": "100023336956"}
|
||||
payload = {"hubspot_deal_id": "123456789", "uprn": "100023336956"}
|
||||
# Act / Assert
|
||||
with pytest.raises(ValidationError):
|
||||
MagicPlanTriggerRequest.model_validate(payload)
|
||||
|
|
@ -33,8 +34,16 @@ def test_missing_address_raises() -> None:
|
|||
|
||||
def test_extra_fields_ignored() -> None:
|
||||
# Arrange
|
||||
payload = {"address": "123 High St London SW1A 1AA", "unknown_field": "whatever"}
|
||||
payload = {"address": "123 High St London SW1A 1AA", "hubspot_deal_id": "123456789", "unknown_field": "whatever"}
|
||||
# Act
|
||||
req = MagicPlanTriggerRequest.model_validate(payload)
|
||||
# Assert
|
||||
assert req.address == "123 High St London SW1A 1AA"
|
||||
|
||||
|
||||
def test_missing_hubspot_deal_id_raises() -> None:
    """A payload that omits ``hubspot_deal_id`` must fail model validation."""
    # Arrange: address only, hubspot_deal_id deliberately left out
    incomplete_payload = dict(address="123 High St London SW1A 1AA")

    # Act / Assert
    with pytest.raises(ValidationError):
        MagicPlanTriggerRequest.model_validate(incomplete_payload)
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
|
|||
from backend.pashub_fetcher.pashub_service import PashubService
|
||||
from backend.pashub_fetcher.pashub_to_ara_trigger_request import PashubToAraTriggerRequest
|
||||
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
|
||||
from backend.app.db.models.tasks import SourceEnum
|
||||
from backend.utils.subtasks import task_handler
|
||||
from utils.logger import setup_logger
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
|
|
@ -21,7 +22,7 @@ def get_pashub_client(email: str, password: str) -> PashubClient:
|
|||
return PashubClient(token=token)
|
||||
|
||||
|
||||
@task_handler()
|
||||
@task_handler(task_source="pashub_fetcher", source=SourceEnum.HUBSPOT_DEAL)
|
||||
def handler(body: Dict[str, Any], context: Any) -> List[str]:
|
||||
logger.info("Received message")
|
||||
|
||||
|
|
|
|||
60
backend/tests/test_address_match.py
Normal file
60
backend/tests/test_address_match.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
from backend.utils.addressMatch import AddressMatch
|
||||
|
||||
|
||||
class TestNormaliseAddress:
    """Checks for ``AddressMatch.normalise_address``."""

    def test_lowercases_input(self):
        normalised = AddressMatch.normalise_address("1 HIGH STREET")
        assert normalised == "1 high street"

    def test_expands_road_abbreviation(self):
        normalised = AddressMatch.normalise_address("1 Moreton Rd")
        assert normalised == "1 moreton road"

    def test_expands_avenue_abbreviation(self):
        normalised = AddressMatch.normalise_address("2 Park Ave")
        assert normalised == "2 park avenue"

    def test_removes_punctuation_keeps_slash(self):
        # The comma must go, but the slash in "1/A" has to survive.
        normalised = AddressMatch.normalise_address("Flat 1/A, Some Road")
        assert "," not in normalised
        assert "/" in normalised

    def test_splits_digit_letter_suffix(self):
        normalised = AddressMatch.normalise_address("42a Some Road")
        assert "42 a" in normalised

    def test_empty_string_returns_empty(self):
        normalised = AddressMatch.normalise_address("")
        assert normalised == ""

    def test_removes_no_prefix(self):
        normalised = AddressMatch.normalise_address("No 5 High Street")
        tokens = normalised.split()
        assert "no" not in tokens
        assert "5" in normalised
|
||||
|
||||
|
||||
class TestScore:
    """Checks for ``AddressMatch.score`` (user address vs. EPC address)."""

    def test_identical_address_scores_one(self):
        similarity = AddressMatch.score("1 High Street", "1 High Street")
        assert similarity == 1.0

    def test_case_insensitive(self):
        similarity = AddressMatch.score("1 HIGH STREET", "1 high street")
        assert similarity == 1.0

    def test_street_type_synonym_scores_one(self):
        # "Rd" expands to "road" during normalisation, so both sides
        # should end up identical.
        similarity = AddressMatch.score("1 High Rd", "1 High Road")
        assert similarity == 1.0

    def test_different_building_numbers_score_zero(self):
        similarity = AddressMatch.score("1 High Street", "2 High Street")
        assert similarity == 0.0

    def test_disjoint_number_sets_score_zero(self):
        similarity = AddressMatch.score("1 High Street", "99 Nowhere Lane")
        assert similarity == 0.0

    def test_user_address_has_number_but_epc_does_not_scores_zero(self):
        similarity = AddressMatch.score("1 High Street", "High Street")
        assert similarity == 0.0

    def test_partial_address_scores_above_threshold(self):
        # The user address carries one extra token ("London"); the building
        # number is the same and the token overlap is high.
        similarity = AddressMatch.score("1 High Street London", "1 High Street")
        assert 0.6 <= similarity < 1.0

    def test_flat_number_mismatch_scores_zero(self):
        # The user gives two numbers but no "flat" token, and the EPC has a
        # different flat number. This triggers the order-sensitive flat guard.
        similarity = AddressMatch.score("3 42 High Street", "Flat 7 42 High Street")
        assert similarity == 0.0
|
||||
|
|
@ -1,8 +1,14 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Optional
|
||||
from difflib import SequenceMatcher
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
import requests
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class AddressMatch:
|
||||
def __init__(self):
|
||||
|
|
@ -95,6 +101,16 @@ class AddressMatch:
|
|||
tokens.append(replacement)
|
||||
return " ".join(tokens)
|
||||
|
||||
@staticmethod
|
||||
def _match_building_number(token: str, next_token: Optional[str]) -> Optional[str]:
|
||||
if re.fullmatch(r"\d+[a-z]", token):
|
||||
return token
|
||||
if re.fullmatch(r"\d+", token):
|
||||
if next_token is not None and re.fullmatch(r"[a-z]", next_token):
|
||||
return token + next_token
|
||||
return token
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def levenshtein(a: str, b: str) -> float:
|
||||
"""
|
||||
|
|
@ -121,6 +137,7 @@ class AddressMatch:
|
|||
Assumes formats like:
|
||||
- '42 moreton road'
|
||||
- 'flat 3 42 moreton road'
|
||||
- '82 a victoria square' (recombined to '82a')
|
||||
"""
|
||||
tokens = s.split()
|
||||
|
||||
|
|
@ -136,10 +153,12 @@ class AddressMatch:
|
|||
continue
|
||||
cleaned.append(t)
|
||||
|
||||
# first remaining number is building number
|
||||
for t in cleaned:
|
||||
if re.fullmatch(r"\d+[a-z]?", t):
|
||||
return t
|
||||
# first remaining number is building number; recombine with a
|
||||
# single-letter suffix when normalisation has split "82a" → "82 a"
|
||||
for i, t in enumerate(cleaned):
|
||||
nxt = cleaned[i + 1] if i + 1 < len(cleaned) else None
|
||||
if (match := AddressMatch._match_building_number(t, nxt)) is not None:
|
||||
return match
|
||||
|
||||
return None
|
||||
|
||||
|
|
@ -172,6 +191,18 @@ class AddressMatch:
|
|||
tok in a_norm for tok in ("flat", "apt", "apartment", "unit")
|
||||
)
|
||||
has_flat_token_epc = "flat" in b_norm
|
||||
# Slash-format like "3/137a" is an implicit flat reference
|
||||
# (flat 3 of 137a) even without a "flat" keyword.
|
||||
has_implicit_flat_user = bool(re.search(r"\d+\s*/\s*\d+", a_norm))
|
||||
|
||||
# EPC says it's a flat but user gave no flat indication
|
||||
# (neither keyword nor slash-format). Unlikely to be the right unit.
|
||||
if (
|
||||
has_flat_token_epc
|
||||
and not has_flat_token_user
|
||||
and not has_implicit_flat_user
|
||||
):
|
||||
return 0.0
|
||||
|
||||
if (
|
||||
len(seq_a) == 2
|
||||
|
|
@ -199,3 +230,23 @@ class AddressMatch:
|
|||
0.65 * token_score + 0.35 * char_score,
|
||||
4,
|
||||
)
|
||||
|
||||
|
||||
def score_addresses(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
) -> pd.Series:
    """
    Score every address in ``df[address_column]`` against ``user_address``.

    Each cell is passed to ``AddressMatch.score(user_address, cell)``; the
    resulting Series has the same index as ``df``.

    Raises:
        ValueError: if ``address_column`` is not a column of ``df``.
    """
    if address_column in df.columns:

        def _score_against_user(candidate):
            return AddressMatch.score(user_address, candidate)

        return df[address_column].apply(_score_against_user)

    raise ValueError(f"Missing column: {address_column}")
|
||||
|
||||
|
||||
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """
    Return True if all non-null UPRNs in ``df[column]`` match ``uprn``.

    Returns False when the column is missing, when it holds no non-null
    values, or when more than one distinct UPRN is present.

    Values are compared as stripped strings. Whole-number floats are
    rendered without the trailing ".0": pandas upcasts an integer column
    containing NaN to float64, and a plain ``str()`` of such a value
    ("100023336956.0") would never equal the UPRN "100023336956".
    """
    if column not in df.columns:
        return False

    def _canonical(value) -> str:
        # numpy.float64 subclasses float, so this covers pandas cells too.
        if isinstance(value, float) and value.is_integer():
            return str(int(value))
        return str(value).strip()

    present = df[column].dropna()
    if present.empty:
        return False

    distinct = {_canonical(value) for value in present}
    return distinct == {_canonical(uprn)}
|
||||
|
|
|
|||
30
backend/utils/cloudwatch.py
Normal file
30
backend/utils/cloudwatch.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import os
|
||||
from typing import Optional
|
||||
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def build_cloudwatch_log_url(start_ms: Optional[int]) -> str:
    """
    Build a CloudWatch Logs console URL for the current Lambda invocation.

    Requires AWS_REGION, AWS_LAMBDA_LOG_GROUP_NAME, and
    AWS_LAMBDA_LOG_STREAM_NAME to be set in the environment — i.e. only
    safe to call inside a Lambda runtime.

    Args:
        start_ms: Start of the log window in epoch milliseconds. When None,
            no ``start`` filter is appended and the link opens the stream
            without a time window (previously this produced "start=None").

    Returns:
        The console URL as a string.

    Raises:
        KeyError: if any of the three environment variables is missing.
    """
    logger.info("Building cloudwatch logs URL")
    region = os.environ["AWS_REGION"]
    log_group = os.environ["AWS_LAMBDA_LOG_GROUP_NAME"]
    log_stream = os.environ["AWS_LAMBDA_LOG_STREAM_NAME"]

    # The console fragment uses "$" in place of "%" for double-encoded
    # characters, so "/" becomes "$252F".
    # NOTE(review): only "/" is encoded here; stream names containing
    # "[$LATEST]" may need "$", "[" and "]" encoded too — confirm.
    encoded_group = log_group.replace("/", "$252F")
    encoded_stream = log_stream.replace("/", "$252F")

    url = (
        f"https://console.aws.amazon.com/cloudwatch/home?"
        f"region={region}"
        f"#logsV2:log-groups/log-group/{encoded_group}"
        f"/log-events/{encoded_stream}"
    )
    if start_ms is not None:
        # "$3F" is the fragment-encoded "?" that introduces the query part.
        url += f"$3Fstart={start_ms}"
    return url
|
||||
|
|
@ -1,75 +1,72 @@
|
|||
# decorators/subtask_handler.py
|
||||
|
||||
from functools import wraps
|
||||
from typing import Callable, Any
|
||||
from uuid import UUID
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from functools import wraps
|
||||
from typing import Any, Callable, Optional, cast
|
||||
from uuid import UUID
|
||||
|
||||
from backend.app.db.functions.tasks.Tasks import SubTaskInterface, TasksInterface
|
||||
from backend.app.db.models.tasks import SourceEnum
|
||||
from backend.utils.cloudwatch import build_cloudwatch_log_url
|
||||
from utils.logger import setup_logger
|
||||
|
||||
|
||||
def subtask_handler():
|
||||
"""
|
||||
Decorator that wraps your existing handler and automatically:
|
||||
def _try_build_cloud_logs_url(start_ms: int) -> Optional[str]:
    """
    Return the CloudWatch logs URL, or None when not running in Lambda.

    The URL builder needs three environment variables; if any is absent
    (local or non-Lambda runs) this returns None instead of crashing.
    """
    for env_name in (
        "AWS_REGION",
        "AWS_LAMBDA_LOG_GROUP_NAME",
        "AWS_LAMBDA_LOG_STREAM_NAME",
    ):
        if env_name not in os.environ:
            return None
    return build_cloudwatch_log_url(start_ms)
|
||||
|
||||
- Extracts task_id + sub_task_id from event
|
||||
- Marks subtask as in progress
|
||||
- Executes handler logic
|
||||
- Marks subtask complete on success
|
||||
- Marks failed on exception
|
||||
|
||||
def subtask_handler() -> Callable[[Callable[..., Any]], Callable[..., Any]]:
|
||||
"""
|
||||
Decorator for Lambdas that operate on an already-existing SubTask. Extracts
|
||||
task_id + sub_task_id from each record, records the CloudWatch logs URL,
|
||||
marks the SubTask in progress, then complete on success / failed on raise.
|
||||
"""
|
||||
|
||||
def decorator(func: Callable[..., Any]):
|
||||
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
|
||||
|
||||
@wraps(func)
|
||||
def wrapper(event: dict[str, Any], context: Any, *args, **kwargs):
|
||||
def wrapper(event: dict[str, Any], context: Any, *args: Any, **kwargs: Any) -> None:
|
||||
start_ms = int(time.time() * 1000)
|
||||
cloud_logs_url = _try_build_cloud_logs_url(start_ms)
|
||||
|
||||
records = event.get("Records", [event])
|
||||
|
||||
interface = SubTaskInterface()
|
||||
|
||||
for record in records:
|
||||
|
||||
# -------------------------------
|
||||
# Parse body safely
|
||||
# -------------------------------
|
||||
body = {}
|
||||
|
||||
if isinstance(record.get("body"), str):
|
||||
raw_body = record.get("body")
|
||||
body: dict[str, Any]
|
||||
if isinstance(raw_body, str):
|
||||
try:
|
||||
body = json.loads(record["body"])
|
||||
body = json.loads(raw_body)
|
||||
except Exception:
|
||||
body = {}
|
||||
elif isinstance(raw_body, dict):
|
||||
body = cast(dict[str, Any], raw_body)
|
||||
else:
|
||||
body = record.get("body", {}) or {}
|
||||
body = {}
|
||||
|
||||
task_id_raw = body.get("task_id")
|
||||
subtask_id_raw = body.get("sub_task_id")
|
||||
|
||||
task_id = UUID(task_id_raw) if isinstance(task_id_raw, str) else None
|
||||
subtask_id = (
|
||||
UUID(subtask_id_raw) if isinstance(subtask_id_raw, str) else None
|
||||
)
|
||||
subtask_id = UUID(subtask_id_raw) if isinstance(subtask_id_raw, str) else None
|
||||
|
||||
if not task_id or not subtask_id:
|
||||
raise RuntimeError("task_id or sub_task_id missing")
|
||||
|
||||
# -------------------------------
|
||||
# Mark in progress
|
||||
# -------------------------------
|
||||
interface.update_subtask_status(
|
||||
subtask_id=subtask_id,
|
||||
status="in progress",
|
||||
cloud_logs_url=cloud_logs_url,
|
||||
)
|
||||
|
||||
try:
|
||||
# Pass the parsed body into your function
|
||||
result = func(body, context, *args, **kwargs)
|
||||
|
||||
# -------------------------------
|
||||
# Success → mark complete
|
||||
# -------------------------------
|
||||
interface.update_subtask_status(
|
||||
subtask_id=subtask_id,
|
||||
status="complete",
|
||||
|
|
@ -77,75 +74,79 @@ def subtask_handler():
|
|||
)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
# -------------------------------
|
||||
# Failure → mark failed
|
||||
# -------------------------------
|
||||
interface.update_subtask_status(
|
||||
subtask_id=subtask_id,
|
||||
status="failed",
|
||||
outputs={"error": str(e)},
|
||||
)
|
||||
|
||||
raise
|
||||
|
||||
return None
|
||||
|
||||
return wrapper
|
||||
|
||||
return decorator
|
||||
|
||||
|
||||
def task_handler():
|
||||
def task_handler(
|
||||
task_source: str,
|
||||
source: SourceEnum,
|
||||
) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
|
||||
"""
|
||||
Decorator that wraps a Lambda handler and automatically:
|
||||
|
||||
- Parses body from the first SQS record (or uses the event dict directly)
|
||||
- Creates a fresh Task + SubTask in the database
|
||||
- Marks the subtask as in progress
|
||||
- Executes the handler, passing the parsed body
|
||||
- Marks complete on success, failed on exception (and re-raises)
|
||||
Decorator for Lambdas that are themselves the entry point of a pipeline (no
|
||||
router in front). For each record the decorator creates a fresh Task +
|
||||
SubTask with the given task_source and source. source_id is read from
|
||||
body[source.value] (silent None if absent) — see ADR-0001. Records the
|
||||
CloudWatch logs URL, marks the SubTask in progress, then complete on
|
||||
success / failed on raise.
|
||||
"""
|
||||
|
||||
def decorator(func: Callable[..., Any]):
|
||||
|
||||
task_source = f"{func.__module__}.{func.__qualname__}"
|
||||
def decorator(func: Callable[..., Any]) -> Callable[..., Any]:
|
||||
|
||||
@wraps(func)
|
||||
def wrapper(event: dict[str, Any], context: Any, *args, **kwargs):
|
||||
def wrapper(event: dict[str, Any], context: Any, *args: Any, **kwargs: Any) -> Any:
|
||||
logger = setup_logger()
|
||||
start_ms = int(time.time() * 1000)
|
||||
cloud_logs_url = _try_build_cloud_logs_url(start_ms)
|
||||
|
||||
records = event.get("Records", [event]) # fallback for non-SQS
|
||||
|
||||
results = []
|
||||
failures = []
|
||||
records = event.get("Records", [event])
|
||||
results: list[Any] = []
|
||||
failures: list[dict[str, Any]] = []
|
||||
interface = SubTaskInterface()
|
||||
|
||||
for record in records:
|
||||
# Parse body
|
||||
raw_body = record.get("body", record)
|
||||
|
||||
body: dict[str, Any]
|
||||
if isinstance(raw_body, str):
|
||||
try:
|
||||
body = json.loads(raw_body)
|
||||
except Exception:
|
||||
body = {}
|
||||
elif isinstance(raw_body, dict):
|
||||
body = cast(dict[str, Any], raw_body)
|
||||
else:
|
||||
body = raw_body or {}
|
||||
body = {}
|
||||
|
||||
raw_source_id = body.get(source.value)
|
||||
source_id: Optional[str] = (
|
||||
str(raw_source_id) if raw_source_id is not None else None
|
||||
)
|
||||
|
||||
# Create task per message
|
||||
logger.info("Creating task for source: %s", task_source)
|
||||
task_id, subtask_id = TasksInterface.create_task(
|
||||
task_source=task_source,
|
||||
inputs=body,
|
||||
source=source,
|
||||
source_id=source_id,
|
||||
)
|
||||
|
||||
logger.info("Created task_id=%s subtask_id=%s", task_id, subtask_id)
|
||||
if subtask_id is None:
|
||||
raise RuntimeError("create_task did not return a subtask_id")
|
||||
|
||||
interface = SubTaskInterface()
|
||||
logger.info("Created task_id=%s subtask_id=%s", task_id, subtask_id)
|
||||
|
||||
interface.update_subtask_status(
|
||||
subtask_id=subtask_id,
|
||||
status="in progress",
|
||||
cloud_logs_url=cloud_logs_url,
|
||||
)
|
||||
|
||||
try:
|
||||
|
|
@ -172,13 +173,11 @@ def task_handler():
|
|||
if "Records" in event:
|
||||
failures.append({"itemIdentifier": record["messageId"]})
|
||||
else:
|
||||
# Handle non-SQS events
|
||||
raise
|
||||
|
||||
if "Records" in event:
|
||||
return {"batchItemFailures": failures}
|
||||
|
||||
# Handle non-SQS events
|
||||
return results
|
||||
|
||||
return wrapper
|
||||
|
|
|
|||
10
conftest.py
10
conftest.py
|
|
@ -1,11 +1,9 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
from backend.app.config import get_settings
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
# Load .env in conftest.py directory for local development
|
||||
load_dotenv()
|
||||
load_dotenv(Path(__file__).resolve().parent / "backend" / ".env")
|
||||
|
||||
DEFAULT_ENV = {
|
||||
"API_KEY": "test",
|
||||
|
|
@ -18,6 +16,10 @@ DEFAULT_ENV = {
|
|||
"EPC_AUTH_TOKEN",
|
||||
"test",
|
||||
), # overridden in GitHub Actions
|
||||
"OPEN_EPC_API_TOKEN": os.getenv(
|
||||
"OPEN_EPC_API_TOKEN",
|
||||
"test",
|
||||
), # overridden in GitHub Actions
|
||||
"GOOGLE_SOLAR_API_KEY": "test",
|
||||
"DB_HOST": "localhost",
|
||||
"DB_USERNAME": "test",
|
||||
|
|
|
|||
98
datatypes/epc/domain/historic_epc.py
Normal file
98
datatypes/epc/domain/historic_epc.py
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class HistoricEpc:
    """
    One historic EPC record, with every column held as a string.

    Field names are the lower-cased column names of the historic EPC CSV
    data; no parsing or type conversion is applied at this level.
    """

    # --- identity and address ---
    lmk_key: str
    address1: str
    address2: str
    address3: str
    postcode: str
    building_reference_number: str
    # --- ratings and property classification ---
    current_energy_rating: str
    potential_energy_rating: str
    current_energy_efficiency: str
    potential_energy_efficiency: str
    property_type: str
    built_form: str
    inspection_date: str
    local_authority: str
    constituency: str
    county: str
    lodgement_date: str
    transaction_type: str
    # --- environmental impact, consumption, emissions and costs ---
    environment_impact_current: str
    environment_impact_potential: str
    energy_consumption_current: str
    energy_consumption_potential: str
    co2_emissions_current: str
    co2_emiss_curr_per_floor_area: str
    co2_emissions_potential: str
    lighting_cost_current: str
    lighting_cost_potential: str
    heating_cost_current: str
    heating_cost_potential: str
    hot_water_cost_current: str
    hot_water_cost_potential: str
    # --- dwelling characteristics ---
    total_floor_area: str
    energy_tariff: str
    mains_gas_flag: str
    floor_level: str
    flat_top_storey: str
    flat_storey_count: str
    main_heating_controls: str
    multi_glaze_proportion: str
    glazed_type: str
    glazed_area: str
    extension_count: str
    number_habitable_rooms: str
    number_heated_rooms: str
    low_energy_lighting: str
    number_open_fireplaces: str
    # --- per-element descriptions and efficiency ratings ---
    hotwater_description: str
    hot_water_energy_eff: str
    hot_water_env_eff: str
    floor_description: str
    floor_energy_eff: str
    floor_env_eff: str
    windows_description: str
    windows_energy_eff: str
    windows_env_eff: str
    walls_description: str
    walls_energy_eff: str
    walls_env_eff: str
    secondheat_description: str
    sheating_energy_eff: str
    sheating_env_eff: str
    roof_description: str
    roof_energy_eff: str
    roof_env_eff: str
    mainheat_description: str
    mainheat_energy_eff: str
    mainheat_env_eff: str
    mainheatcont_description: str
    mainheatc_energy_eff: str
    mainheatc_env_eff: str
    lighting_description: str
    lighting_energy_eff: str
    lighting_env_eff: str
    # --- fuel, renewables and ventilation ---
    main_fuel: str
    wind_turbine_count: str
    heat_loss_corridor: str
    unheated_corridor_length: str
    floor_height: str
    photo_supply: str
    solar_water_heating_flag: str
    mechanical_ventilation: str
    # --- full address, labels and lodgement metadata ---
    address: str
    local_authority_label: str
    constituency_label: str
    posttown: str
    construction_age_band: str
    lodgement_datetime: str
    tenure: str
    fixed_lighting_outlets_count: str
    low_energy_fixed_light_count: str
    uprn: str
    uprn_source: str
    report_type: str
|
||||
104
datatypes/epc/domain/historic_epc_matching.py
Normal file
104
datatypes/epc/domain/historic_epc_matching.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
import pandas as pd
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from backend.address2UPRN.scoring import rank_address_similarity
|
||||
from backend.utils.addressMatch import AddressMatch
|
||||
from datatypes.epc.domain.historic_epc import HistoricEpc
|
||||
from utils.pandas_utils import pandas_cell_to_str
|
||||
from utils.s3 import parse_s3_uri, read_csv_gz_from_s3
|
||||
|
||||
DEFAULT_S3_ROOT = "s3://retrofit-data-dev/historical_epc"
|
||||
|
||||
_EXTRA_COLS = {"lexiscore", "lexirank"}
|
||||
|
||||
|
||||
def _map_historic_epc_pandas_row_to_domain(row: pd.Series) -> HistoricEpc:
    """
    Convert one scored DataFrame row into a ``HistoricEpc``.

    Column names are lower-cased to become field names and every cell is
    passed through ``pandas_cell_to_str``. The scoring columns listed in
    ``_EXTRA_COLS`` are left out because they are not ``HistoricEpc`` fields.
    """
    field_values = {}
    for column, cell in row.items():
        field_name = column.lower()
        if field_name in _EXTRA_COLS:
            continue
        field_values[field_name] = pandas_cell_to_str(cell)
    return HistoricEpc(**field_values)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ScoredHistoricEpc:
    """An immutable pairing of a historic EPC record with its match score."""

    # The EPC record that was scored against the user's address.
    record: HistoricEpc
    # Similarity score read from the "lexiscore" column of the ranked frame.
    lexiscore: float
    # Rank read from the "lexirank" column; presumably 1 is the best match
    # and ties share a rank — confirm against rank_address_similarity.
    lexirank: int
|
||||
|
||||
|
||||
@dataclass
class HistoricEpcMatches:
    """
    Scored historic EPC candidates for one user address within a postcode.

    ``matches`` is used as given: ``top`` and ``top_n`` simply read from the
    front of the list, so callers are expected to supply it best-first.
    """

    user_address: str
    postcode: str
    matches: list[ScoredHistoricEpc]

    def top(self) -> Optional[ScoredHistoricEpc]:
        """Return the first match, or None when there are no matches."""
        if not self.matches:
            return None
        return self.matches[0]

    def top_n(self, k: int) -> list[ScoredHistoricEpc]:
        """Return the first ``k`` matches (fewer if not that many exist)."""
        return self.matches[:k]

    def unambiguous_uprn(self) -> Optional[str]:
        """
        Return the UPRN shared by every match tied with the top match.

        Returns None when there are no matches, when the top score is not
        positive, or when the tied matches carry zero or several distinct
        non-empty UPRNs.
        """
        best = self.top()
        if best is None or best.lexiscore <= 0:
            return None

        distinct_uprns = set()
        for candidate in self.matches:
            # Only matches sharing the top rank count; blank UPRNs are ignored.
            if candidate.lexirank == best.lexirank and candidate.record.uprn:
                distinct_uprns.add(candidate.record.uprn)

        if len(distinct_uprns) != 1:
            return None
        (only_uprn,) = distinct_uprns
        return only_uprn
|
||||
|
||||
|
||||
def _sanitise_postcode(postcode: str) -> str:
    """
    Return ``postcode`` upper-cased with all whitespace removed.

    Every whitespace character is stripped, not only the ASCII space, so
    input such as "ab33\\t8al" or "AB33 8AL\\n" is normalised the same way
    as "ab33 8al". A whitespace-only value is reported as empty instead of
    reaching validation or the S3 key.

    Raises:
        ValueError: if nothing remains after removing whitespace, or if
            ``AddressMatch.is_valid_postcode`` rejects the cleaned value.
    """
    # str.split() with no argument splits on any run of whitespace.
    cleaned = "".join((postcode or "").split()).upper()
    if not cleaned:
        raise ValueError("postcode must contain non-whitespace characters")
    if not AddressMatch.is_valid_postcode(cleaned):
        raise ValueError(f"postcode {cleaned!r} is not a valid UK postcode")
    return cleaned
|
||||
|
||||
|
||||
def match_addresses_for_postcode(
    user_address: str,
    postcode: str,
    *,
    s3_root: str = DEFAULT_S3_ROOT,
    address_column: str = "ADDRESS",
    uprn_column: str = "UPRN",
) -> HistoricEpcMatches:
    """
    Load the historic EPC file for ``postcode`` and score it against
    ``user_address``.

    The file is read from ``<s3_root>/<sanitised postcode>/data.csv.gz``,
    ranked with ``rank_address_similarity``, and every resulting row is
    returned as a ``ScoredHistoricEpc``.

    Raises:
        ValueError: if ``user_address`` is empty or the postcode is rejected
            by ``_sanitise_postcode``.
        FileNotFoundError: if S3 reports the object as missing
            ("NoSuchKey" / "404").
        ClientError: any other S3 client error, re-raised unchanged.
    """
    if not user_address:
        raise ValueError("user_address must be non-empty")

    clean_postcode = _sanitise_postcode(postcode)
    bucket, root_prefix = parse_s3_uri(s3_root)
    # Drop any trailing slash on the prefix so the key never contains "//".
    object_key = "/".join((root_prefix.rstrip("/"), clean_postcode, "data.csv.gz"))

    try:
        epc_frame = read_csv_gz_from_s3(bucket, object_key)
    except ClientError as err:
        error_code = err.response.get("Error", {}).get("Code")
        if error_code not in ("NoSuchKey", "404"):
            raise
        raise FileNotFoundError(
            f"No historic EPC data at s3://{bucket}/{object_key}"
        ) from err

    ranked = rank_address_similarity(
        epc_frame,
        user_address=user_address,
        address_column=address_column,
        uprn_column=uprn_column,
    )

    scored_matches = []
    for _, ranked_row in ranked.iterrows():
        scored_matches.append(
            ScoredHistoricEpc(
                record=_map_historic_epc_pandas_row_to_domain(ranked_row),
                lexiscore=float(ranked_row["lexiscore"]),
                lexirank=int(ranked_row["lexirank"]),
            )
        )

    return HistoricEpcMatches(
        user_address=user_address,
        postcode=clean_postcode,
        matches=scored_matches,
    )
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
from datetime import date
|
||||
from typing import List, Optional, Sequence, Union
|
||||
from typing import List, Optional, Sequence, Union, Dict, Any
|
||||
from datatypes.epc.schema.helpers import from_dict
|
||||
|
||||
from datatypes.epc.domain.epc_property_data import (
|
||||
EnergyElement,
|
||||
|
|
@ -1525,6 +1526,29 @@ class EpcPropertyDataMapper:
|
|||
) -> List[EnergyElement]:
|
||||
return [EpcPropertyDataMapper._map_energy_element(e) for e in elements]
|
||||
|
||||
@staticmethod
|
||||
def from_api_response(data: Dict[str, Any]) -> "EpcPropertyData":
|
||||
"""
|
||||
Dispatch to the correct schema mapper based on schema_type.
|
||||
Supports RdSAP-Schema-21.0.0 and RdSAP-Schema-21.0.1 only.
|
||||
Raises ValueError for unsupported schemas — add cases here as needed.
|
||||
"""
|
||||
|
||||
schema = data.get("schema_type", "")
|
||||
if schema == "RdSAP-Schema-21.0.1":
|
||||
from datatypes.epc.schema.rdsap_schema_21_0_1 import RdSapSchema21_0_1
|
||||
|
||||
return EpcPropertyDataMapper.from_rdsap_schema_21_0_1(
|
||||
from_dict(RdSapSchema21_0_1, data)
|
||||
)
|
||||
if schema == "RdSAP-Schema-21.0.0":
|
||||
from datatypes.epc.schema.rdsap_schema_21_0_0 import RdSapSchema21_0_0
|
||||
|
||||
return EpcPropertyDataMapper.from_rdsap_schema_21_0_0(
|
||||
from_dict(RdSapSchema21_0_0, data)
|
||||
)
|
||||
raise ValueError(f"Unsupported EPC schema: {schema!r}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Private helpers
|
||||
|
|
|
|||
324
datatypes/epc/domain/tests/test_historic_epc_matching.py
Normal file
324
datatypes/epc/domain/tests/test_historic_epc_matching.py
Normal file
|
|
@ -0,0 +1,324 @@
|
|||
from typing import Optional
|
||||
from unittest.mock import patch
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from datatypes.epc.domain import historic_epc_matching as matcher_mod
|
||||
from datatypes.epc.domain.historic_epc_matching import (
|
||||
HistoricEpcMatches,
|
||||
ScoredHistoricEpc,
|
||||
_sanitise_postcode,
|
||||
match_addresses_for_postcode,
|
||||
)
|
||||
|
||||
# Columns required by the HistoricEpc dataclass (lower-cased CSV columns).
|
||||
# The matcher only reads ADDRESS + UPRN to score; everything else is filled
|
||||
# with "" but must be present for HistoricEpc(**kwargs) to construct.
|
||||
_FULL_COLUMN_FIELDS = [
|
||||
"LMK_KEY",
|
||||
"ADDRESS1",
|
||||
"ADDRESS2",
|
||||
"ADDRESS3",
|
||||
"POSTCODE",
|
||||
"BUILDING_REFERENCE_NUMBER",
|
||||
"CURRENT_ENERGY_RATING",
|
||||
"POTENTIAL_ENERGY_RATING",
|
||||
"CURRENT_ENERGY_EFFICIENCY",
|
||||
"POTENTIAL_ENERGY_EFFICIENCY",
|
||||
"PROPERTY_TYPE",
|
||||
"BUILT_FORM",
|
||||
"INSPECTION_DATE",
|
||||
"LOCAL_AUTHORITY",
|
||||
"CONSTITUENCY",
|
||||
"COUNTY",
|
||||
"LODGEMENT_DATE",
|
||||
"TRANSACTION_TYPE",
|
||||
"ENVIRONMENT_IMPACT_CURRENT",
|
||||
"ENVIRONMENT_IMPACT_POTENTIAL",
|
||||
"ENERGY_CONSUMPTION_CURRENT",
|
||||
"ENERGY_CONSUMPTION_POTENTIAL",
|
||||
"CO2_EMISSIONS_CURRENT",
|
||||
"CO2_EMISS_CURR_PER_FLOOR_AREA",
|
||||
"CO2_EMISSIONS_POTENTIAL",
|
||||
"LIGHTING_COST_CURRENT",
|
||||
"LIGHTING_COST_POTENTIAL",
|
||||
"HEATING_COST_CURRENT",
|
||||
"HEATING_COST_POTENTIAL",
|
||||
"HOT_WATER_COST_CURRENT",
|
||||
"HOT_WATER_COST_POTENTIAL",
|
||||
"TOTAL_FLOOR_AREA",
|
||||
"ENERGY_TARIFF",
|
||||
"MAINS_GAS_FLAG",
|
||||
"FLOOR_LEVEL",
|
||||
"FLAT_TOP_STOREY",
|
||||
"FLAT_STOREY_COUNT",
|
||||
"MAIN_HEATING_CONTROLS",
|
||||
"MULTI_GLAZE_PROPORTION",
|
||||
"GLAZED_TYPE",
|
||||
"GLAZED_AREA",
|
||||
"EXTENSION_COUNT",
|
||||
"NUMBER_HABITABLE_ROOMS",
|
||||
"NUMBER_HEATED_ROOMS",
|
||||
"LOW_ENERGY_LIGHTING",
|
||||
"NUMBER_OPEN_FIREPLACES",
|
||||
"HOTWATER_DESCRIPTION",
|
||||
"HOT_WATER_ENERGY_EFF",
|
||||
"HOT_WATER_ENV_EFF",
|
||||
"FLOOR_DESCRIPTION",
|
||||
"FLOOR_ENERGY_EFF",
|
||||
"FLOOR_ENV_EFF",
|
||||
"WINDOWS_DESCRIPTION",
|
||||
"WINDOWS_ENERGY_EFF",
|
||||
"WINDOWS_ENV_EFF",
|
||||
"WALLS_DESCRIPTION",
|
||||
"WALLS_ENERGY_EFF",
|
||||
"WALLS_ENV_EFF",
|
||||
"SECONDHEAT_DESCRIPTION",
|
||||
"SHEATING_ENERGY_EFF",
|
||||
"SHEATING_ENV_EFF",
|
||||
"ROOF_DESCRIPTION",
|
||||
"ROOF_ENERGY_EFF",
|
||||
"ROOF_ENV_EFF",
|
||||
"MAINHEAT_DESCRIPTION",
|
||||
"MAINHEAT_ENERGY_EFF",
|
||||
"MAINHEAT_ENV_EFF",
|
||||
"MAINHEATCONT_DESCRIPTION",
|
||||
"MAINHEATC_ENERGY_EFF",
|
||||
"MAINHEATC_ENV_EFF",
|
||||
"LIGHTING_DESCRIPTION",
|
||||
"LIGHTING_ENERGY_EFF",
|
||||
"LIGHTING_ENV_EFF",
|
||||
"MAIN_FUEL",
|
||||
"WIND_TURBINE_COUNT",
|
||||
"HEAT_LOSS_CORRIDOR",
|
||||
"UNHEATED_CORRIDOR_LENGTH",
|
||||
"FLOOR_HEIGHT",
|
||||
"PHOTO_SUPPLY",
|
||||
"SOLAR_WATER_HEATING_FLAG",
|
||||
"MECHANICAL_VENTILATION",
|
||||
"ADDRESS",
|
||||
"LOCAL_AUTHORITY_LABEL",
|
||||
"CONSTITUENCY_LABEL",
|
||||
"POSTTOWN",
|
||||
"CONSTRUCTION_AGE_BAND",
|
||||
"LODGEMENT_DATETIME",
|
||||
"TENURE",
|
||||
"FIXED_LIGHTING_OUTLETS_COUNT",
|
||||
"LOW_ENERGY_FIXED_LIGHT_COUNT",
|
||||
"UPRN",
|
||||
"UPRN_SOURCE",
|
||||
"REPORT_TYPE",
|
||||
]
|
||||
|
||||
|
||||
def _row(address: str, uprn) -> dict:
    """Build one CSV-shaped row: every column blank except ADDRESS and UPRN."""
    blank_row = dict.fromkeys(_FULL_COLUMN_FIELDS, "")
    blank_row.update(ADDRESS=address, UPRN=uprn)
    return blank_row
|
||||
|
||||
|
||||
def _build_df(rows: list[dict]) -> pd.DataFrame:
    """Wrap row dicts in a DataFrame whose columns follow ``_FULL_COLUMN_FIELDS``."""
    frame = pd.DataFrame(rows, columns=_FULL_COLUMN_FIELDS)
    return frame
|
||||
|
||||
|
||||
@pytest.fixture
def patch_postcode_valid():
    """Make ``AddressMatch.is_valid_postcode`` accept every postcode for the test."""
    always_valid = patch.object(
        matcher_mod.AddressMatch, "is_valid_postcode", return_value=True
    )
    with always_valid as mocked_validator:
        yield mocked_validator
|
||||
|
||||
|
||||
@pytest.fixture
def patch_read():
    """Replace the matcher module's ``read_csv_gz_from_s3`` with a mock."""
    reader_patch = patch.object(matcher_mod, "read_csv_gz_from_s3")
    with reader_patch as mocked_reader:
        yield mocked_reader
|
||||
|
||||
|
||||
# ---------- _sanitise_postcode ----------
|
||||
|
||||
|
||||
class TestSanitisePostcode:
    """Checks for ``_sanitise_postcode``."""

    def test_uppercases_and_strips_spaces(self, patch_postcode_valid):
        cleaned = _sanitise_postcode("ab33 8al")
        assert cleaned == "AB338AL"

    def test_empty_raises(self, patch_postcode_valid):
        with pytest.raises(ValueError, match="non-whitespace"):
            _sanitise_postcode("")

    def test_whitespace_only_raises(self, patch_postcode_valid):
        with pytest.raises(ValueError, match="non-whitespace"):
            _sanitise_postcode(" ")

    def test_invalid_postcode_raises(self):
        # No fixture here: the validator is forced to reject the value.
        rejecting_validator = patch.object(
            matcher_mod.AddressMatch, "is_valid_postcode", return_value=False
        )
        with rejecting_validator, pytest.raises(
            ValueError, match="not a valid UK postcode"
        ):
            _sanitise_postcode("NONSENSE")
|
||||
|
||||
|
||||
# ---------- match_addresses_for_postcode ----------
|
||||
|
||||
|
||||
class TestMatchAddressesForPostcode:
    """Checks for ``match_addresses_for_postcode`` with the S3 read mocked out."""

    def test_preserves_row_count_including_zero_score_rows(
        self, patch_read, patch_postcode_valid
    ):
        # Disjoint number sets => hard zero. The row is still kept in matches.
        source_rows = [
            _row("47 GORDON ROAD", "100"),
            _row("999 SOMEWHERE ELSE", "200"),
        ]
        patch_read.return_value = _build_df(source_rows)

        outcome = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        assert isinstance(outcome, HistoricEpcMatches)
        assert len(outcome.matches) == 2

    def test_top_has_lexirank_one_and_lexiscore_monotone(
        self, patch_read, patch_postcode_valid
    ):
        source_rows = [
            _row("48 GORDON ROAD", "200"),  # near miss
            _row("47 GORDON ROAD", "100"),  # exact (after normalisation)
        ]
        patch_read.return_value = _build_df(source_rows)

        outcome = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        assert outcome.top().lexirank == 1
        observed_scores = [match.lexiscore for match in outcome.matches]
        assert observed_scores == sorted(observed_scores, reverse=True)

    def test_s3_key_built_from_default_root(self, patch_read, patch_postcode_valid):
        patch_read.return_value = _build_df([_row("47 GORDON ROAD", "100")])

        match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        expected_bucket = "retrofit-data-dev"
        expected_key = "historical_epc/AB338AL/data.csv.gz"
        patch_read.assert_called_once_with(expected_bucket, expected_key)

    def test_s3_key_respects_custom_root_with_trailing_slash(
        self, patch_read, patch_postcode_valid
    ):
        patch_read.return_value = _build_df([_row("47 GORDON ROAD", "100")])

        match_addresses_for_postcode(
            "47 Gordon Road",
            "AB33 8AL",
            s3_root="s3://my-bucket/some/prefix/",
        )

        expected_bucket = "my-bucket"
        expected_key = "some/prefix/AB338AL/data.csv.gz"
        patch_read.assert_called_once_with(expected_bucket, expected_key)

    def test_no_such_key_translates_to_filenotfound(
        self, patch_read, patch_postcode_valid
    ):
        missing_object = ClientError(
            {"Error": {"Code": "NoSuchKey", "Message": "missing"}}, "GetObject"
        )
        patch_read.side_effect = missing_object

        with pytest.raises(FileNotFoundError):
            match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

    def test_other_client_error_propagates(self, patch_read, patch_postcode_valid):
        access_denied = ClientError(
            {"Error": {"Code": "AccessDenied", "Message": "nope"}}, "GetObject"
        )
        patch_read.side_effect = access_denied

        with pytest.raises(ClientError):
            match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

    def test_empty_user_address_raises(self, patch_postcode_valid):
        with pytest.raises(ValueError, match="user_address"):
            match_addresses_for_postcode("", "AB33 8AL")
|
||||
|
||||
|
||||
# ---------- unambiguous_uprn ----------
|
||||
|
||||
|
||||
class TestUnambiguousUprn:
    """Checks on ``HistoricEpcMatches.unambiguous_uprn`` and UPRN cell handling."""

    def test_exact_match_returns_uprn(self, patch_read, patch_postcode_valid):
        rows = [
            _row("47 GORDON ROAD", "100"),
            _row("48 GORDON ROAD", "200"),
        ]
        patch_read.return_value = _build_df(rows)

        outcome = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        assert outcome.unambiguous_uprn() == "100"

    def test_ambiguous_tie_returns_none(self, patch_read, patch_postcode_valid):
        # Two duplicate addresses with different UPRNs share rank-1.
        duplicates = [
            _row("47 GORDON ROAD", "100"),
            _row("47 GORDON ROAD", "200"),
        ]
        patch_read.return_value = _build_df(duplicates)

        outcome = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        assert outcome.unambiguous_uprn() is None

    def test_all_zero_score_returns_none_even_when_uprn_unique(
        self, patch_read, patch_postcode_valid
    ):
        # User address has building number 47; no row has 47 -> all hard-zero.
        unrelated = [
            _row("999 ELSEWHERE", "100"),
            _row("888 ELSEWHERE", "200"),
        ]
        patch_read.return_value = _build_df(unrelated)

        outcome = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")

        every_score_is_zero = all(entry.lexiscore == 0.0 for entry in outcome.matches)
        assert every_score_is_zero
        assert outcome.unambiguous_uprn() is None

    def test_nan_uprn_becomes_empty_string_not_nan(
        self, patch_read, patch_postcode_valid
    ):
        # Use a real NaN in the UPRN cell.
        rows = [
            _row("47 GORDON ROAD", np.nan),
            _row("48 GORDON ROAD", "200"),
        ]
        patch_read.return_value = _build_df(rows)

        outcome: HistoricEpcMatches = match_addresses_for_postcode(
            "47 Gordon Road", "AB33 8AL"
        )
        best: Optional[ScoredHistoricEpc] = outcome.top()

        if not best:
            pytest.fail("should have an epc score, no results found :(")

        # pandas_cell_to_str must turn NaN/"nan" into "" (not the literal string
        # "nan"), so unambiguous_uprn's truthiness check correctly drops the row.
        assert best.record.uprn == ""
|
||||
|
||||
|
||||
# ---------- top / top_n ----------
|
||||
|
||||
|
||||
class TestTopHelpers:
    """Checks on the ``top`` / ``top_n`` convenience accessors."""

    def test_top_n_returns_first_k(self, patch_read, patch_postcode_valid):
        rows = [
            _row("47 GORDON ROAD", "100"),
            _row("48 GORDON ROAD", "200"),
            _row("49 GORDON ROAD", "300"),
        ]
        patch_read.return_value = _build_df(rows)

        outcome = match_addresses_for_postcode("47 Gordon Road", "AB33 8AL")
        leading_pair = outcome.top_n(2)

        assert len(leading_pair) == 2
        for entry in leading_pair:
            assert isinstance(entry, ScoredHistoricEpc)

    def test_top_on_empty_matches_returns_none(self):
        # A result holding no matches must degrade gracefully on every accessor.
        nothing_found = HistoricEpcMatches(
            user_address="x", postcode="AB338AL", matches=[]
        )

        assert nothing_found.top() is None
        assert nothing_found.top_n(5) == []
        assert nothing_found.unambiguous_uprn() is None
|
||||
0
datatypes/epc/loaders/__init__.py
Normal file
0
datatypes/epc/loaders/__init__.py
Normal file
18
datatypes/epc/loaders/historic_epc.py
Normal file
18
datatypes/epc/loaders/historic_epc.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
import csv
|
||||
|
||||
from datatypes.epc.domain.historic_epc import HistoricEpc
|
||||
|
||||
|
||||
def _normalise(value: str | None) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
return value.replace("\xa0", " ")
|
||||
|
||||
|
||||
def read_historic_epc_csv(path: str) -> list[HistoricEpc]:
    """Load every row of the CSV at *path* as a ``HistoricEpc``.

    Column headers are lower-cased to form the constructor keyword names and
    each cell is passed through ``_normalise`` before construction. The file is
    read as UTF-8 with ``newline=""`` as the ``csv`` module requires.

    Returns:
        One ``HistoricEpc`` per data row, in file order.
    """
    records: list[HistoricEpc] = []
    with open(path, newline="", encoding="utf-8") as handle:
        for raw_row in csv.DictReader(handle):
            fields = {}
            for column, cell in raw_row.items():
                fields[column.lower()] = _normalise(cell)
            records.append(HistoricEpc(**fields))
    return records
|
||||
77
datatypes/epc/schema/helpers.py
Normal file
77
datatypes/epc/schema/helpers.py
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import dataclasses
|
||||
import typing
|
||||
from datetime import date
|
||||
from typing import Any, Dict, Type, TypeVar
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def from_dict(cls: Type[T], data: Dict[str, Any]) -> T:
    """
    Build an instance of the dataclass *cls* from a plain dict (for example
    the output of json.loads), converting nested values according to the
    dataclass field type hints.

    Supported hint shapes:
      - nested dataclasses
      - List[SomeDataclass]
      - Optional[X] / Union[X, None]
      - Union[DataclassType, primitive] (e.g. Union[Measurement, int])
      - unions of primitives such as Union[str, int] (value passed through)

    Raises ValueError when a field without a default is absent from *data*.
    """
    instance: T = _from_dict_impl(cls, data)  # type: ignore[assignment]
    return instance
|
||||
|
||||
|
||||
def _from_dict_impl(cls: Any, data: Any) -> Any:
|
||||
hints = typing.get_type_hints(cls)
|
||||
kwargs: Dict[str, Any] = {}
|
||||
|
||||
for field in dataclasses.fields(cls): # type: ignore[arg-type]
|
||||
has_default = (
|
||||
field.default is not dataclasses.MISSING
|
||||
or field.default_factory is not dataclasses.MISSING # type: ignore[misc]
|
||||
)
|
||||
if field.name not in data:
|
||||
if has_default:
|
||||
continue
|
||||
raise ValueError(f"{cls.__name__}: missing required field '{field.name}'")
|
||||
|
||||
kwargs[field.name] = _coerce(data[field.name], hints[field.name])
|
||||
|
||||
return cls(**kwargs)
|
||||
|
||||
|
||||
def _coerce(value: Any, hint: Any) -> Any:
|
||||
if value is None:
|
||||
return None
|
||||
|
||||
origin = typing.get_origin(hint)
|
||||
args = typing.get_args(hint)
|
||||
|
||||
# Union (includes Optional[X] which is Union[X, None])
|
||||
if origin is typing.Union:
|
||||
if value is None:
|
||||
return None
|
||||
non_none_args = [a for a in args if a is not type(None)]
|
||||
if len(non_none_args) == 1:
|
||||
# Optional[X] — recurse so List[X] and nested dataclasses are handled
|
||||
return _coerce(value, non_none_args[0])
|
||||
# Multi-type Union (e.g. Union[Measurement, int]): try dataclasses first
|
||||
for arg in non_none_args:
|
||||
if dataclasses.is_dataclass(arg) and isinstance(value, dict):
|
||||
return _from_dict_impl(arg, value)
|
||||
# All remaining args are primitives — return value as-is
|
||||
return value
|
||||
|
||||
# List[X]
|
||||
if origin is list:
|
||||
item_hint = args[0]
|
||||
return [_coerce(item, item_hint) for item in value]
|
||||
|
||||
# Plain dataclass
|
||||
if dataclasses.is_dataclass(hint) and isinstance(value, dict):
|
||||
return _from_dict_impl(hint, value)
|
||||
|
||||
if hint is date and isinstance(value, str):
|
||||
return date.fromisoformat(value)
|
||||
|
||||
return value
|
||||
2
datatypes/epc/schema/tests/fixtures/historic_epc.csv
vendored
Normal file
2
datatypes/epc/schema/tests/fixtures/historic_epc.csv
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
LMK_KEY,ADDRESS1,ADDRESS2,ADDRESS3,POSTCODE,BUILDING_REFERENCE_NUMBER,CURRENT_ENERGY_RATING,POTENTIAL_ENERGY_RATING,CURRENT_ENERGY_EFFICIENCY,POTENTIAL_ENERGY_EFFICIENCY,PROPERTY_TYPE,BUILT_FORM,INSPECTION_DATE,LOCAL_AUTHORITY,CONSTITUENCY,COUNTY,LODGEMENT_DATE,TRANSACTION_TYPE,ENVIRONMENT_IMPACT_CURRENT,ENVIRONMENT_IMPACT_POTENTIAL,ENERGY_CONSUMPTION_CURRENT,ENERGY_CONSUMPTION_POTENTIAL,CO2_EMISSIONS_CURRENT,CO2_EMISS_CURR_PER_FLOOR_AREA,CO2_EMISSIONS_POTENTIAL,LIGHTING_COST_CURRENT,LIGHTING_COST_POTENTIAL,HEATING_COST_CURRENT,HEATING_COST_POTENTIAL,HOT_WATER_COST_CURRENT,HOT_WATER_COST_POTENTIAL,TOTAL_FLOOR_AREA,ENERGY_TARIFF,MAINS_GAS_FLAG,FLOOR_LEVEL,FLAT_TOP_STOREY,FLAT_STOREY_COUNT,MAIN_HEATING_CONTROLS,MULTI_GLAZE_PROPORTION,GLAZED_TYPE,GLAZED_AREA,EXTENSION_COUNT,NUMBER_HABITABLE_ROOMS,NUMBER_HEATED_ROOMS,LOW_ENERGY_LIGHTING,NUMBER_OPEN_FIREPLACES,HOTWATER_DESCRIPTION,HOT_WATER_ENERGY_EFF,HOT_WATER_ENV_EFF,FLOOR_DESCRIPTION,FLOOR_ENERGY_EFF,FLOOR_ENV_EFF,WINDOWS_DESCRIPTION,WINDOWS_ENERGY_EFF,WINDOWS_ENV_EFF,WALLS_DESCRIPTION,WALLS_ENERGY_EFF,WALLS_ENV_EFF,SECONDHEAT_DESCRIPTION,SHEATING_ENERGY_EFF,SHEATING_ENV_EFF,ROOF_DESCRIPTION,ROOF_ENERGY_EFF,ROOF_ENV_EFF,MAINHEAT_DESCRIPTION,MAINHEAT_ENERGY_EFF,MAINHEAT_ENV_EFF,MAINHEATCONT_DESCRIPTION,MAINHEATC_ENERGY_EFF,MAINHEATC_ENV_EFF,LIGHTING_DESCRIPTION,LIGHTING_ENERGY_EFF,LIGHTING_ENV_EFF,MAIN_FUEL,WIND_TURBINE_COUNT,HEAT_LOSS_CORRIDOR,UNHEATED_CORRIDOR_LENGTH,FLOOR_HEIGHT,PHOTO_SUPPLY,SOLAR_WATER_HEATING_FLAG,MECHANICAL_VENTILATION,ADDRESS,LOCAL_AUTHORITY_LABEL,CONSTITUENCY_LABEL,POSTTOWN,CONSTRUCTION_AGE_BAND,LODGEMENT_DATETIME,TENURE,FIXED_LIGHTING_OUTLETS_COUNT,LOW_ENERGY_FIXED_LIGHT_COUNT,UPRN,UPRN_SOURCE,REPORT_TYPE
|
||||
9292c3bf26a8876ce59274401ea73e3de5bd0b3e52a507c2162a46e57db8ea2f,47 GORDON ROAD,ALFORD,,AB33 8AL,10001111325,E,B,42,87,House,Semi-Detached,2021-04-11,,Unknown,,2021-04-12,ECO assessment,49,69,450,299,5.5,76,3.6,69,77,1579,715,349,118,72.0,Single,N,,,,,100.0,"double glazing, unknown install date",Normal,0.0,3.0,3.0,86.0,0.0,"Electric immersion, standard tariff",Very Poor,Poor,"Solid, no insulation (assumed)",,,Fully double glazed,Average,Average,"Granite or whinstone, as built, partial insulation (assumed)",Average,Average,,,,"Pitched, 100 mm loft insulation",Average,Average,"Room heaters, electric",Very Poor,Poor,Appliance thermostats,Good,Good,Low energy lighting in 86% of fixed outlets,Very Good,Very Good,electricity (not community),0.0,,,2.4,0.0,N,natural,"47 GORDON ROAD, ALFORD",,,ALFORD,England and Wales: 1976-1982,2021-04-12 21:45:35,Rented (private),7.0,,151020766.0,Energy Assessor,100
|
||||
|
|
|
@ -1,77 +1,3 @@
|
|||
import dataclasses
|
||||
import typing
|
||||
from datetime import date
|
||||
from typing import Any, Dict, Type, TypeVar
|
||||
from datatypes.epc.schema.helpers import from_dict
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def from_dict(cls: Type[T], data: Dict[str, Any]) -> T:
|
||||
"""
|
||||
Recursively convert a plain dict (e.g. from json.loads) into the given
|
||||
dataclass type, using the field type hints to convert nested structures.
|
||||
|
||||
Handles:
|
||||
- Nested dataclasses
|
||||
- List[SomeDataclass]
|
||||
- Optional[X] / Union[X, None]
|
||||
- Union[DataclassType, primitive] (e.g. Union[Measurement, int])
|
||||
- Primitive pass-through for Union[str, int] etc.
|
||||
"""
|
||||
return _from_dict_impl(cls, data) # type: ignore[return-value]
|
||||
|
||||
|
||||
def _from_dict_impl(cls: Any, data: Any) -> Any:
|
||||
hints = typing.get_type_hints(cls)
|
||||
kwargs: Dict[str, Any] = {}
|
||||
|
||||
for field in dataclasses.fields(cls): # type: ignore[arg-type]
|
||||
has_default = (
|
||||
field.default is not dataclasses.MISSING
|
||||
or field.default_factory is not dataclasses.MISSING # type: ignore[misc]
|
||||
)
|
||||
if field.name not in data:
|
||||
if has_default:
|
||||
continue
|
||||
raise ValueError(f"{cls.__name__}: missing required field '{field.name}'")
|
||||
|
||||
kwargs[field.name] = _coerce(data[field.name], hints[field.name])
|
||||
|
||||
return cls(**kwargs)
|
||||
|
||||
|
||||
def _coerce(value: Any, hint: Any) -> Any:
|
||||
if value is None:
|
||||
return None
|
||||
|
||||
origin = typing.get_origin(hint)
|
||||
args = typing.get_args(hint)
|
||||
|
||||
# Union (includes Optional[X] which is Union[X, None])
|
||||
if origin is typing.Union:
|
||||
if value is None:
|
||||
return None
|
||||
non_none_args = [a for a in args if a is not type(None)]
|
||||
if len(non_none_args) == 1:
|
||||
# Optional[X] — recurse so List[X] and nested dataclasses are handled
|
||||
return _coerce(value, non_none_args[0])
|
||||
# Multi-type Union (e.g. Union[Measurement, int]): try dataclasses first
|
||||
for arg in non_none_args:
|
||||
if dataclasses.is_dataclass(arg) and isinstance(value, dict):
|
||||
return _from_dict_impl(arg, value)
|
||||
# All remaining args are primitives — return value as-is
|
||||
return value
|
||||
|
||||
# List[X]
|
||||
if origin is list:
|
||||
item_hint = args[0]
|
||||
return [_coerce(item, item_hint) for item in value]
|
||||
|
||||
# Plain dataclass
|
||||
if dataclasses.is_dataclass(hint) and isinstance(value, dict):
|
||||
return _from_dict_impl(hint, value)
|
||||
|
||||
if hint is date and isinstance(value, str):
|
||||
return date.fromisoformat(value)
|
||||
|
||||
return value
|
||||
__all__ = ["from_dict"]
|
||||
|
|
|
|||
49
datatypes/epc/schema/tests/test_historic_epc_loading.py
Normal file
49
datatypes/epc/schema/tests/test_historic_epc_loading.py
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from datatypes.epc.loaders.historic_epc import read_historic_epc_csv
|
||||
from datatypes.epc.domain.historic_epc import HistoricEpc
|
||||
|
||||
FIXTURES = os.path.join(os.path.dirname(__file__), "fixtures")
|
||||
|
||||
|
||||
class TestHistoricEpcLoading:
    """Field-by-field checks on the first record parsed from the CSV fixture."""

    @pytest.fixture
    def epc(self) -> HistoricEpc:
        # Parse the one-row fixture file and hand the tests its first record.
        fixture_path = os.path.join(FIXTURES, "historic_epc.csv")
        records = read_historic_epc_csv(fixture_path)
        return records[0]

    def test_returns_historic_epc_instance(self, epc: HistoricEpc) -> None:
        assert isinstance(epc, HistoricEpc)

    def test_lmk_key(self, epc: HistoricEpc) -> None:
        expected = "9292c3bf26a8876ce59274401ea73e3de5bd0b3e52a507c2162a46e57db8ea2f"
        assert epc.lmk_key == expected

    def test_address1(self, epc: HistoricEpc) -> None:
        expected = "47 GORDON ROAD"
        assert epc.address1 == expected

    def test_postcode(self, epc: HistoricEpc) -> None:
        expected = "AB33 8AL"
        assert epc.postcode == expected

    def test_current_energy_rating(self, epc: HistoricEpc) -> None:
        expected = "E"
        assert epc.current_energy_rating == expected

    def test_property_type(self, epc: HistoricEpc) -> None:
        expected = "House"
        assert epc.property_type == expected

    def test_built_form(self, epc: HistoricEpc) -> None:
        expected = "Semi-Detached"
        assert epc.built_form == expected

    def test_inspection_date(self, epc: HistoricEpc) -> None:
        # Dates are kept as the raw text found in the CSV cell.
        expected = "2021-04-11"
        assert epc.inspection_date == expected

    def test_uprn(self, epc: HistoricEpc) -> None:
        # The fixture stores the UPRN as "151020766.0"; it is kept verbatim.
        expected = "151020766.0"
        assert epc.uprn == expected

    def test_uprn_source(self, epc: HistoricEpc) -> None:
        expected = "Energy Assessor"
        assert epc.uprn_source == expected

    def test_report_type(self, epc: HistoricEpc) -> None:
        expected = "100"
        assert epc.report_type == expected
|
||||
3
datatypes/epc/search/__init__.py
Normal file
3
datatypes/epc/search/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
from datatypes.epc.search.epc_search_result import EpcSearchResult
|
||||
|
||||
__all__ = ["EpcSearchResult"]
|
||||
28
datatypes/epc/search/epc_search_result.py
Normal file
28
datatypes/epc/search/epc_search_result.py
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional
|
||||
|
||||
|
||||
@dataclass
class EpcSearchResult:
    """Record for a single EPC search hit (field meanings inferred from names)."""

    certificate_number: str
    address_line_1: str
    address_line_2: Optional[str]
    address_line_3: Optional[str]
    address_line_4: Optional[str]
    postcode: str
    post_town: str
    uprn: Optional[int]
    current_energy_efficiency_band: str
    registration_date: str

    @property
    def full_address(self) -> str:
        """Address lines 1-4 joined by a comma and a space, skipping empty ones.

        Lines that are ``None`` or the empty string are left out; postcode and
        post town are not part of the result.
        """
        candidate_lines = (
            self.address_line_1,
            self.address_line_2,
            self.address_line_3,
            self.address_line_4,
        )
        present = [line for line in candidate_lines if line]
        return ", ".join(present)
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import time
|
||||
from enum import Enum
|
||||
from http import HTTPStatus
|
||||
from typing import Optional, cast, Callable, Any
|
||||
|
||||
from hubspot.client import Client # type: ignore[reportMissingTypeStubs]
|
||||
|
|
@ -86,19 +87,27 @@ class HubspotClient:
|
|||
|
||||
def _call_with_retry(self, fn: Callable[[], Any], max_retries: int = 2) -> Any:
|
||||
"""
|
||||
Call fn(), retrying up to max_retries times on 429 rate-limit errors.
|
||||
Call fn(), retrying up to max_retries times on 429 rate-limit errors
|
||||
or transient 5xx server errors.
|
||||
Waits the minimal amount: the remaining interval window reported by HubSpot headers.
|
||||
Falls back to the full interval (10s) if headers are absent.
|
||||
|
||||
Note: each HubSpot sub-module (deals, companies, etc.) ships its own ApiException
|
||||
class with no shared base beyond Exception, so we detect 429s via duck-typing.
|
||||
class with no shared base beyond Exception, so we detect retryable statuses via duck-typing.
|
||||
"""
|
||||
retryable_statuses = {
|
||||
HTTPStatus.TOO_MANY_REQUESTS,
|
||||
HTTPStatus.INTERNAL_SERVER_ERROR,
|
||||
HTTPStatus.BAD_GATEWAY,
|
||||
HTTPStatus.SERVICE_UNAVAILABLE,
|
||||
HTTPStatus.GATEWAY_TIMEOUT,
|
||||
}
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
return fn()
|
||||
except Exception as e:
|
||||
status = getattr(e, "status", None)
|
||||
if status != 429 or attempt == max_retries:
|
||||
if status not in retryable_statuses or attempt == max_retries:
|
||||
raise
|
||||
headers = getattr(e, "headers", None) or {}
|
||||
interval_ms = int(
|
||||
|
|
@ -106,7 +115,7 @@ class HubspotClient:
|
|||
)
|
||||
wait_s = interval_ms / 1000.0
|
||||
self.logger.warning(
|
||||
f"HubSpot 429 (attempt {attempt + 1}/{max_retries}), "
|
||||
f"HubSpot {status} (attempt {attempt + 1}/{max_retries}), "
|
||||
f"waiting {wait_s:.1f}s before retry."
|
||||
)
|
||||
time.sleep(wait_s)
|
||||
|
|
|
|||
|
|
@ -162,6 +162,14 @@ class HubspotDealDiffer:
|
|||
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def check_for_magicplan_trigger(
|
||||
new_deal: Dict[str, str], old_deal: HubspotDealData
|
||||
) -> bool:
|
||||
new_outcome = (new_deal.get("outcome") or "").lower()
|
||||
old_outcome = (old_deal.outcome or "").lower()
|
||||
return new_outcome == "surveyed" and old_outcome != "surveyed"
|
||||
|
||||
@staticmethod
|
||||
def _has_valid_pashub_link(new_pashub_link: str) -> bool:
|
||||
return bool(new_pashub_link)
|
||||
|
|
@ -178,7 +186,7 @@ class HubspotDealDiffer:
|
|||
def _coordination_completed(
|
||||
new_deal: Dict[str, str], old_deal: HubspotDealData
|
||||
) -> bool:
|
||||
new_status: str = new_deal.get("coordination_status") or ""
|
||||
new_status: str = new_deal.get("coordination_status__stage_1_") or ""
|
||||
return (
|
||||
new_status != ""
|
||||
and new_status.lower() in HubspotDealDiffer.COORDINATION_COMPLETE
|
||||
|
|
@ -187,7 +195,7 @@ class HubspotDealDiffer:
|
|||
|
||||
@staticmethod
|
||||
def _design_completed(new_deal: Dict[str, str], old_deal: HubspotDealData) -> bool:
|
||||
new_status: str = new_deal.get("design_status") or ""
|
||||
new_status: str = new_deal.get("retrofit_design_status") or ""
|
||||
return (
|
||||
new_status != ""
|
||||
and new_status.lower() == HubspotDealDiffer.RETROFIT_DESIGN_COMPLETE
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from etl.hubspot.hubspot_deal_differ import HubspotDealDiffer
|
|||
from etl.hubspot.hubspot_trigger_orchestrator_trigger_request import (
|
||||
HubspotTriggerOrchestratorTriggerRequest,
|
||||
)
|
||||
from backend.app.db.models.tasks import SourceEnum
|
||||
from backend.utils.subtasks import task_handler
|
||||
from backend.app.db.models.hubspot_deal_data import HubspotDealData
|
||||
from utils.logger import setup_logger
|
||||
|
|
@ -16,7 +17,7 @@ from utils.logger import setup_logger
|
|||
logger = setup_logger()
|
||||
|
||||
|
||||
@task_handler()
|
||||
@task_handler(task_source="hubspot_scraper", source=SourceEnum.HUBSPOT_DEAL)
|
||||
def handler(body: dict[str, Any], context: Any) -> None:
|
||||
db_client = HubspotDataToDb()
|
||||
hubspot_client = HubspotClient()
|
||||
|
|
@ -56,6 +57,12 @@ def handler(body: dict[str, Any], context: Any) -> None:
|
|||
f"Triggering Pas Hub file fetcher for HubSpot deal ID {hubspot_deal_id}"
|
||||
)
|
||||
_trigger_pashub_fetcher(sqs_client, hubspot_deal_id, hubspot_deal)
|
||||
|
||||
if (hubspot_deal.get("outcome") or "").lower() == "surveyed":
|
||||
logger.info(
|
||||
f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}"
|
||||
)
|
||||
_trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id)
|
||||
else:
|
||||
# Deal already in db, check whether anything has changed
|
||||
logger.info(
|
||||
|
|
@ -97,9 +104,34 @@ def handler(body: dict[str, Any], context: Any) -> None:
|
|||
f"Not Triggering PasHub file fetcher for HubSpot deal ID {hubspot_deal_id}"
|
||||
)
|
||||
|
||||
if HubspotDealDiffer.check_for_magicplan_trigger(
|
||||
new_deal=hubspot_deal, old_deal=db_deal
|
||||
):
|
||||
logger.info(
|
||||
f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}"
|
||||
)
|
||||
_trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id)
|
||||
|
||||
print("done")
|
||||
|
||||
|
||||
def _trigger_magicplan_fetcher(
    sqs_client: Any,
    hubspot_deal: Dict[str, str],
    listing: Optional[dict[str, str]],
    hubspot_deal_id: str,
) -> None:
    """Queue a MagicPlan fetch for one HubSpot deal.

    Sends a JSON message with the deal name (as ``address``), the deal id and
    the listing's ``national_uprn`` (``None`` when there is no listing) to the
    queue configured as ``MAGICPLAN_SQS_URL``, then logs the returned
    message id.
    """
    address = hubspot_deal.get("dealname")
    uprn = listing.get("national_uprn") if listing else None
    message_body = {
        "address": address,
        "hubspot_deal_id": hubspot_deal_id,
        "uprn": uprn,
    }

    queue_url = get_settings().MAGICPLAN_SQS_URL
    payload = json.dumps(message_body)
    response = sqs_client.send_message(QueueUrl=queue_url, MessageBody=payload)

    message_id = response["MessageId"]
    logger.info(f"Sent message to MagicPlan queue. MessageId: {message_id}")
|
||||
|
||||
|
||||
def _trigger_pashub_fetcher(
|
||||
sqs_client: Any, deal_id: str, hubspot_deal: Dict[str, str]
|
||||
) -> None:
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ def test_pashub_trigger__coordination_completed_and_pashub_link_set__returns_tru
|
|||
new_deal = make_new_deal(
|
||||
deal_id,
|
||||
pashub_link="www.google.co.uk",
|
||||
coordination_status=coordination_status,
|
||||
**{"coordination_status__stage_1_": coordination_status},
|
||||
)
|
||||
|
||||
assert (
|
||||
|
|
@ -156,7 +156,7 @@ def test_pashub_trigger__design_completed_and_pashub_link_set__returns_true() ->
|
|||
new_deal = make_new_deal(
|
||||
deal_id,
|
||||
pashub_link="www.google.co.uk",
|
||||
design_status="uploaded",
|
||||
retrofit_design_status="uploaded",
|
||||
)
|
||||
|
||||
assert (
|
||||
|
|
@ -177,7 +177,7 @@ def test_pashub_trigger__design_completed_and_pashub_link_not_set__returns_false
|
|||
|
||||
new_deal = make_new_deal(
|
||||
deal_id,
|
||||
design_status="uploaded",
|
||||
retrofit_design_status="uploaded",
|
||||
)
|
||||
|
||||
assert (
|
||||
|
|
@ -270,6 +270,79 @@ def test_pashub_trigger__coordination_design_lodgement_not_completed_and_pashub_
|
|||
)
|
||||
|
||||
|
||||
# ==========================
|
||||
# MAGICPLAN TRIGGER TESTS
|
||||
# ==========================
|
||||
|
||||
|
||||
def test_magicplan_trigger__outcome_transitions_to_surveyed__returns_true() -> None:
    """Stored outcome "assessed" with incoming "surveyed" fires the trigger."""
    # Arrange
    shared_id = uuid.uuid4()
    previous = make_old_deal(id=shared_id, outcome="assessed")
    incoming = make_new_deal(shared_id, outcome="surveyed")

    # Act
    fired = HubspotDealDiffer.check_for_magicplan_trigger(
        new_deal=incoming, old_deal=previous
    )

    # Assert
    assert fired is True
|
||||
|
||||
|
||||
def test_magicplan_trigger__outcome_already_surveyed__returns_false() -> None:
    """No trigger when the stored outcome was already "surveyed"."""
    # Arrange
    shared_id = uuid.uuid4()
    previous = make_old_deal(id=shared_id, outcome="surveyed")
    incoming = make_new_deal(shared_id, outcome="surveyed")

    # Act
    fired = HubspotDealDiffer.check_for_magicplan_trigger(
        new_deal=incoming, old_deal=previous
    )

    # Assert
    assert fired is False
|
||||
|
||||
|
||||
def test_magicplan_trigger__outcome_transitions_to_non_surveyed__returns_false() -> None:
    """No trigger when the incoming outcome is something other than "surveyed"."""
    # Arrange: both sides carry "assessed", so the new outcome is not surveyed.
    shared_id = uuid.uuid4()
    previous = make_old_deal(id=shared_id, outcome="assessed")
    incoming = make_new_deal(shared_id, outcome="assessed")

    # Act
    fired = HubspotDealDiffer.check_for_magicplan_trigger(
        new_deal=incoming, old_deal=previous
    )

    # Assert
    assert fired is False
|
||||
|
||||
|
||||
def test_magicplan_trigger__outcome_surveyed_uppercase__returns_true() -> None:
    """The outcome comparison ignores case: "SURVEYED" still fires the trigger."""
    # Arrange
    shared_id = uuid.uuid4()
    previous = make_old_deal(id=shared_id, outcome="assessed")
    incoming = make_new_deal(shared_id, outcome="SURVEYED")

    # Act
    fired = HubspotDealDiffer.check_for_magicplan_trigger(
        new_deal=incoming, old_deal=previous
    )

    # Assert
    assert fired is True
|
||||
|
||||
|
||||
# =======================
|
||||
# DB UPDATE TRIGGER TESTS
|
||||
# =======================
|
||||
|
|
|
|||
227
etl/hubspot/tests/test_scraper_handler.py
Normal file
227
etl/hubspot/tests/test_scraper_handler.py
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
import json
|
||||
import uuid
|
||||
from typing import Any, Dict, Optional
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from backend.app.db.models.hubspot_deal_data import HubspotDealData
|
||||
from etl.hubspot.scripts.scraper.main import handler
|
||||
|
||||
DEAL_NAME = "123 Main Street"
|
||||
UPRN = "12345678"
|
||||
DEAL_ID = "999"
|
||||
PASHUB_LINK = "https://pashub.example.com/deal/999"
|
||||
MAGICPLAN_QUEUE_URL = "https://sqs.eu-west-2.amazonaws.com/123/magic-plan-dev"
|
||||
PASHUB_QUEUE_URL = "https://sqs.test/pashub"
|
||||
|
||||
|
||||
def make_hubspot_deal(**kwargs: Any) -> Dict[str, Any]:
    """Build a HubSpot deal payload for the tests.

    Starts from the module's default id, name and a ``None`` PasHub link;
    any keyword argument overrides or extends those defaults.
    """
    deal: Dict[str, Any] = {
        "hs_object_id": DEAL_ID,
        "dealname": DEAL_NAME,
        "pashub_link": None,
    }
    deal.update(kwargs)
    return deal
|
||||
|
||||
|
||||
def make_db_deal(**kwargs: Any) -> HubspotDealData:
    """Build a stored-deal row with a fresh random id and the default deal id.

    Extra keyword arguments are forwarded to ``HubspotDealData`` unchanged.
    """
    row_id = uuid.uuid4()
    return HubspotDealData(id=row_id, deal_id=DEAL_ID, **kwargs)
|
||||
|
||||
|
||||
def run_handler(
    hubspot_deal: Dict[str, Any],
    db_deal: Optional[HubspotDealData],
    listing: Optional[dict],
) -> MagicMock:
    """Run the scraper handler with every collaborator patched.

    The DB lookup returns *db_deal*, the HubSpot client returns
    ``(hubspot_deal, None, listing)``, ``boto3.client`` returns a mock SQS
    client and the settings expose the two test queue URLs. The handler is
    invoked through ``__wrapped__``, i.e. the function beneath its decorator.

    Returns:
        The mock SQS client, so callers can assert on ``send_message``.
    """
    sqs_double = MagicMock()
    sqs_double.send_message.return_value = {"MessageId": "test-id"}

    event = {"hubspot_deal_id": DEAL_ID}
    hubspot_response = (hubspot_deal, None, listing)

    with (
        patch("etl.hubspot.scripts.scraper.main.HubspotDataToDb") as db_cls,
        patch("etl.hubspot.scripts.scraper.main.HubspotClient") as hubspot_cls,
        patch("etl.hubspot.scripts.scraper.main.boto3") as boto3_double,
        patch("etl.hubspot.scripts.scraper.main.get_settings") as settings_factory,
    ):
        db = db_cls.return_value
        db.find_deal_with_deal_id.return_value = db_deal
        db.upsert_deal.return_value = None

        hubspot = hubspot_cls.return_value
        hubspot.get_deal_and_company_and_listing.return_value = hubspot_response

        boto3_double.client.return_value = sqs_double

        settings = settings_factory.return_value
        settings.MAGICPLAN_SQS_URL = MAGICPLAN_QUEUE_URL
        settings.PASHUB_TO_ARA_SQS_URL = PASHUB_QUEUE_URL

        handler.__wrapped__(event, "")

    return sqs_double
|
||||
|
||||
|
||||
# ====================================
|
||||
# NEW DEAL PATH - MagicPlan trigger
|
||||
# ====================================
|
||||
|
||||
|
||||
def test_new_deal_surveyed__sends_magicplan_sqs() -> None:
    """A deal not yet in the DB with outcome "surveyed" queues one MagicPlan message."""
    # Arrange
    deal = make_hubspot_deal(outcome="surveyed")
    listing = {"national_uprn": UPRN}
    expected_body = json.dumps(
        {"address": DEAL_NAME, "hubspot_deal_id": DEAL_ID, "uprn": UPRN}
    )

    # Act
    sqs = run_handler(hubspot_deal=deal, db_deal=None, listing=listing)

    # Assert
    sqs.send_message.assert_called_once_with(
        QueueUrl=MAGICPLAN_QUEUE_URL, MessageBody=expected_body
    )
|
||||
|
||||
|
||||
def test_new_deal_not_surveyed__no_magicplan_sqs() -> None:
    """A new deal whose outcome is not "surveyed" sends nothing to SQS."""
    # Arrange
    deal = make_hubspot_deal(outcome="assessed")

    # Act
    sqs = run_handler(hubspot_deal=deal, db_deal=None, listing=None)

    # Assert
    sqs.send_message.assert_not_called()
|
||||
|
||||
|
||||
def test_new_deal_surveyed_no_listing__magicplan_sqs_uprn_is_null() -> None:
    """Without a listing the MagicPlan message is still sent, with a null UPRN."""
    # Arrange
    deal = make_hubspot_deal(outcome="surveyed")
    expected_body = json.dumps(
        {"address": DEAL_NAME, "hubspot_deal_id": DEAL_ID, "uprn": None}
    )

    # Act
    sqs = run_handler(hubspot_deal=deal, db_deal=None, listing=None)

    # Assert
    sqs.send_message.assert_called_once_with(
        QueueUrl=MAGICPLAN_QUEUE_URL, MessageBody=expected_body
    )
|
||||
|
||||
|
||||
# ==========================================
|
||||
# EXISTING DEAL PATH - MagicPlan trigger
|
||||
# ==========================================
|
||||
|
||||
|
||||
def test_existing_deal_surveyed_transition__sends_magicplan_sqs() -> None:
    """A stored deal moving from "assessed" to "surveyed" queues a MagicPlan message."""
    # Arrange
    stored = make_db_deal(outcome="assessed")
    deal = make_hubspot_deal(outcome="surveyed")
    listing = {"national_uprn": UPRN}
    expected_body = json.dumps(
        {"address": DEAL_NAME, "hubspot_deal_id": DEAL_ID, "uprn": UPRN}
    )

    # Act
    sqs = run_handler(hubspot_deal=deal, db_deal=stored, listing=listing)

    # Assert
    sqs.send_message.assert_called_once_with(
        QueueUrl=MAGICPLAN_QUEUE_URL, MessageBody=expected_body
    )
|
||||
|
||||
|
||||
def test_existing_deal_already_surveyed__no_magicplan_sqs() -> None:
    """A deal already stored as "surveyed" does not re-trigger MagicPlan.

    The deal name differs between the stored and incoming versions so the two
    are not identical; only the unchanged outcome matters for this assertion.
    """
    # Arrange
    stored = make_db_deal(outcome="surveyed", dealname="Old Name")
    deal = make_hubspot_deal(outcome="surveyed", dealname="New Name")

    # Act
    sqs = run_handler(hubspot_deal=deal, db_deal=stored, listing=None)

    # Assert
    sqs.send_message.assert_not_called()
|
||||
|
||||
|
||||
# ====================================
|
||||
# NEW DEAL PATH - PasHub trigger
|
||||
# ====================================
|
||||
|
||||
|
||||
def test_new_deal_with_pashub_link__sends_pashub_sqs() -> None:
    """A new deal carrying a PasHub link queues exactly one PasHub message."""
    # Arrange
    deal = make_hubspot_deal(pashub_link=PASHUB_LINK)
    expected_message = {
        "pashub_link": PASHUB_LINK,
        "address": None,
        "hubspot_deal_id": DEAL_ID,
        "sharepoint_link": None,
        "uprn": None,
        "landlord_property_id": None,
        "deal_stage": None,
    }
    expected_body = json.dumps(expected_message)

    # Act
    sqs = run_handler(hubspot_deal=deal, db_deal=None, listing=None)

    # Assert
    sqs.send_message.assert_called_once_with(
        QueueUrl=PASHUB_QUEUE_URL, MessageBody=expected_body
    )
|
||||
|
||||
|
||||
def test_new_deal_no_pashub_link__no_pashub_sqs() -> None:
    """A deal with no stored row and a default HubSpot deal sends nothing to SQS."""
    # Arrange
    incoming_deal = make_hubspot_deal()

    # Act
    sqs_mock = run_handler(hubspot_deal=incoming_deal, db_deal=None, listing=None)

    # Assert
    sqs_mock.send_message.assert_not_called()
|
||||
|
||||
|
||||
# ==========================================
|
||||
# EXISTING DEAL PATH - PasHub trigger
|
||||
# ==========================================
|
||||
|
||||
|
||||
def test_existing_deal_pashub_link_added__sends_pashub_sqs() -> None:
    """A stored deal without a PasHub link that gains one sends one PasHub message.

    The expected JSON body has the same shape and key order as for a brand-new
    deal: only the link and the HubSpot deal id are populated.
    """
    # Arrange
    stored_deal = make_db_deal(pashub_link=None)
    incoming_deal = make_hubspot_deal(pashub_link=PASHUB_LINK)

    # Act
    sqs_mock = run_handler(
        hubspot_deal=incoming_deal, db_deal=stored_deal, listing=None
    )

    # Assert
    expected_payload = {
        "pashub_link": PASHUB_LINK,
        "address": None,
        "hubspot_deal_id": DEAL_ID,
        "sharepoint_link": None,
        "uprn": None,
        "landlord_property_id": None,
        "deal_stage": None,
    }
    sqs_mock.send_message.assert_called_once_with(
        QueueUrl=PASHUB_QUEUE_URL,
        MessageBody=json.dumps(expected_payload),
    )
|
||||
|
||||
|
||||
def test_existing_deal_pashub_link_unchanged__no_pashub_sqs() -> None:
    """No SQS message is sent when the PasHub link is identical on both sides.

    The deal name differs between the stored and the incoming deal, so there
    is a change to process, but the link itself did not change.
    """
    # Arrange
    stored_deal = make_db_deal(pashub_link=PASHUB_LINK, dealname="Old Name")
    incoming_deal = make_hubspot_deal(pashub_link=PASHUB_LINK, dealname="New Name")

    # Act
    sqs_mock = run_handler(
        hubspot_deal=incoming_deal, db_deal=stored_deal, listing=None
    )

    # Assert
    sqs_mock.send_message.assert_not_called()
|
||||
|
|
@ -12,7 +12,16 @@ data "terraform_remote_state" "pashub_to_ara" {
|
|||
config = {
|
||||
bucket = "pashub-to-ara-terraform-state"
|
||||
key = "env:/${var.stage}/terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
}
|
||||
|
||||
data "terraform_remote_state" "magic_plan" {
|
||||
backend = "s3"
|
||||
config = {
|
||||
bucket = "magic-plan-client-terraform-state"
|
||||
key = "env:/${var.stage}/terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -49,6 +58,7 @@ module "hubspot_deal_etl" {
|
|||
HUBSPOT_API_KEY = var.hubspot_api_key
|
||||
|
||||
PASHUB_TO_ARA_SQS_URL = data.terraform_remote_state.pashub_to_ara.outputs.pashub_to_ara_queue_url
|
||||
MAGICPLAN_SQS_URL = data.terraform_remote_state.magic_plan.outputs.magic_plan_queue_url
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -76,4 +86,18 @@ module "hubspot_deal_etl_sqs_policy" {
|
|||
resource "aws_iam_role_policy_attachment" "hubspot_deal_etl_sqs_send" {
|
||||
role = module.hubspot_deal_etl.role_name
|
||||
policy_arn = module.hubspot_deal_etl_sqs_policy.policy_arn
|
||||
}
|
||||
|
||||
module "hubspot_deal_etl_magicplan_sqs_policy" {
|
||||
source = "../../modules/general_iam_policy"
|
||||
|
||||
policy_name = "hubspot-deal-etl-magicplan-sqs-send-${var.stage}"
|
||||
policy_description = "Allow HubSpot ETL Lambda to send messages to MagicPlan queue"
|
||||
actions = ["sqs:SendMessage"]
|
||||
resources = [data.terraform_remote_state.magic_plan.outputs.magic_plan_queue_arn]
|
||||
}
|
||||
|
||||
resource "aws_iam_role_policy_attachment" "hubspot_deal_etl_magicplan_sqs_send" {
|
||||
role = module.hubspot_deal_etl.role_name
|
||||
policy_arn = module.hubspot_deal_etl_magicplan_sqs_policy.policy_arn
|
||||
}
|
||||
46
infrastructure/terraform/lambda/magic_plan/main.tf
Normal file
46
infrastructure/terraform/lambda/magic_plan/main.tf
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
# Read-only view of the shared "assessment-model" Terraform state.
# This configuration uses it for the energy-assessments S3 write policy ARN
# (see the magic_plan_s3_write attachment).
data "terraform_remote_state" "shared" {
  backend = "s3"
  config = {
    bucket = "assessment-model-terraform-state"
    # NOTE(review): "env:/<name>/" is the S3 backend's default workspace key
    # prefix, so this assumes the workspace name equals var.stage — confirm.
    key    = "env:/${var.stage}/terraform.tfstate"
    region = "eu-west-2"
  }
}
|
||||
|
||||
# Current version of the per-stage database credentials secret.
# Its secret_string is decoded as JSON in locals.db_credentials.
data "aws_secretsmanager_secret_version" "db_credentials" {
  secret_id = "${var.stage}/assessment_model/db_credentials"
}
|
||||
|
||||
locals {
  # Decoded secret payload; the lambda module reads the
  # db_assessment_model_username / db_assessment_model_password keys from it.
  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
}
|
||||
|
||||
# Attach the shared energy-assessments S3 write policy (exported by the shared
# state) to the role created by the lambda module.
resource "aws_iam_role_policy_attachment" "magic_plan_s3_write" {
  role       = module.lambda.role_name
  policy_arn = data.terraform_remote_state.shared.outputs.energy_assessments_s3_write_arn
}
|
||||
|
||||
# MagicPlan Lambda, built from the shared lambda_with_sqs module.
# The module's queue_url / queue_arn / role_name outputs are consumed by
# outputs.tf and by the S3 policy attachment in this file.
module "lambda" {
  source = "../../modules/lambda_with_sqs"

  name  = "magic_plan"
  stage = var.stage

  # Digest-pinned image reference assembled in variables.tf (local.image_uri).
  image_uri = local.image_uri

  maximum_concurrency            = var.maximum_concurrency
  reserved_concurrent_executions = var.reserved_concurrent_executions
  batch_size                     = var.batch_size

  # NOTE(review): the MagicPlan API key and the DB credentials are passed as
  # Lambda environment variables — confirm this is acceptable versus having
  # the function read them from Secrets Manager at runtime.
  environment = {
    STAGE                 = var.stage
    LOG_LEVEL             = "info"
    MAGICPLAN_CUSTOMER_ID = var.magicplan_customer_id
    MAGICPLAN_API_KEY     = var.magicplan_api_key
    DB_USERNAME           = local.db_credentials.db_assessment_model_username
    DB_PASSWORD           = local.db_credentials.db_assessment_model_password
    DB_HOST               = var.db_host
    DB_NAME               = var.db_name
    DB_PORT               = var.db_port
  }
}
|
||||
9
infrastructure/terraform/lambda/magic_plan/outputs.tf
Normal file
9
infrastructure/terraform/lambda/magic_plan/outputs.tf
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
output "magic_plan_queue_url" {
|
||||
value = module.lambda.queue_url
|
||||
description = "URL of the MagicPlan SQS queue"
|
||||
}
|
||||
|
||||
output "magic_plan_queue_arn" {
|
||||
value = module.lambda.queue_arn
|
||||
description = "ARN of the MagicPlan SQS queue"
|
||||
}
|
||||
16
infrastructure/terraform/lambda/magic_plan/provider.tf
Normal file
16
infrastructure/terraform/lambda/magic_plan/provider.tf
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Terraform settings for the magic_plan configuration: AWS provider
# constraint, S3 state backend and minimum Terraform version.
terraform {
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.0"
    }
  }

  # State bucket for this configuration. Other configurations read this state
  # through terraform_remote_state at "env:/<stage>/terraform.tfstate".
  # NOTE(review): that relies on the workspace being named after the stage —
  # confirm.
  backend "s3" {
    bucket = "magic-plan-client-terraform-state"
    key    = "terraform.tfstate"
    region = "eu-west-2"
  }

  required_version = ">= 1.2.0"
}
|
||||
68
infrastructure/terraform/lambda/magic_plan/variables.tf
Normal file
68
infrastructure/terraform/lambda/magic_plan/variables.tf
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
variable "lambda_name" {
|
||||
type = string
|
||||
description = "Logical name of the lambda"
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
description = "Deployment stage (e.g. dev, prod)"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "ecr_repo_url" {
|
||||
type = string
|
||||
description = "ECR repository URL (no tag, no digest)"
|
||||
}
|
||||
|
||||
variable "image_digest" {
|
||||
type = string
|
||||
description = "Image digest (sha256:...)"
|
||||
}
|
||||
|
||||
variable "maximum_concurrency" {
|
||||
type = number
|
||||
default = null
|
||||
description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
|
||||
}
|
||||
|
||||
# Forwarded unchanged to the lambda_with_sqs module; defaults to 1.
# NOTE(review): presumably the Lambda's reserved concurrency — confirm against
# the module.
variable "reserved_concurrent_executions" {
  type    = number
  default = 1
}
|
||||
|
||||
# Forwarded unchanged to the lambda_with_sqs module; defaults to 1.
# NOTE(review): presumably the SQS event-source batch size — confirm against
# the module.
variable "batch_size" {
  type    = number
  default = 1
}
|
||||
|
||||
locals {
  # Image reference pinned by digest: "<repo url>@<sha256 digest>".
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}
|
||||
|
||||
# Exposes the fully resolved, digest-pinned image URI used by the Lambda.
output "resolved_image_uri" {
  value = local.image_uri
}
|
||||
|
||||
# MagicPlan customer id; injected into the Lambda as MAGICPLAN_CUSTOMER_ID.
# Marked sensitive so Terraform redacts it in plan/apply output.
variable "magicplan_customer_id" {
  type      = string
  sensitive = true
}
|
||||
|
||||
# MagicPlan API key; injected into the Lambda as MAGICPLAN_API_KEY.
# Marked sensitive so Terraform redacts it in plan/apply output.
variable "magicplan_api_key" {
  type      = string
  sensitive = true
}
|
||||
|
||||
# Database host; injected into the Lambda as DB_HOST.
variable "db_host" {
  type      = string
  sensitive = true
}
|
||||
|
||||
# Database name; injected into the Lambda as DB_NAME.
variable "db_name" {
  type      = string
  sensitive = true
}
|
||||
|
||||
# Database port; injected into the Lambda as DB_PORT.
# Declared as a string rather than a number.
variable "db_port" {
  type      = string
  sensitive = true
}
|
||||
|
|
@ -54,5 +54,5 @@ module "lambda" {
|
|||
|
||||
resource "aws_iam_role_policy_attachment" "pashub_to_ara_s3_write" {
|
||||
role = module.lambda.role_name
|
||||
policy_arn = data.terraform_remote_state.shared.outputs.pashub_to_ara_s3_write_arn
|
||||
policy_arn = data.terraform_remote_state.shared.outputs.energy_assessments_s3_write_arn
|
||||
}
|
||||
|
|
|
|||
|
|
@ -280,6 +280,21 @@ output "retrofit_energy_assessments_bucket_name" {
|
|||
description = "Name of the retrofit energy assessments bucket"
|
||||
}
|
||||
|
||||
module "energy_assessments_s3_write" {
|
||||
source = "../modules/s3_iam_policy"
|
||||
|
||||
policy_name = "EnergyAssessmentsWriteS3"
|
||||
policy_description = "Allow lambdas to write to retrofit energy assessments bucket"
|
||||
bucket_arns = ["arn:aws:s3:::retrofit-energy-assessments-${var.stage}"]
|
||||
actions = ["s3:PutObject", "s3:AbortMultipartUpload"]
|
||||
resource_paths = ["/*"]
|
||||
}
|
||||
|
||||
output "energy_assessments_s3_write_arn" {
|
||||
value = module.energy_assessments_s3_write.policy_arn
|
||||
}
|
||||
|
||||
|
||||
|
||||
# Set up the route53 record for the API
|
||||
module "route53" {
|
||||
|
|
@ -568,6 +583,7 @@ module "pashub_to_ara_registry" {
|
|||
stage = var.stage
|
||||
}
|
||||
|
||||
#### TEMP - needs to be detached from entities before this can be deleted ####
|
||||
module "pashub_to_ara_s3_write" {
|
||||
source = "../modules/s3_iam_policy"
|
||||
|
||||
|
|
@ -745,4 +761,5 @@ module "magic_plan_client_registry" {
|
|||
source = "../modules/container_registry"
|
||||
name = "magic-plan"
|
||||
stage = var.stage
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1 @@
|
|||
[tool.pyright]
|
||||
reportUnknownMemberType = false
|
||||
reportUnknownVariableType = false
|
||||
|
|
@ -3,6 +3,6 @@ pythonpath = .
|
|||
log_cli = true
|
||||
log_cli_level = INFO
|
||||
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests backend/hubspot_trigger_orchestrator/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/pashub_fetcher/tests backend/documents_parser/tests backend/magic_plan/tests datatypes/magicplan/api/tests datatypes/magicplan/domain/tests backend/app/db/functions/tests
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests etl/hubspot/tests datatypes/epc/schema/tests datatypes/epc/surveys/tests datatypes/epc/domain/tests backend/ecmk_fetcher/tests/ backend/pashub_fetcher/tests backend/documents_parser/tests backend/magic_plan/tests datatypes/magicplan/api/tests datatypes/magicplan/domain/tests backend/app/db/functions/tests
|
||||
markers =
|
||||
integration: mark a test as an integration test
|
||||
|
|
|
|||
|
|
@ -21,28 +21,28 @@ regional_labour_variations = [
|
|||
{"Region": "Yorkshire and the Humber", "Adjustment_Factor": 0.86},
|
||||
{"Region": "Wales", "Adjustment_Factor": 0.88},
|
||||
{"Region": "Scotland", "Adjustment_Factor": 0.88},
|
||||
{"Region": "Northern Ireland", "Adjustment_Factor": 0.76}
|
||||
{"Region": "Northern Ireland", "Adjustment_Factor": 0.76},
|
||||
]
|
||||
|
||||
# Installers are now working with 435 watt panels
|
||||
PANEL_SIZE = 0.435
|
||||
|
||||
INSTALLER_SOLAR_COSTS = [
|
||||
{'n_panels': 4, 'array_kwp': 4 * PANEL_SIZE, 'cost': 4089.25, 'installer': 'CEG'},
|
||||
{'n_panels': 5, 'array_kwp': 5 * PANEL_SIZE, 'cost': 4242.48, 'installer': 'CEG'},
|
||||
{'n_panels': 6, 'array_kwp': 6 * PANEL_SIZE, 'cost': 4395.71, 'installer': 'CEG'},
|
||||
{'n_panels': 7, 'array_kwp': 7 * PANEL_SIZE, 'cost': 4548.94, 'installer': 'CEG'},
|
||||
{'n_panels': 8, 'array_kwp': 8 * PANEL_SIZE, 'cost': 4702.17, 'installer': 'CEG'},
|
||||
{'n_panels': 9, 'array_kwp': 9 * PANEL_SIZE, 'cost': 4855.41, 'installer': 'CEG'},
|
||||
{'n_panels': 10, 'array_kwp': 10 * PANEL_SIZE, 'cost': 5010.95, 'installer': 'CEG'},
|
||||
{'n_panels': 11, 'array_kwp': 11 * PANEL_SIZE, 'cost': 5166.49, 'installer': 'CEG'},
|
||||
{'n_panels': 12, 'array_kwp': 12 * PANEL_SIZE, 'cost': 5322.04, 'installer': 'CEG'},
|
||||
{'n_panels': 13, 'array_kwp': 13 * PANEL_SIZE, 'cost': 5657.6, 'installer': 'CEG'},
|
||||
{'n_panels': 14, 'array_kwp': 14 * PANEL_SIZE, 'cost': 5993.16, 'installer': 'CEG'},
|
||||
{'n_panels': 15, 'array_kwp': 15 * PANEL_SIZE, 'cost': 6328.71, 'installer': 'CEG'},
|
||||
{'n_panels': 16, 'array_kwp': 16 * PANEL_SIZE, 'cost': 6483.33, 'installer': 'CEG'},
|
||||
{'n_panels': 17, 'array_kwp': 17 * PANEL_SIZE, 'cost': 6637.95, 'installer': 'CEG'},
|
||||
{'n_panels': 18, 'array_kwp': 18 * PANEL_SIZE, 'cost': 6792.57, 'installer': 'CEG'}
|
||||
{"n_panels": 4, "array_kwp": 4 * PANEL_SIZE, "cost": 4089.25, "installer": "CEG"},
|
||||
{"n_panels": 5, "array_kwp": 5 * PANEL_SIZE, "cost": 4242.48, "installer": "CEG"},
|
||||
{"n_panels": 6, "array_kwp": 6 * PANEL_SIZE, "cost": 4395.71, "installer": "CEG"},
|
||||
{"n_panels": 7, "array_kwp": 7 * PANEL_SIZE, "cost": 4548.94, "installer": "CEG"},
|
||||
{"n_panels": 8, "array_kwp": 8 * PANEL_SIZE, "cost": 4702.17, "installer": "CEG"},
|
||||
{"n_panels": 9, "array_kwp": 9 * PANEL_SIZE, "cost": 4855.41, "installer": "CEG"},
|
||||
{"n_panels": 10, "array_kwp": 10 * PANEL_SIZE, "cost": 5010.95, "installer": "CEG"},
|
||||
{"n_panels": 11, "array_kwp": 11 * PANEL_SIZE, "cost": 5166.49, "installer": "CEG"},
|
||||
{"n_panels": 12, "array_kwp": 12 * PANEL_SIZE, "cost": 5322.04, "installer": "CEG"},
|
||||
{"n_panels": 13, "array_kwp": 13 * PANEL_SIZE, "cost": 5657.6, "installer": "CEG"},
|
||||
{"n_panels": 14, "array_kwp": 14 * PANEL_SIZE, "cost": 5993.16, "installer": "CEG"},
|
||||
{"n_panels": 15, "array_kwp": 15 * PANEL_SIZE, "cost": 6328.71, "installer": "CEG"},
|
||||
{"n_panels": 16, "array_kwp": 16 * PANEL_SIZE, "cost": 6483.33, "installer": "CEG"},
|
||||
{"n_panels": 17, "array_kwp": 17 * PANEL_SIZE, "cost": 6637.95, "installer": "CEG"},
|
||||
{"n_panels": 18, "array_kwp": 18 * PANEL_SIZE, "cost": 6792.57, "installer": "CEG"},
|
||||
]
|
||||
|
||||
# These are costs we received from CRG, for pricing up air source heat pumps
|
||||
|
|
@ -80,7 +80,12 @@ INSTALLER_SOLAR_PV_INVERTER_COST = 7500
|
|||
INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST = 500 # Just a rough guess to labour costs
|
||||
|
||||
INSTALLER_SOLAR_BATTERY_COSTS = [
|
||||
{'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 3769.89, 'installer': 'JJC'},
|
||||
{
|
||||
"capacity_kwh": 5,
|
||||
"description": "Battery Add on",
|
||||
"cost": 3769.89,
|
||||
"installer": "JJC",
|
||||
},
|
||||
# {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'},
|
||||
# {'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'},
|
||||
# {'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'}
|
||||
|
|
@ -102,10 +107,14 @@ TTZC_SMART_THERMOSTAT_LABOUR_HOURS = 2
|
|||
TTZC_ELECTRICIAN_HOURLY_RATE = 45
|
||||
# Based on cost of a Nest temperature sensor
|
||||
TTZC_ROOM_TEMPERATURE_SENSOR_COST = 50
|
||||
TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = 0.17 # (Assume ~ 10 mins install per sensor)
|
||||
TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS = (
|
||||
0.17 # (Assume ~ 10 mins install per sensor)
|
||||
)
|
||||
# Based on an average cost of smart radiator valves
|
||||
TTZC_SMART_RADIATOR_VALUES = 50
|
||||
TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = 0.37 # (Assume ~ 15-30 mins install per valve)
|
||||
TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS = (
|
||||
0.37 # (Assume ~ 15-30 mins install per valve)
|
||||
)
|
||||
|
||||
# boiler prices based on
|
||||
# This is the cost of a first-time central heating install from the Warm Front rate card
|
||||
|
|
@ -169,7 +178,7 @@ class Costs:
|
|||
"heater_removal": 0.1,
|
||||
"sealing_open_fireplace": 0.1,
|
||||
"mechanical_ventilation": 0.26,
|
||||
"sloping_ceiling_insulation": 0.26 # Similar to IWI so using the same contingency
|
||||
"sloping_ceiling_insulation": 0.26, # Similar to IWI so using the same contingency
|
||||
}
|
||||
|
||||
# Preliminaries are a percentage of the total cost of the work and covers the cost of site-specific costs
|
||||
|
|
@ -195,36 +204,46 @@ class Costs:
|
|||
|
||||
:param property_instance: Instance of a Property class containing relevant details like wall area.
|
||||
"""
|
||||
if not hasattr(property_instance, 'insulation_wall_area'):
|
||||
raise ValueError("Property instance must have an 'insulation_wall_area' attribute")
|
||||
if not hasattr(property_instance, "insulation_wall_area"):
|
||||
raise ValueError(
|
||||
"Property instance must have an 'insulation_wall_area' attribute"
|
||||
)
|
||||
self.property = property_instance
|
||||
self.regional_labour_variations = regional_labour_variations
|
||||
|
||||
self.region = county_to_region_map.get(self.property.epc_record.county, None)
|
||||
if self.region is None:
|
||||
# Try and grab using the local-authority-label
|
||||
self.region = county_to_region_map.get(self.property.epc_record.local_authority_label, None)
|
||||
self.region = county_to_region_map.get(
|
||||
self.property.epc_record.local_authority_label, None
|
||||
)
|
||||
|
||||
if self.region is None:
|
||||
# Try and get the region after converting the keys to lower
|
||||
self.region = {
|
||||
k.lower(): v for k, v in county_to_region_map.items()
|
||||
}.get(self.property.epc_record.local_authority_label.lower(), None)
|
||||
if self.property.epc_record.local_authority_label is not None:
|
||||
self.region = {
|
||||
k.lower(): v for k, v in county_to_region_map.items()
|
||||
}.get(self.property.epc_record.local_authority_label.lower(), None)
|
||||
|
||||
if self.region is None:
|
||||
logger.warning("No region found for county %s, defaulting to South East England",
|
||||
self.property.epc_record.county)
|
||||
logger.warning(
|
||||
"No region found for county %s, defaulting to South East England",
|
||||
self.property.epc_record.county,
|
||||
)
|
||||
self.region = "South East England"
|
||||
|
||||
self.labour_adjustment_factor = [
|
||||
x["Adjustment_Factor"] for x in self.regional_labour_variations if
|
||||
x["Region"] == self.region
|
||||
x["Adjustment_Factor"]
|
||||
for x in self.regional_labour_variations
|
||||
if x["Region"] == self.region
|
||||
][0]
|
||||
|
||||
if not self.labour_adjustment_factor:
|
||||
raise ValueError("Labour adjustment factor not found")
|
||||
|
||||
def cavity_wall_insulation(self, wall_area, material, is_extraction_and_refill=False):
|
||||
def cavity_wall_insulation(
|
||||
self, wall_area, material, is_extraction_and_refill=False
|
||||
):
|
||||
"""
|
||||
Calculates the total cost for cavity wall insulation based on material and labor costs,
|
||||
including contingency, preliminaries, profit, and VAT.
|
||||
|
|
@ -318,7 +337,8 @@ class Costs:
|
|||
|
||||
return {
|
||||
"total": total_cost,
|
||||
"contingency": self.CONTINGENCIES["suspended_floor_insulation"] * total_cost,
|
||||
"contingency": self.CONTINGENCIES["suspended_floor_insulation"]
|
||||
* total_cost,
|
||||
"contingency_rate": self.CONTINGENCIES["suspended_floor_insulation"],
|
||||
"labour_hours": labour_hours,
|
||||
"labour_days": labour_days,
|
||||
|
|
@ -370,8 +390,7 @@ class Costs:
|
|||
# - Apply sub-linear scaling for realism
|
||||
# - Enforce a minimum duration so estimates are not unrealistically low
|
||||
labour_days = max(
|
||||
min_days,
|
||||
base_days * (insulation_floor_area / base_area) ** labour_exponent
|
||||
min_days, base_days * (insulation_floor_area / base_area) ** labour_exponent
|
||||
)
|
||||
|
||||
return labour_days
|
||||
|
|
@ -388,7 +407,9 @@ class Costs:
|
|||
total_cost = material["total_cost"] * insulation_floor_area
|
||||
daily_labour_rate = 300 # Based on checkatrade
|
||||
|
||||
labour_days = self._estimate_number_of_days_for_solid_floor(insulation_floor_area)
|
||||
labour_days = self._estimate_number_of_days_for_solid_floor(
|
||||
insulation_floor_area
|
||||
)
|
||||
labour_cost = labour_days * daily_labour_rate
|
||||
|
||||
total_cost = total_cost + labour_cost
|
||||
|
|
@ -404,7 +425,6 @@ class Costs:
|
|||
}
|
||||
|
||||
def low_energy_lighting(self, number_of_lights, material):
|
||||
|
||||
"""
|
||||
Calculates the total cost for low energy lighting based on material and labor costs,
|
||||
including contingency, preliminaries, profit, and VAT.
|
||||
|
|
@ -419,7 +439,7 @@ class Costs:
|
|||
total_cost = material["total_cost"] * number_of_lights
|
||||
|
||||
labour_hours = 1
|
||||
labour_days = (labour_hours / 8)
|
||||
labour_days = labour_hours / 8
|
||||
|
||||
return {
|
||||
"total": total_cost,
|
||||
|
|
@ -450,26 +470,22 @@ class Costs:
|
|||
}
|
||||
|
||||
@classmethod
|
||||
def solar_pv(
|
||||
cls,
|
||||
solar_product,
|
||||
scaffolding_options,
|
||||
n_floors
|
||||
):
|
||||
|
||||
"""
|
||||
|
||||
"""
|
||||
def solar_pv(cls, solar_product, scaffolding_options, n_floors):
|
||||
""" """
|
||||
|
||||
system_cost = solar_product["total_cost"]
|
||||
|
||||
if not solar_product["includes_scaffolding"]:
|
||||
# We base this on the number of floors
|
||||
scaffolding = [x["total_cost"] for x in scaffolding_options if x["size"] == n_floors]
|
||||
scaffolding = [
|
||||
x["total_cost"] for x in scaffolding_options if x["size"] == n_floors
|
||||
]
|
||||
if not scaffolding:
|
||||
# If we have no options, handle this
|
||||
if n_floors <= 3:
|
||||
raise ValueError("No scaffolding options available for 3 or fewer floors")
|
||||
raise ValueError(
|
||||
"No scaffolding options available for 3 or fewer floors"
|
||||
)
|
||||
# We take the largest scaffolding option available
|
||||
scaffolding_cost = max([x["total_cost"] for x in scaffolding_options])
|
||||
else:
|
||||
|
|
@ -523,9 +539,9 @@ class Costs:
|
|||
We base the estimates for the cost of electric room heaters on the cost per room as estimated by the
|
||||
following article:
|
||||
https://www.bestelectricradiators.co.uk/blog/cost-to-install-a-new-heating-system-uk/
|
||||
|
||||
|
||||
:param number_heated_rooms: int, number of rooms to be heated
|
||||
:return:
|
||||
:return:
|
||||
"""
|
||||
|
||||
total_cost = 500 * number_heated_rooms
|
||||
|
|
@ -547,11 +563,11 @@ class Costs:
|
|||
}
|
||||
|
||||
def high_heat_electric_storage_heaters(
|
||||
self, number_heated_rooms: int,
|
||||
self,
|
||||
number_heated_rooms: int,
|
||||
needs_cylinder: bool,
|
||||
product: dict | None = None
|
||||
product: dict | None = None,
|
||||
):
|
||||
|
||||
"""
|
||||
We base the estimates for the cost of electric storage heaters on the cost per room as estimated by the
|
||||
energy saving trust
|
||||
|
|
@ -578,8 +594,11 @@ class Costs:
|
|||
|
||||
return {
|
||||
"total": total_cost,
|
||||
"contingency": total_cost * self.CONTINGENCIES["high_heat_retention_storage_heaters"],
|
||||
"contingency_rate": self.CONTINGENCIES["high_heat_retention_storage_heaters"],
|
||||
"contingency": total_cost
|
||||
* self.CONTINGENCIES["high_heat_retention_storage_heaters"],
|
||||
"contingency_rate": self.CONTINGENCIES[
|
||||
"high_heat_retention_storage_heaters"
|
||||
],
|
||||
"subtotal": subtotal_before_vat,
|
||||
"vat": vat,
|
||||
"labour_hours": labour_hours,
|
||||
|
|
@ -690,14 +709,14 @@ class Costs:
|
|||
|
||||
# The product costs are inclusive of VAT
|
||||
product_costs = (
|
||||
TTZC_SMART_THERMOSTAT_COST +
|
||||
TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms +
|
||||
TTZC_SMART_RADIATOR_VALUES * number_heated_rooms
|
||||
TTZC_SMART_THERMOSTAT_COST
|
||||
+ TTZC_ROOM_TEMPERATURE_SENSOR_COST * number_heated_rooms
|
||||
+ TTZC_SMART_RADIATOR_VALUES * number_heated_rooms
|
||||
)
|
||||
labour_hours = (
|
||||
TTZC_SMART_THERMOSTAT_LABOUR_HOURS +
|
||||
TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms +
|
||||
TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms
|
||||
TTZC_SMART_THERMOSTAT_LABOUR_HOURS
|
||||
+ TTZC_ROOM_TEMPERATURE_SENSOR_LABOUR_HOURS * number_heated_rooms
|
||||
+ TTZC_SMART_RADIATOR_VALUES_LABOUR_HOURS * number_heated_rooms
|
||||
)
|
||||
labour_costs = TTZC_ELECTRICIAN_HOURLY_RATE * labour_hours
|
||||
# Add contingency and preliminaries to the labour to account for the complexity of the job
|
||||
|
|
@ -722,7 +741,9 @@ class Costs:
|
|||
"labour_days": labour_days,
|
||||
}
|
||||
|
||||
def programmer_trvs_bypass(self, number_heated_rooms, has_programmer, has_trvs, has_bypass):
|
||||
def programmer_trvs_bypass(
|
||||
self, number_heated_rooms, has_programmer, has_trvs, has_bypass
|
||||
):
|
||||
|
||||
total_cost = 0
|
||||
labour_hours = 0
|
||||
|
|
@ -779,7 +800,9 @@ class Costs:
|
|||
}
|
||||
|
||||
@staticmethod
|
||||
def _estimate_n_radiators(number_habitable_rooms, total_floor_area, property_type, built_form):
|
||||
def _estimate_n_radiators(
|
||||
number_habitable_rooms, total_floor_area, property_type, built_form
|
||||
):
|
||||
# Base number of radiators: one per habitable room
|
||||
base_radiators = number_habitable_rooms
|
||||
|
||||
|
|
@ -787,34 +810,49 @@ class Costs:
|
|||
additional_radiators = 3 # Initial assumption
|
||||
|
||||
# Adjust additional radiators based on property type
|
||||
if property_type == 'Flat':
|
||||
additional_radiators -= 1 # Flats may need fewer radiators due to less exposure
|
||||
elif property_type in ['House', 'Bungalow', 'Maisonette']:
|
||||
if property_type == "Flat":
|
||||
additional_radiators -= (
|
||||
1 # Flats may need fewer radiators due to less exposure
|
||||
)
|
||||
elif property_type in ["House", "Bungalow", "Maisonette"]:
|
||||
# Multiple floors in Maisonette may require additional heating points
|
||||
additional_radiators += 2 # Houses and bungalows might need more due to greater exposure
|
||||
additional_radiators += (
|
||||
2 # Houses and bungalows might need more due to greater exposure
|
||||
)
|
||||
else:
|
||||
raise Exception("Invalid property type")
|
||||
|
||||
# Adjust total radiator needs based on built form
|
||||
form_factor = {
|
||||
'Enclosed Mid-Terrace': 0.9,
|
||||
'Mid-Terrace': 0.95,
|
||||
'Enclosed End-Terrace': 0.95,
|
||||
'Semi-Detached': 1.05,
|
||||
'Detached': 1.25,
|
||||
'End-Terrace': 1.05
|
||||
"Enclosed Mid-Terrace": 0.9,
|
||||
"Mid-Terrace": 0.95,
|
||||
"Enclosed End-Terrace": 0.95,
|
||||
"Semi-Detached": 1.05,
|
||||
"Detached": 1.25,
|
||||
"End-Terrace": 1.05,
|
||||
}
|
||||
|
||||
# Calculate total heating power needed and number of radiators based on standard output
|
||||
total_heating_power_required = total_floor_area * 80 # Watts per square meter
|
||||
radiator_output = 1000 # Average wattage per radiator
|
||||
total_radiators_based_on_power = (total_heating_power_required / radiator_output) * form_factor[built_form]
|
||||
total_radiators_based_on_power = (
|
||||
total_heating_power_required / radiator_output
|
||||
) * form_factor[built_form]
|
||||
|
||||
# Final estimation taking the higher of calculated needs or base room count
|
||||
estimated_radiators = max(total_radiators_based_on_power, base_radiators + additional_radiators)
|
||||
estimated_radiators = max(
|
||||
total_radiators_based_on_power, base_radiators + additional_radiators
|
||||
)
|
||||
return round(estimated_radiators)
|
||||
|
||||
def boiler(self, exising_room_heaters, system_change, n_heated_rooms, n_rooms, is_electric=False):
|
||||
def boiler(
|
||||
self,
|
||||
exising_room_heaters,
|
||||
system_change,
|
||||
n_heated_rooms,
|
||||
n_rooms,
|
||||
is_electric=False,
|
||||
):
|
||||
"""
|
||||
Based on a basic estimate of median value £2600 to install a low carbon combi boiler
|
||||
First-time central heating costs can also be found here:
|
||||
|
|
@ -859,12 +897,14 @@ class Costs:
|
|||
number_habitable_rooms=n_rooms,
|
||||
total_floor_area=self.property.floor_area,
|
||||
property_type=self.property.epc_record.property_type,
|
||||
built_form=self.property.epc_record.built_form
|
||||
built_form=self.property.epc_record.built_form,
|
||||
)
|
||||
|
||||
additionals_labour_cost = labour_rate * self.labour_adjustment_factor
|
||||
radiator_cost = DOUBLE_RADIATOR_COST * n_radiators
|
||||
system_change_cost = radiator_cost + FLUE_COST + PIPEWORK_COST + additionals_labour_cost
|
||||
system_change_cost = (
|
||||
radiator_cost + FLUE_COST + PIPEWORK_COST + additionals_labour_cost
|
||||
)
|
||||
system_change_cost_before_vat = system_change_cost / (1 + self.VAT_RATE)
|
||||
system_change_vat = system_change_cost - system_change_cost_before_vat
|
||||
# We add an extra labour day for the system change
|
||||
|
|
@ -897,14 +937,18 @@ class Costs:
|
|||
else:
|
||||
return 250
|
||||
|
||||
def air_source_heat_pump(self, ashp_size: float, number_heated_rooms: int, total_floor_area: float) -> dict:
|
||||
def air_source_heat_pump(
|
||||
self, ashp_size: float, number_heated_rooms: int, total_floor_area: float
|
||||
) -> dict:
|
||||
"""
|
||||
We produce a cost estimation for an air source heat pump, based on costs we have received from installers.
|
||||
|
||||
"""
|
||||
|
||||
system_cost = (
|
||||
(ASHP_SMALL_SYSTEM_COST if ashp_size <= 8.5 else ASHP_LARGE_SYSTEM_COST) + ASHP_SECURITY + ASHP_WALL_BRACKET
|
||||
(ASHP_SMALL_SYSTEM_COST if ashp_size <= 8.5 else ASHP_LARGE_SYSTEM_COST)
|
||||
+ ASHP_SECURITY
|
||||
+ ASHP_WALL_BRACKET
|
||||
)
|
||||
|
||||
available_n_rads = [x["n_radiators"] for x in ASHP_DISTRIBUTION_SYSTEM_COSTS]
|
||||
|
|
@ -940,7 +984,9 @@ class Costs:
|
|||
}
|
||||
|
||||
@staticmethod
|
||||
def _estimate_number_of_days_for_sloping_ceiling(insulation_roof_area: float) -> float:
|
||||
def _estimate_number_of_days_for_sloping_ceiling(
|
||||
insulation_roof_area: float,
|
||||
) -> float:
|
||||
"""
|
||||
Estimate labour days required to insulate an existing sloping ceiling.
|
||||
|
||||
|
|
@ -965,14 +1011,15 @@ class Costs:
|
|||
min_days = 2
|
||||
|
||||
labour_days = max(
|
||||
min_days,
|
||||
base_days * (insulation_roof_area / base_area) ** labour_exponent
|
||||
min_days, base_days * (insulation_roof_area / base_area) ** labour_exponent
|
||||
)
|
||||
|
||||
return labour_days
|
||||
|
||||
@classmethod
|
||||
def sloping_ceiling_insulation(cls, insulation_roof_area: float) -> Mapping[str, float]:
|
||||
def sloping_ceiling_insulation(
|
||||
cls, insulation_roof_area: float
|
||||
) -> Mapping[str, float]:
|
||||
"""
|
||||
This costing for this is based on Checkatrade desktop research, since we are yet to receive installer quotes.
|
||||
:param insulation_roof_area: Area of the sloping ceiling to be insulated
|
||||
|
|
@ -985,14 +1032,20 @@ class Costs:
|
|||
# https://www.checkatrade.com/blog/cost-guides/vaulted-ceiling-cost/
|
||||
# https://www.thegreenage.co.uk/can-i-insulate-my-sloping-ceiling/
|
||||
# These assumptions last updated 21/02/2026
|
||||
insulation_cost_per_m2 = 52 # The actual install process is quite similar to IWI
|
||||
insulation_cost_per_m2 = (
|
||||
52 # The actual install process is quite similar to IWI
|
||||
)
|
||||
labour_rate = 250 # per day
|
||||
contingency_rate = cls.CONTINGENCIES["sloping_ceiling_insulation"]
|
||||
|
||||
labour_days = cls._estimate_number_of_days_for_sloping_ceiling(insulation_roof_area)
|
||||
labour_days = cls._estimate_number_of_days_for_sloping_ceiling(
|
||||
insulation_roof_area
|
||||
)
|
||||
labour_hours = labour_days * 8
|
||||
|
||||
total = (insulation_cost_per_m2 * insulation_roof_area) + (labour_rate * labour_days)
|
||||
total = (insulation_cost_per_m2 * insulation_roof_area) + (
|
||||
labour_rate * labour_days
|
||||
)
|
||||
|
||||
# Assume VAT included in the total => total is 120% of subtotal
|
||||
vat = total - (total / 1.2)
|
||||
|
|
|
|||
48
scripts/historic_epc_demo.py
Normal file
48
scripts/historic_epc_demo.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
"""Demo: look up historic EPC records for an address + postcode.
|
||||
|
||||
Reads the gzipped CSV at
|
||||
s3://retrofit-data-dev/historical_epc/<POSTCODE>/data.csv.gz
|
||||
scores rows against the user-provided address, and prints the top matches.
|
||||
|
||||
Usage:
|
||||
python -m scripts.historic_epc_demo "47 Gordon Road" "AB33 8AL"
|
||||
python -m scripts.historic_epc_demo # uses defaults below
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
from datatypes.epc.domain.historic_epc_matching import match_addresses_for_postcode
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def main(user_address: str, postcode: str) -> None:
    """
    Print the historic EPC candidates matched for one address and postcode.

    Calls ``match_addresses_for_postcode`` and writes to stdout: the lookup
    being performed, the number of candidate rows, the top three matches, and
    finally either the unambiguous UPRN or a note that none could be chosen.

    :param user_address: Address text supplied by the user.
    :param postcode: Postcode to look the address up under.
    """
    print(f"Looking up: {user_address!r} @ {postcode!r}\n")

    lookup = match_addresses_for_postcode(user_address, postcode)
    candidate_count = len(lookup.matches)
    print(f"Found {candidate_count} candidate row(s).\n")

    print("Top 3 matches:")
    for candidate in lookup.top_n(3):
        summary = (
            f" rank={candidate.lexirank} score={candidate.lexiscore:.3f} "
            f"uprn={candidate.record.uprn or '(none)':<14} {candidate.record.address}"
        )
        print(summary)

    print()
    resolved_uprn: Optional[str] = lookup.unambiguous_uprn()
    if not resolved_uprn:
        # Falsy result: the matcher could not single out one UPRN.
        print("No unambiguous UPRN (zero-score, tie, or empty result).")
        return
    print(f"Unambiguous UPRN: {resolved_uprn}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Accept either both positional arguments or none at all; anything else
    # prints the module usage text and exits with status 2.
    cli_args = sys.argv[1:]
    if not cli_args:
        main("47 Gordon Road", "AB33 8AL")
    elif len(cli_args) == 2:
        address_arg, postcode_arg = cli_args
        main(address_arg, postcode_arg)
    else:
        print(__doc__)
        sys.exit(2)
|
||||
|
|
@ -26,13 +26,13 @@ from backend.app.db.functions.materials_functions import get_materials
|
|||
from collections import defaultdict
|
||||
from sqlalchemy import func
|
||||
|
||||
PORTFOLIO_ID = 711
|
||||
SCENARIOS = [1233]
|
||||
PORTFOLIO_ID = 632
|
||||
SCENARIOS = [1144]
|
||||
scenario_names = {
|
||||
1233: "Reach EPC C",
|
||||
1144: "EPC C",
|
||||
}
|
||||
|
||||
project_name = "Novus"
|
||||
project_name = "Calico Refresh"
|
||||
|
||||
|
||||
def get_data(portfolio_id, scenario_ids):
|
||||
|
|
|
|||
14
utils/pandas_utils.py
Normal file
14
utils/pandas_utils.py
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def pandas_cell_to_str(v: Any) -> str:
    """
    Convert a single pandas cell value to a clean string.

    Missing values are normalised to an empty string so callers can rely on
    plain truthiness checks. This covers ``None``, float NaN, ``pd.NA``,
    ``pd.NaT`` and the literal text "nan" (any casing) left behind by an
    earlier ``.astype(str)``. Non-breaking spaces are replaced by regular
    spaces in every other value.

    :param v: The cell value to convert.
    :return: "" for a missing value, otherwise ``str(v)`` with "\\xa0"
        replaced by " ".
    """
    # is_scalar guards pd.isna: for list-like input pd.isna returns an array,
    # whose truth value is ambiguous. Scalars always yield a plain bool.
    if v is None or (pd.api.types.is_scalar(v) and pd.isna(v)):
        return ""
    s = str(v).replace("\xa0", " ")
    # get_uprn_candidates runs .astype(str) on UPRN, turning NaN into "nan".
    # Treat that as missing so unambiguous_uprn truthiness checks work.
    if s.lower() == "nan":
        return ""
    return s
|
||||
17
utils/s3.py
17
utils/s3.py
|
|
@ -6,8 +6,6 @@ from io import BytesIO, StringIO
|
|||
from urllib.parse import unquote
|
||||
from utils.logger import setup_logger
|
||||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
||||
from typing import Any
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
|
|
@ -167,6 +165,21 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key):
|
|||
return df
|
||||
|
||||
|
||||
def read_csv_gz_from_s3(bucket_name: str, file_key: str) -> pd.DataFrame:
    """
    Load a gzip-compressed CSV object from S3 as a pandas DataFrame.

    :param bucket_name: Name of the S3 bucket.
    :param file_key: Key of the object; it has to carry the .csv.gz suffix.
    :return: The parsed contents as a pandas DataFrame.
    :raises ValueError: If ``file_key`` does not end with .csv.gz.
    """
    has_expected_suffix = file_key.endswith(".csv.gz")
    if not has_expected_suffix:
        raise ValueError("file_key must end with .csv.gz")

    # Fetch the raw object first, then let pandas decompress and parse it.
    gz_stream = read_io_from_s3(bucket_name=bucket_name, file_key=file_key)
    frame = pd.read_csv(gz_stream, compression="gzip", low_memory=False)
    return frame
|
||||
|
||||
|
||||
def save_csv_to_s3(dataframe, bucket_name, file_name):
|
||||
"""
|
||||
Save a Pandas DataFrame to a CSV file in an S3 bucket.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue