fixed merge conflicts with main

This commit is contained in:
Khalim Conn-Kowlessar 2026-02-07 21:19:40 +00:00
commit 7bb7972549
84 changed files with 3370 additions and 1119 deletions

View file

@ -0,0 +1,39 @@
# Dev-container image for the asset_list tooling: Python 3.11 plus a
# from-source build of libpostal and a passwordless-sudo developer account.
FROM python:3.11.10-bullseye

ARG USER=vscode
# Build-time only (ARG, not ENV) so apt never prompts and the setting does not
# leak into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# 1) Toolchain + utilities for building libpostal
RUN apt-get update && apt-get install -y --no-install-recommends \
        sudo jq vim curl git ca-certificates \
        build-essential pkg-config automake autoconf libtool \
    && rm -rf /var/lib/apt/lists/*

# 2) Build and install libpostal from source
# NOTE(review): the shallow clone tracks the default branch head, so rebuilds
# are not reproducible — consider pinning a tag or commit.
RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
    && cd /tmp/libpostal \
    && ./bootstrap.sh \
    && ./configure --datadir=/usr/local/share/libpostal \
    && make -j"$(nproc)" \
    && make install \
    && ldconfig \
    && rm -rf /tmp/libpostal

# 3) Create the user and grant sudo privileges
# /bin/bash is used as the login shell because it resolves on both split-/usr
# and merged-/usr Debian layouts; /usr/bin/bash is only present on the latter.
RUN useradd -m -s /bin/bash "${USER}" \
    && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >"/etc/sudoers.d/${USER}" \
    && chmod 0440 "/etc/sudoers.d/${USER}"

# 4) Python deps - needed to run the asset list tooling
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# COPY rather than ADD: this is a plain local file, no extraction/URL fetch needed.
COPY asset_list/requirements.txt requirements.txt
RUN pip install -r requirements.txt

# 5) Workdir
WORKDIR /workspaces/model

# 6) Make Python find your package
# Add project root to PYTHONPATH for all processes.
# No "${PYTHONPATH}" suffix: nothing in this Dockerfile defines it, so the
# expansion would only leave a trailing ":" (an implicit current-directory entry).
# NOTE(review): assumes the base image does not set PYTHONPATH — confirm.
ENV PYTHONPATH=/workspaces/model

View file

@ -1,7 +1,7 @@
{
"name": "Basic Python",
"name": "SAL ENV",
"dockerComposeFile": "docker-compose.yml",
"service": "model",
"service": "model-sal",
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/model",
"postStartCommand": "bash .devcontainer/post-install.sh",

View file

@ -1,14 +1,14 @@
version: '3.8'
services:
model:
model-sal:
user: "${UID}:${GID}"
build:
context: ..
dockerfile: .devcontainer/Dockerfile
context: ../..
dockerfile: .devcontainer/asset_list/Dockerfile
command: sleep infinity
volumes:
- ..:/workspaces/model
- ../../:/workspaces/model
networks:
- model-net

View file

@ -0,0 +1,24 @@
fastapi==0.115.2
sqlalchemy==2.0.36
psycopg2-binary==2.9.10
python-jose==3.3.0
cryptography==43.0.3
mangum==0.19.0
# AWS
boto3==1.35.44
# Data
openpyxl==3.1.2
# Basic
pytz
uvicorn[standard]
# Testing
pytest==9.0.2
pytest-cov==7.0.0
ipykernel>=6.25,<7
pydantic-settings<2
pyyaml>=6.0.1
pydantic>=1.10.7,<2
sqlmodel
# Formatting
black==26.1.0
# Provides `from dotenv import load_dotenv`; the distribution is named
# python-dotenv (the bare `dotenv` name on PyPI is a different project).
python-dotenv

View file

@ -34,7 +34,7 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD backend/engine/requirements.txt requirements1.txt
ADD backend/app/requirements/requirements.txt requirements2.txt
ADD .devcontainer/requirements.txt requirements3.txt
ADD .devcontainer/backend/requirements.txt requirements3.txt
RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt
RUN pip install -r requirements.txt

View file

@ -0,0 +1,40 @@
{
"name": "Backend Model Env",
"dockerComposeFile": "docker-compose.yml",
"service": "model-backend",
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/model",
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
"mounts": [
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
],
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter",
"mechatroner.rainbow-csv",
"ms-toolsai.datawrangler",
"lindacong.vscode-book-reader",
"4ops.terraform",
"fabiospampinato.vscode-todo-plus",
"jgclark.vscode-todo-highlight",
"corentinartaud.pdfpreview",
"ms-python.vscode-python-envs",
"ms-python.black-formatter",
"waderyan.gitblame"
],
"settings": {
"files.defaultWorkspace": "/workspaces/model",
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
},
"python.formatting.provider": "none"
}
}
},
"containerEnv": {
"PYTHONFLAGS": "-Xfrozen_modules=off"
}
}

View file

@ -0,0 +1,28 @@
# Dev-container stack for the backend: the workspace container plus a local
# Postgres instance. Credentials below are for local development only.
version: '3.8'

services:
  model-backend:
    user: "${UID}:${GID}"
    build:
      context: ../..
      dockerfile: .devcontainer/backend/Dockerfile
    # Keep the container alive so the editor can attach to it.
    command: sleep infinity
    volumes:
      - ../../:/workspaces/model

  db:
    image: postgres:17.4
    restart: unless-stopped
    ports:
      - "5432:5432"
    environment:
      # POSTGRES_DB is what the image's init script uses to create the database
      # on a fresh volume; PGDATABASE only sets the default database for libpq
      # clients (psql etc.) and is kept for that purpose.
      - POSTGRES_DB=tech_team_local_db
      - PGDATABASE=tech_team_local_db
      - POSTGRES_USER=postgres
      - POSTGRES_PASSWORD=makingwarmerhomes
    volumes:
      - postgres-data-two:/var/lib/postgresql/data

volumes:
  postgres-data-two:

View file

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Installs an IPython startup hook so every Jupyter/IPython kernel started in
# the dev container loads the backend .env file into its environment.
set -euo pipefail

mkdir -p ~/.ipython/profile_default/startup

# Quoted 'EOF' delimiter: the Python below is written verbatim, with no shell expansion.
cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
from dotenv import load_dotenv
import os

# Adjust path as needed
env_path = "/workspaces/model/backend/.env"

if os.path.exists(env_path):
    load_dotenv(env_path)
    print("✔ Loaded .env into Jupyter kernel")
else:
    print("⚠ No .env file found to load")
EOF

View file

@ -1,4 +1,4 @@
# fastapi
fastapi==0.115.2
sqlalchemy==2.0.36
pydantic-settings==2.6.0

78
.github/workflows/_build_image.yml vendored Normal file
View file

@ -0,0 +1,78 @@
# Reusable workflow: builds a Docker image, pushes it to ECR tagged with the
# commit SHA, and exposes the pushed digest and repository URL as outputs.
name: Build Docker image

on:
  workflow_call:
    inputs:
      ecr_repo:
        required: true
        type: string
      dockerfile_path:
        required: true
        type: string
      build_context:
        required: false
        default: "."
        type: string
    outputs:
      image_digest:
        description: "Pushed image digest"
        value: ${{ jobs.build.outputs.image_digest }}
      ecr_repo_url:
        description: "ECR repository URL"
        value: ${{ jobs.build.outputs.ecr_repo_url }}
    secrets:
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AWS_REGION:
        required: true

jobs:
  build:
    runs-on: ubuntu-latest
    outputs:
      image_digest: ${{ steps.digest.outputs.image_digest }}
      # NOTE(review): this URL embeds the AWS_REGION secret value; GitHub may
      # withhold job outputs that contain secrets — confirm callers receive it.
      ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }}
    steps:
      - uses: actions/checkout@v4

      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - uses: aws-actions/amazon-ecr-login@v2

      # Expression values are passed through env rather than spliced into the
      # script text, so an unusual input value cannot alter the shell command.
      - name: Resolve ECR repo URL
        id: repo
        env:
          AWS_REGION: ${{ secrets.AWS_REGION }}
          ECR_REPO: ${{ inputs.ecr_repo }}
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
          ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}"
          echo "Resolved ECR repo URL (local var):"
          echo "$ECR_REPO_URL"
          echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"

      - name: Build & push image
        env:
          ECR_REPO_URL: ${{ steps.repo.outputs.ecr_repo_url }}
          DOCKERFILE_PATH: ${{ inputs.dockerfile_path }}
          BUILD_CONTEXT: ${{ inputs.build_context }}
        run: |
          IMAGE_URI="${ECR_REPO_URL}:${GITHUB_SHA}"
          docker build -f "$DOCKERFILE_PATH" -t "$IMAGE_URI" "$BUILD_CONTEXT"
          docker push "$IMAGE_URI"

      # Look the digest up by the tag just pushed so downstream jobs can pin
      # the exact image rather than a mutable tag.
      - name: Resolve image digest
        id: digest
        env:
          ECR_REPO: ${{ inputs.ecr_repo }}
        run: |
          DIGEST=$(aws ecr describe-images \
            --repository-name "$ECR_REPO" \
            --image-ids "imageTag=${GITHUB_SHA}" \
            --query 'imageDetails[0].imageDigest' \
            --output text)
          echo "image_digest=$DIGEST" >> "$GITHUB_OUTPUT"

91
.github/workflows/_deploy_lambda.yml vendored Normal file
View file

@ -0,0 +1,91 @@
# Reusable workflow: deploys one Lambda with Terraform, pointing it at an
# image in ECR identified by repository name + digest.
name: Deploy Lambda (Terraform)

on:
  workflow_call:
    inputs:
      lambda_name:
        required: true
        type: string
      lambda_path:
        required: true
        type: string
      stage:
        required: true
        type: string
      ecr_repo:
        required: true
        type: string
      image_digest:
        required: true
        type: string
    secrets:
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AWS_REGION:
        required: true

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Prints the declared inputs. The previous version echoed
      # inputs.ecr_repo_url, which is not an input of this workflow and was
      # therefore always empty; the declared input is ecr_repo.
      - name: Debug inputs
        env:
          LAMBDA_NAME: ${{ inputs.lambda_name }}
          LAMBDA_PATH: ${{ inputs.lambda_path }}
          STAGE: ${{ inputs.stage }}
          ECR_REPO: ${{ inputs.ecr_repo }}
          IMAGE_DIGEST: ${{ inputs.image_digest }}
        run: |
          echo "lambda_name=${LAMBDA_NAME}"
          echo "lambda_path=${LAMBDA_PATH}"
          echo "stage=${STAGE}"
          echo "ecr_repo=${ECR_REPO}"
          echo "image_digest=${IMAGE_DIGEST}"

      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - uses: hashicorp/setup-terraform@v3

      - uses: aws-actions/amazon-ecr-login@v2

      # The repository URL is rebuilt here from the account id and region
      # instead of being taken as an input.
      - name: Resolve ECR repo URL
        id: repo
        env:
          AWS_REGION: ${{ secrets.AWS_REGION }}
          ECR_REPO: ${{ inputs.ecr_repo }}
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
          ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}"
          echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"

      - name: Terraform Init
        working-directory: ${{ inputs.lambda_path }}
        run: terraform init -reconfigure

      # Select the per-stage workspace, creating it on first use.
      - name: Terraform Workspace
        working-directory: ${{ inputs.lambda_path }}
        env:
          STAGE: ${{ inputs.stage }}
        run: |
          terraform workspace select "${STAGE}" \
            || terraform workspace new "${STAGE}"

      - name: Terraform Plan
        working-directory: ${{ inputs.lambda_path }}
        env:
          STAGE: ${{ inputs.stage }}
          LAMBDA_NAME: ${{ inputs.lambda_name }}
          ECR_REPO_URL: ${{ steps.repo.outputs.ecr_repo_url }}
          IMAGE_DIGEST: ${{ inputs.image_digest }}
        run: |
          terraform plan \
            -var="stage=${STAGE}" \
            -var="lambda_name=${LAMBDA_NAME}" \
            -var="ecr_repo_url=${ECR_REPO_URL}" \
            -var="image_digest=${IMAGE_DIGEST}" \
            -out=lambdaplan

      # Applies exactly the plan file produced by the previous step.
      - name: Terraform Apply
        working-directory: ${{ inputs.lambda_path }}
        run: terraform apply -auto-approve lambdaplan

View file

@ -1,80 +1,98 @@
name: Deploy terraform stack
name: Deploy infrastructure
on:
push:
branches:
- dev
- prod
- "**"
jobs:
deploy:
determine_stage:
runs-on: ubuntu-latest
outputs:
stage: ${{ steps.set-stage.outputs.stage }}
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Setup AWS credentials file
- name: Determine stage from branch
id: set-stage
shell: bash
run: |
mkdir -p ~/.aws
echo "[DevAdmin]" > ~/.aws/credentials
echo "aws_access_key_id = ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
echo "aws_secret_access_key = ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
echo "[ProdAdmin]" >> ~/.aws/credentials
echo "aws_access_key_id = ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
echo "aws_secret_access_key = ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
BRANCH="${GITHUB_REF_NAME}"
- name: Setup AWS config file
run: |
echo "[profile DevAdmin]" > ~/.aws/config
echo "region = eu-west-2" >> ~/.aws/config
echo "[profile ProdAdmin]" >> ~/.aws/config
echo "region = eu-west-2" >> ~/.aws/config
if [[ "$BRANCH" == "prod" ]]; then
echo "stage=prod" >> "$GITHUB_OUTPUT"
- name: Setup Terraform
uses: hashicorp/setup-terraform@v1
with:
terraform_version: 1.5.2
elif [[ "$BRANCH" == "dev" ]]; then
echo "stage=dev" >> "$GITHUB_OUTPUT"
- name: Configure AWS credentials (DevAdmin)
uses: aws-actions/configure-aws-credentials@v1
else
echo "stage=dev" >> "$GITHUB_OUTPUT"
fi
# ============================================================
# 1⃣ Shared Terraform (infra)
# ============================================================
shared_terraform:
needs: determine_stage
runs-on: ubuntu-latest
env:
STAGE: ${{ needs.determine_stage.outputs.stage }}
steps:
- uses: actions/checkout@v4
- uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-2
env:
AWS_PROFILE: "DevAdmin"
aws-region: ${{ secrets.DEV_AWS_REGION }}
- uses: hashicorp/setup-terraform@v3
- name: Terraform Init
run: cd infrastructure/terraform && terraform init
working-directory: infrastructure/terraform/shared
run: terraform init -reconfigure
- name: Terraform Workspace
run: |
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
cd infrastructure/terraform
terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
working-directory: infrastructure/terraform/shared
run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}
- name: Terraform Plan
run: |
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
cd infrastructure/terraform && terraform plan -var-file=${BRANCH_NAME}.tfvars
working-directory: infrastructure/terraform/shared
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
- name: Deploy to Dev
if: github.ref == 'refs/heads/dev'
run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
env:
name: dev
- name: Terraform Apply
if: env.STAGE == 'prod'
working-directory: infrastructure/terraform/shared
run: terraform apply -auto-approve tfplan
- name: Configure AWS credentials (ProdAdmin)
uses: aws-actions/configure-aws-credentials@v1
# ============================================================
# 2⃣ Build Address 2 UPRN image and Push
# ============================================================
address2uprn_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-2
env:
AWS_PROFILE: "ProdAdmin"
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/address2UPRN/Dockerfile
build_context: backend/address2UPRN
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
- name: Deploy to Prod
if: github.ref == 'refs/heads/prod'
run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
env:
name: prod
# ============================================================
# 3⃣ Deploy Address 2 UPRN Lambda
# ============================================================
address2uprn_lambda:
needs: [address2uprn_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: address2uprn
lambda_path: infrastructure/terraform/lambda/address2UPRN
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

View file

@ -2,6 +2,12 @@ name: Run unit tests
on:
pull_request:
branches:
- "**"
push:
branches:
- "**"
jobs:
test:

View file

@ -9,9 +9,12 @@
"path": "/bin/bash"
}
},
// Test discovery uses pytest rather than unittest; --no-cov disables pytest-cov reporting.
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
// Hot reload setting that needs to be in user settings
// "jupyter.runStartupCommands": [

View file

@ -34,7 +34,8 @@ from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
logger = setup_logger()
# OpenAI API key: read from the environment only. Never commit a fallback value —
# a key embedded in source is readable by anyone with repository access and must
# be treated as compromised (revoke and rotate it).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
class DataRemapper:
@ -1160,12 +1161,16 @@ class AssetList:
axis=1
)
col = self.EPC_API_DATA_NAMES["roof-description"]
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply(
lambda x: RoofAttributes(description=x[self.EPC_API_DATA_NAMES["roof-description"]]).process()[
lambda x: RoofAttributes(description=x[col]).process()[
"insulation_thickness"] if not pd.isnull(
x[self.EPC_API_DATA_NAMES["roof-description"]]) else None,
x[col]) else None,
axis=1
)
# Remove every "+" character from the extracted thickness values.
# regex=False makes the match explicitly literal, so the result does not depend
# on the pandas version's default for `regex` ("+" is a regex quantifier).
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = (
    self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace(
        "+", "", regex=False
    )
)

View file

@ -1,5 +1,5 @@
# OpenAI API key: read from the environment only. Never commit a fallback value —
# a key embedded in source is readable by anyone with repository access and must
# be treated as compromised (revoke and rotate it).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
class DataRemapper:

0
asset_list/__init__.py Normal file
View file

View file

@ -14,22 +14,32 @@ from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
EPC_AUTH_TOKEN = os.getenv(
"EPC_AUTH_TOKEN",
)
def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
def extract_address1(
asset_list, full_address_col, postcode_col, method="first_two_words"
):
if method == "first_two_words":
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
asset_list["address1_extracted"] = (
asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
)
return asset_list
if method == "first_word":
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
asset_list["address1_extracted"] = (
asset_list[full_address_col].str.split(" ").str[0]
)
return asset_list
if method == "house_number_extraction":
asset_list["address1_extracted"] = asset_list.apply(
lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]),
axis=1
lambda x: SearchEpc.get_house_number(
address=x[full_address_col], postcode=x[postcode_col]
),
axis=1,
)
return asset_list
@ -59,21 +69,20 @@ def app():
Property UPRN
"""
# Fairhive
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Fairhive"
data_filename = "Fairhive Asset list.xlsx"
sheet_name = "Sheet1"
postcode_column = 'POSTCODE'
address1_column = "ADDRESS"
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
data_filename = "Domna SHF Wave 3 (3).xlsx"
sheet_name = "Domna Wave 3"
postcode_column = "Postcode"
address1_column = "Address 1"
address1_method = None
fulladdress_column = 'ADDRESS'
address_cols_to_concat = []
fulladdress_column = None
address_cols_to_concat = ["Address 1"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "PROPERTY TYPE"
landlord_built_form = None
landlord_wall_construction = None
landlord_year_built = "Construction Years"
landlord_os_uprn = "UPRN"
landlord_property_type = "Type"
landlord_built_form = "Attachment"
landlord_wall_construction = "Wall type"
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
@ -93,93 +102,28 @@ def app():
asset_list_header = 0
landlord_block_reference = None
# Hyde
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Minor Works"
data_filename = "Hyde Group - Domna Minor Works Programme List.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = 'Address'
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Age"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = "Property Type"
landlord_wall_construction = "Walls"
landlord_roof_construction = "Roofs"
landlord_heating_system = "Heating"
landlord_existing_pv = "Renewables"
landlord_property_id = "Organisation Reference"
landlord_sap = "SAP (10)"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/20260129 SAL"
data_filename = "NCHA ASSET LIST 1.xlsx"
sheet_name = "NCHA ASSET LIST"
postcode_column = 'POSTCODE'
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = 'ADDRESS'
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "PROPERTY TYPE"
landlord_built_form = "BUILD FORM"
landlord_wall_construction = "wall combined"
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "UPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Peabody data for cleaning
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation")
data_folder = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation"
)
data_filename = "to_standardise_uprns.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
address1_column = "Address 1"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["Address 1", "Address 2", "Address 3"]
postcode_column = "Postcode"
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = None
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Type"
landlord_built_form = "Attachment"
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Org Ref"
landlord_property_id = "LLUPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -195,40 +139,6 @@ def app():
asset_list_header = 0
landlord_block_reference = None
# Lambeth:
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
# data_filename = "lambeth_sw2_leigham court estate.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# address1_column = "Address"
# address1_method = None
# fulladdress_column = None
# address_cols_to_concat = ["Address"]
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "row_id"
# landlord_sap = None
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_id_colnames = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = None
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
@ -253,49 +163,62 @@ def app():
landlord_existing_pv=landlord_existing_pv,
landlord_sap=landlord_sap,
landlord_block_reference=landlord_block_reference,
phase=phase
phase=phase,
)
asset_list.init_standardise()
# We produce the new maps, which can be saved for future useage
new_property_type_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_property_type] if
asset_list.landlord_property_type else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_property_type]
if asset_list.landlord_property_type
else {}
).items()
if k not in PROPERTY_MAPPING
}
new_built_form_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_built_form] if
asset_list.landlord_built_form else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_built_form]
if asset_list.landlord_built_form
else {}
).items()
if k not in BUILT_FORM_MAPPINGS
}
new_wall_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
asset_list.landlord_wall_construction else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_wall_construction]
if asset_list.landlord_wall_construction
else {}
).items()
if k not in WALL_CONSTRUCTION_MAPPINGS
}
new_heating_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_heating_system] if
asset_list.landlord_heating_system else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_heating_system]
if asset_list.landlord_heating_system
else {}
).items()
if k not in HEATING_MAPPINGS
}
new_existing_pv_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_existing_pv]
if asset_list.landlord_existing_pv
else {}
).items()
if k not in EXISTING_PV_MAPPINGS
}
new_roof_construction_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_roof_construction] if
asset_list.landlord_roof_construction else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_roof_construction]
if asset_list.landlord_roof_construction
else {}
).items()
if k not in ROOF_CONSTRUCTION_MAPPINGS
}
@ -309,7 +232,7 @@ def app():
outcomes_address=outcomes_address,
outcomes_postcode=outcomes_postcode,
outcomes_houseno=outcomes_houseno,
outcomes_id=outcomes_id
outcomes_id=outcomes_id,
)
asset_list.flag_survey_master(
@ -343,14 +266,16 @@ def app():
skip = max(chunk_indexes)
if any(x in folder_contents for x in downloaded_files):
skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
skip = max(
[i for i in chunk_indexes if filename.format(i=i) in folder_contents]
)
for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
print(f"Processing chunk {i} to {i + chunk_size}")
if skip is not None and not force_retrieve_data:
if i <= skip:
continue
chunk = asset_list.standardised_asset_list[i:i + chunk_size]
chunk = asset_list.standardised_asset_list[i : i + chunk_size]
epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
df=chunk,
row_id_name=asset_list.DOMNA_PROPERTY_ID,
@ -362,7 +287,7 @@ def app():
built_form_column=AssetList.STANDARD_BUILT_FORM,
manual_uprn_map=manual_uprn_map,
epc_api_only=epc_api_only,
epc_auth_token=EPC_AUTH_TOKEN
epc_auth_token=EPC_AUTH_TOKEN,
)
# We now retrieve any failed properties
@ -385,7 +310,9 @@ def app():
# Append the failed data to the main data
# Store the chunk locally as a csv
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
pd.DataFrame(epc_data_chunk).to_csv(
os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False
)
# Store the errors and no-data locally
with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
json.dump(errors_chunk, f)
@ -416,7 +343,9 @@ def app():
unique_recommendations = set()
for _, row in recommendations_df.iterrows():
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
unique_recommendations.update(
[rec["improvement-summary-text"] for rec in row["recommendations"]]
)
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
transformed_data = []
@ -436,20 +365,24 @@ def app():
transformed_df = pd.DataFrame(transformed_data)
for col in [
"Floor insulation (solid floor)",
"Floor insulation", "Floor insulation (suspended floor)"
"Floor insulation",
"Floor insulation (suspended floor)",
]:
if col not in transformed_df.columns:
transformed_df[col] = False
transformed_df = transformed_df[
[
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
"Floor insulation", "Floor insulation (suspended floor)"
asset_list.DOMNA_PROPERTY_ID,
"Floor insulation (solid floor)",
"Floor insulation",
"Floor insulation (suspended floor)",
]
]
transformed_df["epc_has_floor_recommendation"] = (
transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
transformed_df["Floor insulation (suspended floor)"]
transformed_df["Floor insulation (solid floor)"]
| transformed_df["Floor insulation"]
| transformed_df["Floor insulation (suspended floor)"]
)
# Get the find my epc data
@ -462,21 +395,20 @@ def app():
find_my_epc_data.append(
{
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
**x["find_my_epc_data"]
**x["find_my_epc_data"],
}
)
else:
find_my_epc_data.append(
{
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
}
{asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]}
)
find_my_epc_data = pd.DataFrame(find_my_epc_data)
find_my_epc_data = find_my_epc_data.merge(
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
how="left", on=asset_list.DOMNA_PROPERTY_ID
how="left",
on=asset_list.DOMNA_PROPERTY_ID,
)
# We check if we get the solar pv column:
@ -486,27 +418,33 @@ def app():
# Retrieve just the data we need
epc_df = epc_df[
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
].rename(
columns=asset_list.EPC_API_DATA_NAMES
)
].rename(columns=asset_list.EPC_API_DATA_NAMES)
# Look for columns not in the find my EPC data, which will have happened if we didn't
# retrieve it in the first place
missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
missed_find_epc_cols = [
c
for c in list(asset_list.FIND_EPC_DATA_NAMES.keys())
if c not in find_my_epc_data.columns
]
if missed_find_epc_cols:
for c in missed_find_epc_cols:
find_my_epc_data[c] = None
epc_df = epc_df.merge(
find_my_epc_data[
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
]
.rename(columns=asset_list.FIND_EPC_DATA_NAMES),
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
+ list(asset_list.FIND_EPC_DATA_NAMES.keys())
].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
how="left",
on=asset_list.DOMNA_PROPERTY_ID
on=asset_list.DOMNA_PROPERTY_ID,
)
asset_list.merge_data(epc_df)
# asset_list.standardised_asset_list = asset_list.standardised_asset_list[
# asset_list.standardised_asset_list["domna_full_address"]
# != "120 Airdrie Crescent, Burnley, Lancashire"
# ]
asset_list.extract_attributes()
asset_list.identify_worktypes()
@ -516,7 +454,10 @@ def app():
asset_list.get_work_figures()
# Store as an excel
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
filename = (
os.path.join(data_folder, ".".join(data_filename.split(".")[:-1]))
+ " - Standardised.xlsx"
)
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
# Determine inspections priority
@ -540,26 +481,42 @@ def app():
# )
with pd.ExcelWriter(filename) as writer:
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
asset_list.standardised_asset_list.to_excel(
writer, sheet_name="Standardised Asset List", index=False
)
if asset_list.block_analysis_df is not None:
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
asset_list.block_analysis_df.to_excel(
writer, sheet_name="Block Analysis", index=False
)
# If we have outcomes, we add a tab with the outcomes
if not asset_list.outcomes_for_output.empty:
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
asset_list.outcomes_for_output.to_excel(
writer, sheet_name="Outcomes", index=False
)
if not asset_list.unmatched_submissions.empty:
asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
asset_list.unmatched_submissions.to_excel(
writer, sheet_name="Unmatched Submissions", index=False
)
if not asset_list.outcomes_no_match.empty:
asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
asset_list.outcomes_no_match.to_excel(
writer, sheet_name="Unmatched Outcomes", index=False
)
if not asset_list.ecosurv_no_match.empty:
asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
asset_list.ecosurv_no_match.to_excel(
writer, sheet_name="Unmatched Ecosurv", index=False
)
if not asset_list.geographical_areas.empty:
asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
asset_list.geographical_areas.to_excel(
writer, sheet_name="Geographical Areas", index=False
)
# Store dupes
if asset_list.duplicated_addresses is not None:
if not asset_list.duplicated_addresses.empty:
asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False)
asset_list.duplicated_addresses.to_excel(
writer, sheet_name="Duplicate Properties", index=False
)

View file

@ -1,7 +1,6 @@
postal
pandas
usaddress
pydantic-settings==2.6.0
epc-api-python==1.0.2
thefuzz
boto3
@ -10,6 +9,5 @@ openai>=1.3.5
tiktoken
msgpack
beautifulsoup4
pydantic>=1.10.7
typing-extensions>=4.5.0
requests>=2.28.2

22
backend/.env.local Normal file
View file

@ -0,0 +1,22 @@
DB_HOST=db
DB_PORT=5432
DB_NAME=tech_team_local_db
DB_USERNAME=postgres
DB_PASSWORD=makingwarmerhomes
#not used
GOOGLE_SOLAR_API_KEY="test"
SAP_PREDICTIONS_BUCKET="test"
CARBON_PREDICTIONS_BUCKET="test"
HEAT_PREDICTIONS_BUCKET="test"
HEATING_KWH_PREDICTIONS_BUCKET="test"
HOTWATER_KWH_PREDICTIONS_BUCKET="test"
API_KEY="test"
ENVIRONMENT="test"
SECRET_KEY="test"
PLAN_TRIGGER_BUCKET="test"
DATA_BUCKET="test"
EPC_AUTH_TOKEN="test"
ENGINE_SQS_URL="test"
ENERGY_ASSESSMENTS_BUCKET="test"

View file

@ -0,0 +1,7 @@
# AWS-provided base image for the Python 3.10 Lambda runtime
FROM public.ecr.aws/lambda/python:3.10

# Put the function module in the Lambda task root (the base image's working directory).
# NOTE(review): no dependency install step is present here; if main.py imports
# third-party packages they must be provided some other way - confirm.
COPY main.py ${LAMBDA_TASK_ROOT}/

# Handler reference in "<module>.<function>" form
CMD ["main.handler"]

View file

@ -0,0 +1,20 @@
We have a list of addresses as input.
They will arrive in batches that share the same postcode, and from there we want to convert each address into a UPRN.
If this Lambda function can do that, we'll be speeding ahead.
Energy Performance Information: https://epc.opendatacommunities.org/
guidance page: https://epc.opendatacommunities.org/docs/guidance#field_domestic_LMK_KEY
Example of Khalim's earlier code, for which he wrote some tests: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/tests/test_search_epc.py#L11
Example of EPC search: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/SearchEpc.py#L118
Khalim has made a Python package to help scrape the data: https://github.com/KhalimCK/epc-api-python

View file

View file

@ -0,0 +1,567 @@
from epc_api.client import EpcClient
import os
from urllib.parse import urlencode
import pandas as pd
from difflib import SequenceMatcher
from tqdm import tqdm
from utils.logger import setup_logger
logger = setup_logger()
import re
# Read the EPC API token from the environment; os.getenv returns None when the
# variable is not set.
EPC_AUTH_TOKEN = os.getenv(
    "EPC_AUTH_TOKEN",
)
# Fail at import time so a missing token is reported before any EPC request is
# attempted.
if EPC_AUTH_TOKEN is None:
    raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
import re
from difflib import SequenceMatcher
from typing import Set
def levenshtein(a: str, b: str) -> float:
    """
    Address similarity score in [0, 1].

    Despite the name, this is not a Levenshtein edit distance; it is a
    rule-guarded blend of token overlap and character similarity.

    Strategy:
    - Normalise both addresses with ``normalise_address``
    - Return 0.0 outright when the numbers in the two addresses cannot agree
      (hard guards below)
    - Otherwise combine token overlap (Jaccard, weight 0.65) with character
      similarity (``SequenceMatcher`` ratio, weight 0.35)

    Args:
        a: The user-supplied address.
        b: The candidate address to compare against (the EPC-side address).

    Returns:
        The blended score rounded to 4 decimal places, or 0.0 when one of the
        hard guards rejects the pair.
    """

    def extract_number_sequence(s: str) -> list[str]:
        # Numbers in order of appearance (optionally with a letter suffix).
        return re.findall(r"\d+[a-z]?", s)

    def extract_numbers(s: str) -> Set[str]:
        return set(extract_number_sequence(s))

    def tokenise(s: str) -> Set[str]:
        return set(s.split())

    def has_unit_marker(s: str, words: tuple) -> bool:
        # Match whole words only (optionally fused to a number, e.g. "flat5"),
        # so that street names such as "community" or "flatman" are not
        # mistaken for a flat/unit marker as a plain substring test would do.
        pattern = r"\b(?:" + "|".join(words) + r")(?=\d|\b)"
        return re.search(pattern, s) is not None

    def extract_building_number(s: str) -> str | None:
        """
        Extract the main building number (NOT flat/unit).
        Assumes formats like:
        - '42 moreton road'
        - 'flat 3 42 moreton road'
        """
        tokens = s.split()
        # remove flat/unit context: drop the marker and the token after it
        cleaned = []
        skip_next = False
        for t in tokens:
            if t in ("flat", "apt", "apartment", "unit"):
                skip_next = True
                continue
            if skip_next:
                skip_next = False
                continue
            cleaned.append(t)
        # first remaining number is building number
        for t in cleaned:
            if re.fullmatch(r"\d+[a-z]?", t):
                return t
        return None

    a_norm = normalise_address(a)
    b_norm = normalise_address(b)

    # --- hard signal: numbers ---
    nums_a = extract_numbers(a_norm)
    nums_b = extract_numbers(b_norm)
    # User gave a number but the candidate has none
    if nums_a and not nums_b:
        return 0.0
    # No shared numbers at all -> impossible match
    if nums_a and nums_b and nums_a.isdisjoint(nums_b):
        return 0.0

    # HARD GUARD: building number must match
    bld_a = extract_building_number(a_norm)
    bld_b = extract_building_number(b_norm)
    if bld_a and bld_b and bld_a != bld_b:
        return 0.0

    # --- order-sensitive flat/building guard ---
    # When the user wrote two bare numbers (no flat marker) and the candidate
    # is a flat, the two numbers must appear in the same order.
    seq_a = extract_number_sequence(a_norm)
    seq_b = extract_number_sequence(b_norm)
    has_flat_token_user = has_unit_marker(
        a_norm, ("flat", "apt", "apartment", "unit")
    )
    has_flat_token_epc = has_unit_marker(b_norm, ("flat",))
    if (
        len(seq_a) == 2
        and len(seq_b) >= 2
        and has_flat_token_epc
        and not has_flat_token_user
        and seq_a != seq_b[:2]
    ):
        return 0.0

    # --- token similarity (order-independent) ---
    toks_a = tokenise(a_norm)
    toks_b = tokenise(b_norm)
    if not toks_a or not toks_b:
        token_score = 0.0
    else:
        token_score = len(toks_a & toks_b) / len(toks_a | toks_b)

    # --- character similarity (soft signal) ---
    char_score = SequenceMatcher(None, a_norm, b_norm).ratio()

    # --- weighted blend ---
    return round(
        0.65 * token_score + 0.35 * char_score,
        4,
    )
def normalise_address(s: str) -> str:
    """
    Reduce a UK-style address to a canonical form for comparison.

    Steps, in order: lowercase; split a trailing letter off a number
    ("28a" -> "28 a"); replace punctuation other than "/" with spaces;
    collapse whitespace; map each token through a synonym table, dropping
    tokens that map to an empty string.

    Args:
        s: Raw address text. Falsy input (``None`` or ``""``) yields ``""``.

    Returns:
        The normalised address as a single space-separated string.
    """
    if not s:
        return ""

    # The dotted keys are listed for completeness; punctuation is stripped
    # before the lookup, so only the undotted forms can actually be hit.
    synonyms = {
        # street types
        "rd": "road",
        "rd.": "road",
        "st": "street",
        "st.": "street",
        "ave": "avenue",
        "ave.": "avenue",
        "ln": "lane",
        "ln.": "lane",
        "cres": "crescent",
        "ct": "court",
        "dr": "drive",
        # flats / units
        "apt": "flat",
        "apartment": "flat",
        "unit": "flat",
        "ste": "suite",
        # numbering noise (removed entirely)
        "no": "",
        "no.": "",
    }

    text = s.lower()
    # "28a" -> "28 a"
    text = re.sub(r"(\d+)([a-z])\b", r"\1 \2", text)
    # everything except word characters, whitespace and "/" becomes a space
    text = re.sub(r"[^\w\s/]", " ", text)
    text = re.sub(r"\s+", " ", text).strip()

    mapped = (synonyms.get(word, word) for word in text.split())
    return " ".join(word for word in mapped if word)
def score_addresses(
    df: pd.DataFrame,
    user_address: str,
    column: str = "address",
) -> pd.Series:
    """
    Score every address in ``df[column]`` against ``user_address``.

    Args:
        df: Frame holding the candidate addresses.
        user_address: The address to compare each candidate with.
        column: Name of the column containing candidate addresses.

    Returns:
        A Series of ``levenshtein`` scores aligned with ``df``.

    Raises:
        ValueError: If ``column`` is not present in ``df``.
    """
    if column in df.columns:
        candidates = df[column]
        return candidates.apply(lambda x: levenshtein(user_address, x))
    raise ValueError(f"Missing column: {column}")
def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
    """
    Fetch EPC search results for a postcode as a DataFrame.

    When the number of rows returned equals the requested ``size`` the result
    may have been truncated, so the search is repeated with double the size
    until fewer rows than ``size`` come back or ``max_attempts`` is reached.

    Args:
        postcode: Postcode to search for.
        size: Page size requested from the API; a falsy value omits the
            ``size`` query parameter.
        attempt: Number of the current attempt (1-based).
        max_attempts: Highest attempt number that may still trigger a retry.

    Returns:
        A DataFrame built from the response's ``"rows"`` using its
        ``"column-names"``. It may be truncated if the size limit was still
        hit on the last attempt (a warning is printed in that case).
    """
    client = EpcClient(auth_token=EPC_AUTH_TOKEN)
    # URLs always use "/"; os.path.join would insert the OS path separator
    # (a backslash on Windows), so join the path segment explicitly.
    base_url = client.domestic.host.rstrip("/") + "/search"

    while True:
        url = base_url
        if size:
            url += "?" + urlencode({"size": size})

        search_resp = client.domestic.call(
            url=url,
            method="get",
            params={"postcode": postcode},
        )
        results_df = pd.DataFrame(
            search_resp["rows"], columns=search_resp["column-names"]
        )

        # Fewer rows than requested: nothing was cut off
        if len(results_df) != size:
            return results_df

        # If we hit the size limit, there *may* be more results
        print(
            f"⚠️ Warning: hit size limit ({size}) for postcode '{postcode}'. "
            f"Attempt {attempt}/{max_attempts}."
        )
        if attempt >= max_attempts:
            print(
                "🚨 Max attempts reached. Results may be truncated. "
                "(Please do a manual review by the tech team.)"
            )
            return results_df

        print(f"🔁 Retrying with size={size * 2}")
        size *= 2
        attempt += 1
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """
    Check that ``df[column]`` holds exactly one distinct UPRN, equal to ``uprn``.

    Null values are ignored; remaining values are compared as stripped strings.
    Returns False when the column is missing, when no non-null value remains,
    when more than one distinct value is present, or when the single value
    differs from ``str(uprn)``.
    """
    if column not in df.columns:
        return False

    # Distinct non-null UPRNs, normalised to stripped strings
    distinct = set(df[column].dropna().astype(str).str.strip())

    # True only for a one-element set whose element is the expected UPRN
    return distinct == {str(uprn)}
def get_uprn_candidates(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Score and rank candidate rows against a user address.

    Works on a copy of ``df`` and adds two columns:
    - ``lexiscore``: ``levenshtein`` score of each candidate address against
      the normalised user address
    - ``lexirank``: dense rank of that score, 1 being the best

    The UPRN column is converted to strings with any trailing ``.0`` removed.
    This function only annotates and sorts; it never picks a UPRN.

    Raises:
        ValueError: If the address or UPRN column is missing from ``df``.

    Returns:
        The annotated copy, best candidates first.
    """
    available = df.columns
    if address_column not in available:
        raise ValueError(f"Missing column: {address_column}")
    if uprn_column not in available:
        raise ValueError(f"Missing column: {uprn_column}")

    user_norm = normalise_address(user_address)
    ranked = df.copy()

    ranked["lexiscore"] = ranked[address_column].apply(
        lambda x: levenshtein(user_norm, x)
    )

    # UPRNs as strings, without a float-style ".0" suffix
    uprn_text = ranked[uprn_column].astype(str)
    ranked[uprn_column] = uprn_text.str.replace(r"\.0$", "", regex=True)

    # Dense ranking: tied scores share a rank, 1 = best match
    dense_rank = ranked["lexiscore"].rank(method="dense", ascending=False)
    ranked["lexirank"] = dense_rank.astype(int)

    return ranked.sort_values(
        ["lexirank", "lexiscore"],
        ascending=[True, False],
    )
def get_uprn(user_inputed_address: str, postcode: str):
    """
    Resolve a user-entered address to a UPRN using the EPC rows for its postcode.

    Returns:
        The UPRN (str) when the top-ranked candidate rows agree on a single,
        non-empty UPRN.
        None in every other case: no EPC rows for the postcode, best score
        not above zero, top-ranked rows disagreeing on the UPRN, or the
        agreed UPRN being an empty string.
    """
    epc_rows = get_epc_data_with_postcode(postcode=postcode)
    if epc_rows.empty:
        return None

    ranked = get_uprn_candidates(
        epc_rows,
        user_address=user_inputed_address,
    )

    # Rows are sorted best-first, so the first row carries the best score
    if ranked.iloc[0]["lexiscore"] <= 0:
        return None

    # Every row tied for first place
    leaders = ranked[ranked["lexirank"] == 1]
    leading_uprn = leaders.iloc[0]["uprn"]

    # A tie between different UPRNs is ambiguous
    if not df_has_single_uprn(leaders, uprn=leading_uprn):
        return None

    address = leaders["address"].values[0]
    lexiscore = float(leaders["lexiscore"].values[0])
    logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")

    # An EPC match without a UPRN counts as "not found"
    return None if leading_uprn == "" else leading_uprn
def resolve_uprns_for_postcode_group(
    group_df: pd.DataFrame,
    epc_df: pd.DataFrame,
    address_col: str = "Address 1",
) -> pd.DataFrame:
    """
    Resolve a UPRN for every row of a postcode group against one EPC result set.

    Args:
        group_df: Rows sharing the same postcode.
        epc_df: EPC search results for that postcode.
        address_col: Column of ``group_df`` holding the user address.

    Returns:
        ``group_df`` (index reset) with these columns appended:
        ``found_uprn``, ``best_match_uprn``, ``best_match_address``,
        ``best_match_lexiscore`` and ``status``. ``status`` is one of
        ``"no_epc_candidates"``, ``"zero_score"``, ``"ambiguous"``,
        ``"matched_no_uprn"`` (best match carries an empty UPRN, so
        ``found_uprn`` is None, as ``get_uprn`` does) or ``"matched"``.
        The diagnostic columns are present even when ``group_df`` is empty.
    """
    result_columns = [
        "found_uprn",
        "best_match_uprn",
        "best_match_address",
        "best_match_lexiscore",
        "status",
    ]

    def outcome(status, found=None, uprn=None, address=None, score=None):
        # One diagnostics record; keys follow result_columns.
        return {
            "found_uprn": found,
            "best_match_uprn": uprn,
            "best_match_address": address,
            "best_match_lexiscore": score,
            "status": status,
        }

    results = []
    for _, row in group_df.iterrows():
        user_address = str(row[address_col]).strip()
        scored_df = get_uprn_candidates(
            epc_df,
            user_address=user_address,
        )

        if scored_df.empty:
            results.append(outcome("no_epc_candidates"))
            continue

        best_score = scored_df.iloc[0]["lexiscore"]
        if best_score <= 0:
            results.append(outcome("zero_score", score=best_score))
            continue

        # All rows tied for first place
        top_rank_df = scored_df[scored_df["lexirank"] == 1]
        best_row = top_rank_df.iloc[0]

        if not df_has_single_uprn(top_rank_df, best_row["uprn"]):
            results.append(
                outcome(
                    "ambiguous",
                    uprn=best_row["uprn"],
                    address=best_row["address"],
                    score=best_score,
                )
            )
            continue

        best_uprn = str(best_row["uprn"])
        if best_uprn == "":
            # EPC record found but it has no UPRN: report no UPRN rather than
            # a "matched" row with an empty string.
            results.append(
                outcome(
                    "matched_no_uprn",
                    uprn=best_uprn,
                    address=best_row["address"],
                    score=best_score,
                )
            )
            continue

        results.append(
            outcome(
                "matched",
                found=best_uprn,
                uprn=best_uprn,
                address=best_row["address"],
                score=best_score,
            )
        )

    return pd.concat(
        [
            group_df.reset_index(drop=True),
            # Explicit columns keep the output schema stable for empty groups
            pd.DataFrame(results, columns=result_columns),
        ],
        axis=1,
    )
def test(a, b):
assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
def run_all_test():
    """
    Ad-hoc checks for the postcode search and UPRN resolution.

    Every check calls ``get_epc_data_with_postcode`` / ``get_uprn`` directly,
    and the expected values are hard-coded. ``get_uprn`` returns ``None``
    (never ``False``) when it cannot resolve an address, so unresolved cases
    are compared with ``None``.
    """
    # Basic usage with different postcode styles
    for postcode in ("b93 8sy", "B938sy", "b93 8Sy"):
        test(get_epc_data_with_postcode(postcode).shape[0], 63)

    test(get_uprn("68", "b93 8sy"), "100070989938")
    test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938")
    test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633")
    test(get_uprn("28 A", "se6 4tf"), "100023278633")
    test(get_uprn("28A", "se6 4tf"), "100023278633")
    test(get_uprn("6 Aitken Close", "E8 4SQ"), None)

    # unique case
    test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("5 , 1 Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("1, 5 Semley Gate", "e9 5nh"), None)
    test(
        get_uprn("1 Semley Gate", "e9 5nh"), "10008238188"
    )  # this one return "flat 1, in 1 semley gate"

    # Addresses expected to stay unresolved
    test(get_uprn("48 Oswald Street", "E5 0BT"), None)
    test(get_uprn("42 Oswald Street", "E5 0BT"), None)
    test(get_uprn("46 Oswald Street", "E5 0BT"), None)

    # Smoke calls: results are not checked, they only need to run
    get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
    get_uprn_candidates(
        get_epc_data_with_postcode("Cr2 7dl"),
        "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
    )
# Script entry point: audit UPRN resolution against a spreadsheet of addresses
# with known UPRNs and write every disagreement to an Excel file.
if __name__ == "__main__":
    INPUT_FILE = "hackney.xlsx"
    ADDRESS_COL = "Address 1"
    POSTCODE_COL = "Postcode"
    UPRN_COL = "UPRN"
    df = pd.read_excel(INPUT_FILE)
    # One dict per failing row: the input row plus diagnostics
    failures = []
    for _, row in tqdm(
        df.iterrows(),
        total=len(df),
        desc="Auditing UPRNs",
    ):
        input_address = str(row[ADDRESS_COL]).strip()
        postcode = str(row[POSTCODE_COL]).strip()
        # Expected UPRN as a digit string, or None when the cell is empty
        expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL]))
        try:
            epc_df = get_epc_data_with_postcode(postcode)
            # No EPC rows at all: recorded as a failure regardless of the
            # expected UPRN
            if epc_df.empty:
                failures.append(
                    {
                        **row.to_dict(),
                        "found_uprn": None,
                        "best_match_uprn": None,
                        "best_match_address": None,
                        "best_match_lexiscore": None,
                        "status": "no_epc_results",
                    }
                )
                continue
            scored_df = get_uprn_candidates(
                epc_df,
                user_address=input_address,
            )
            # Best-scoring candidate, kept for the diagnostics columns
            best_row = scored_df.iloc[0]
            best_match_uprn = str(best_row["uprn"])
            best_match_address = best_row["address"]
            best_match_lexiscore = round(float(best_row["lexiscore"]), 4)
            # NOTE: get_uprn performs its own EPC lookup for the postcode, so
            # the search above is repeated here
            found_uprn = get_uprn(input_address, postcode)
        except Exception as e:
            # Any error while looking up or scoring this row is recorded and
            # the audit moves on to the next row
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": None,
                    "best_match_uprn": None,
                    "best_match_address": None,
                    "best_match_lexiscore": None,
                    "status": "exception",
                    "error": str(e),
                }
            )
            continue
        # Falsy results (None, "") are treated as "no UPRN found"
        found_uprn_norm = None if not found_uprn else str(found_uprn)
        if found_uprn_norm != expected_uprn:
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": found_uprn_norm,
                    "best_match_uprn": best_match_uprn,
                    "best_match_address": best_match_address,
                    "best_match_lexiscore": best_match_lexiscore,
                    "status": ("no_match" if found_uprn_norm is None else "mismatch"),
                }
            )
    failures_df = pd.DataFrame(failures)
    # Summary on stdout, full detail in the spreadsheet
    print("===================================")
    print(f"Total rows : {len(df)}")
    print(f"Failures : {len(failures_df)}")
    print("===================================")
    failures_df.to_excel(
        "hackney_uprn_failures.xlsx",
        index=False,
    )
def handler(event, context):
    """
    Placeholder entry point taking ``(event, context)``.

    Both arguments are ignored: the function prints a greeting and returns a
    response dict with status code 200 and the same greeting as its body.
    """
    response = {"statusCode": 200, "body": "hello world"}
    print(response["body"])
    return response
# TODO: function dispatcher
# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"), "Flat 1, 5 Semley Gate") and the same call with "Flat 5, 1 Semley Gate"
# fix that
# Look again at flat 1
# pandas reader, then the separate postcode_splitter
# dump into S3

View file

@ -0,0 +1,17 @@
import pandas as pd
# Failure report; its address column is "Address 1"
junte_df = pd.read_excel("hackney_uprn_failures.xlsx")

# Reference sheet; its address column is "domna_address_1"
khalim_df = pd.read_excel("khalim_standard.xlsx")

# Left join: every failure row is kept, reference columns are attached where
# the two address columns are equal
combined_df = junte_df.merge(
    khalim_df,
    how="left",
    left_on="Address 1",
    right_on="domna_address_1",
)

# Keep only the rows whose "epc_os_uprn" value is not null
uprn_missing = pd.isnull(combined_df["epc_os_uprn"])
result = combined_df[~uprn_missing]

View file

@ -0,0 +1,40 @@
# tests/test_address_to_uprn_csv.py
import csv
import pytest
from pathlib import Path
from backend.address2UPRN.main import get_uprn
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
def load_test_cases():
    """
    Build the parametrised cases from the CSV fixture at ``FIXTURE_PATH``.

    Each CSV row becomes one ``pytest.param`` of
    (``User Input``, ``Postcode``, ``Manual UPRN Code``), with a test id of
    the form ``"<User Input> [<Postcode>]"``.
    """
    cases = []
    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            case = pytest.param(
                row["User Input"],
                row["Postcode"],
                row["Manual UPRN Code"],
                id=f'{row["User Input"]} [{row["Postcode"]}]',
            )
            cases.append(case)
    return cases
@pytest.mark.parametrize(
    "user_input,postcode,expected_uprn",
    load_test_cases(),
)
def test_uprn_resolution_matches_manual(
    user_input: str,
    postcode: str,
    expected_uprn: str,
):
    """
    ``get_uprn`` must agree with the manually recorded UPRN for each fixture row.

    ``expected_uprn`` is the text read from the CSV, so a row without a UPRN
    carries the literal string ``"None"``.
    """
    uprn = get_uprn(user_input, postcode)
    if uprn:
        assert uprn == expected_uprn
    else:
        # A falsy result is compared through str(), so None matches the
        # fixture's "None" text
        assert str(uprn) == expected_uprn

View file

@ -0,0 +1,366 @@
User Input,Postcode,Manual UPRN Code
47 The Fairway,OX16 0RR,100120771697
11 REGENT COURT,SL1 3LG,100081041562
3/137a Windmill Road,TW8 9NH,100021516998
Flat 33,SW18 4BE,100023328943
FLAT 1 Brendon Grove,N2 8JE,200013412
Flat 15,KT8 2NE,100062123759
FLAT 5 Stonehill Road,W4 3AH,100021589829
10 Douglas Court,SL7 1UQ,100081278099
1 Windmill Road,HP17 8JA,766034606
31 Denewood,HP13 7LH,100081095964
"10, Greenways Drive",TW4 5DD,10091597009
Flat 10,W4 3AH,"100021589834"
Flat 11,TW4 5DD,10091597010
Flat 11,W4 3AH,100021589835
"12, Greenways Drive",TW4 5DD,10091597011
"Flat 12, Forbes House",W4 3AH,100021589836
FLAT 1 Goodstone Court,HA1 4FL,10070269053
Flat 13,TW4 5DD,10091597012
Flat 13,W4 3AH,100021589837
Flat 14,TW4 5DD,10091597013
Flat 14,W4 3AH,100021589838
Flat 15,TW4 5DD,10091597014
Flat 15,W4 3AH,100021589839
Flat 16,TW4 5DD,"10091597015"
Flat 16,W4 3AH,100021589840
Flat 17,TW4 5DD,10091597016
Flat 17,W4 3AH,100021589841
Flat 18,TW4 5DD,10091597017
Flat 19,W4 3AH,100021589843
Flat 20,W4 3AH,100021589844
Flat 21,W4 3AH,100021589845
Flat 22,W4 3AH,100021589846
FLAT 2 Goodstone Court,HA1 4FL,10070269054
Flat 23,W4 3AH,100021589847
Flat 24,W4 3AH,100021589848
"30c, Bosanquet Close",UB8 3PE,100021475316
"30e, Bosanquet Close",UB8 3PE,100021475318
FLAT 3 Goodstone Court,HA1 4FL,10070269055
FLAT 4 Goodstone Court,HA1 4FL,10070269056
FLAT 5 Goodstone Court,HA1 4FL,10070269057
FLAT 6 Goodstone Court,HA1 4FL,10070269058
FLAT 7 Goodstone Court,HA1 4FL,10070269059
FLAT 8 Goodstone Court,HA1 4FL,10070269060
FLAT 9 Goodstone Court,HA1 4FL,10070269061
FLAT 10 Goodstone Court,HA1 4FL,10070269062
FLAT 11 Goodstone Court,HA1 4FL,10070269063
FLAT 12 Goodstone Court,HA1 4FL,10070269064
FLAT 13 Goodstone Court,HA1 4FL,10070269065
FLAT 14 Goodstone Court,HA1 4FL,10070269066
FLAT 15 Goodstone Court,HA1 4FL,10070269067
FLAT 16 Goodstone Court,HA1 4FL,10070269068
FLAT 17 Goodstone Court,HA1 4FL,10070269069
FLAT 18 Goodstone Court,HA1 4FL,10070269070
FLAT 19 Goodstone Court,HA1 4FL,10070269071
FLAT 20 Goodstone Court,HA1 4FL,10070269072
FLAT 21 Goodstone Court,HA1 4FL,10070269073
FLAT 22 Goodstone Court,HA1 4FL,10070269074
FLAT 23 Goodstone Court,HA1 4FL,10070269075
FLAT 24 Goodstone Court,HA1 4FL,10070269076
FLAT 25 Goodstone Court,HA1 4FL,10070269077
FLAT 26 Goodstone Court,HA1 4FL,10070269078
FLAT 27 Goodstone Court,HA1 4FL,10070269079
FLAT 28 Goodstone Court,HA1 4FL,10070269080
FLAT 29 Goodstone Court,HA1 4FL,10070269081
FLAT 30 Goodstone Court,HA1 4FL,10070269082
FLAT 31 Goodstone Court,HA1 4FL,10070269083
FLAT 32 Goodstone Court,HA1 4FL,10070269084
FLAT 33 Goodstone Court,HA1 4FL,10070269085
FLAT 34 Goodstone Court,HA1 4FL,10070269086
FLAT 35 Goodstone Court,HA1 4FL,10070269087
FLAT 36 Goodstone Court,HA1 4FL,10070269088
FLAT 37 Goodstone Court,HA1 4FL,10070269089
FLAT 38 Goodstone Court,HA1 4FL,10070269090
FLAT 39 Goodstone Court,HA1 4FL,10070269091
FLAT 40 Goodstone Court,HA1 4FL,10070269092
FLAT 41 Goodstone Court,HA1 4FL,10070269093
FLAT 42 Goodstone Court,HA1 4FL,10070269094
FLAT 43 Goodstone Court,HA1 4FL,10070269095
"13 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778260
"14 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778259
"15 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778258
"16 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778263
"17 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778262
"18 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778261
"19 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778266
"20 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778265
"21 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778264
90a Murray Road,W5 4DA,12135293
"Flat 1, 6 Wolverton Gardens",W5 3LJ,"12119972"
"1, Monsted House",UB1 1FG,12189944
"10, Monsted House",UB1 1FG,12189953
"20, Monsted House",UB1 1FG,12189963
"2, Monsted House",UB1 1FG,12189945
"3, Monsted House",UB1 1FG,12189946
"4, Monsted House",UB1 1FG,12189947
"5, Monsted House",UB1 1FG,12189948
"6, Monsted House",UB1 1FG,12189949
"7, Monsted House",UB1 1FG,12189950
"8, Monsted House",UB1 1FG,12189951
"9, Monsted House",UB1 1FG,12189952
"1 Cullis House, 1, Accolade Avenue",UB1 1FH,12189904
"2 Cullis House, 1, Accolade Avenue",UB1 1FH,12189905
"3 Cullis House, 1, Accolade Avenue",UB1 1FH,12189906
"4 Cullis House, 1, Accolade Avenue",UB1 1FH,12189907
"5 Cullis House, 1, Accolade Avenue",UB1 1FH,12189908
"6 Cullis House, 1, Accolade Avenue",UB1 1FH,12189909
1 Genteel House Samara Drive,UB1 1FJ,12189835
2 Genteel House Samara Drive,UB1 1FJ,12189836
3 Genteel House Samara Drive,UB1 1FJ,12189837
4 Genteel House Samara Drive,UB1 1FJ,12189838
5 Genteel House Samara Drive,UB1 1FJ,12189839
6 Genteel House Samara Drive,UB1 1FJ,12189840
7 Genteel House Samara Drive,UB1 1FJ,12189841
8 Genteel House Samara Drive,UB1 1FJ,12189842
9 Genteel House Samara Drive,UB1 1FJ,12189843
10 Genteel House Samara Drive,UB1 1FJ,12189844
1 ASH TREE HOUSE,SE5 0TE,None
"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979
3 ASH TREE HOUSE,SE5 0TE,None
Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981
5 ASH TREE HOUSE,SE5 0TE,None
Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983
Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986
8 ASH TREE HOUSE,SE5 0TE,None
Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990
12 ASH TREE HOUSE,SE5 0TE,None
FLAT 1 599 HARROW ROAD,W10 4RA,217113930
FLAT 2 599 HARROW ROAD,W10 4RA,217113931
FLAT 3 599 HARROW ROAD,W10 4RA,None
FLAT 4 599 HARROW ROAD,W10 4RA,None
FLAT 5 599 HARROW ROAD,W10 4RA,217113934
FLAT 6 599 HARROW ROAD,W10 4RA,None
FLAT 7 599 HARROW ROAD,W10 4RA,None
FLAT 8 599 HARROW ROAD,W10 4RA,None
"Flat 1, Ohio Building",SE13 7RX,10023226256
"Flat 2, Ohio Building",SE13 7RX,10023226257
"Apartment 1 Block B, 105, Benwell Road",N7 7BW,10012792307
"Apartment 2 Block B, 105, Benwell Road",N7 7BW,10012792308
"Apartment 3 Block B, 105, Benwell Road",N7 7BW,10012792309
"Apartment 4 Block B, 105, Benwell Road",N7 7BW,10012792310
"Apartment 5 Block B, 105, Benwell Road",N7 7BW,10012792311
"Apartment 6 Block B, 105, Benwell Road",N7 7BW,10012792312
"Apartment 7 Block B, 105, Benwell Road",N7 7BW,10012792313
"Apartment 8 Block B, 105, Benwell Road",N7 7BW,10012792314
"Apartment 9 Block B, 105, Benwell Road",N7 7BW,10012792315
"Apartment 10 Block B, 105, Benwell Road",N7 7BW,10012792316
"Apartment 11 Block B, 105, Benwell Road",N7 7BW,10012792317
"Apartment 12 Block B, 105, Benwell Road",N7 7BW,10012792318
"Apartment 13 Block B, 105, Benwell Road",N7 7BW,10012792319
"Apartment 1 Block D, 32, Hornsey Road",N7 7AT,10012792366
"Apartment 2 Block D, 32, Hornsey Road",N7 7AT,10012792367
"Apartment 3 Block D, 32, Hornsey Road",N7 7AT,10012792368
"Apartment 4 Block D, 32, Hornsey Road",N7 7AT,10012792369
"Apartment 5 Block D, 32, Hornsey Road",N7 7AT,10012792370
"Apartment 6 Block D, 32, Hornsey Road",N7 7AT,"10012792371"
"Apartment 7 Block D, 32, Hornsey Road",N7 7AT,10012792372
"Apartment 8 Block D, 32, Hornsey Road",N7 7AT,10012792373
"Apartment 9 Block D, 32, Hornsey Road",N7 7AT,10012792374
"Apartment 10 Block D, 32, Hornsey Road",N7 7AT,10012792375
"Apartment 11 Block D, 32, Hornsey Road",N7 7AT,10012792376
"Apartment 12 Block D, 32, Hornsey Road",N7 7AT,10012792377
"Apartment 13 Block D, 32, Hornsey Road",N7 7AT,10012792378
"Apartment 14 Block D, 32, Hornsey Road",N7 7AT,10012792379
"Apartment 15 Block D, 32, Hornsey Road",N7 7AT,10012792380
"Apartment 16 Block D, 32, Hornsey Road",N7 7AT,"10012792381"
"Apartment 17Block D, 32, Hornsey Road",N7 7AT,10012792382
"Apartment 18 Block D, 32, Hornsey Road",N7 7AT,10012792383
24b Honley Road,SE6 2HZ,None
FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
2 COLLEGE HOUSE,CM7 1JS,100091449870
3 COLLEGE HOUSE,CM7 1JS,100091449871
1 Anita Street,M4 5DU,None
2 Anita Street,M4 5DU,77123061
5 Anita Street,M4 5DU,77123081
6 Anita Street,M4 5DU,77123082
8 Anita Street,M4 5DU,None
9 Anita Street,M4 5DU,None
10 Anita Street,M4 5DU,77123051
12 Anita Street,M4 5DU,77123053
19 Anita Street,M4 5DU,None
22 Anita Street,M4 5DU,None
26 Anita Street,M4 5DU,77123068
28 Anita Street,M4 5DU,None
30 Anita Street,M4 5DU,None
32 Anita Street,M4 5DU,None
33 Anita Street,M4 5DU,77123076
34 Anita Street,M4 5DU,None
35 Anita Street,M4 5DU,77123078
36 Anita Street,M4 5DU,77123079
23 George Leigh Street,M4 5DR,77123171
25 George Leigh Street,M4 5DR,None
35 George Leigh Street,M4 5DR,77123177
39 George Leigh Street,M4 5DR,77123179
41 George Leigh Street,M4 5DR,None
43 George Leigh Street,M4 5DR,None
49 George Leigh Street,M4 5DR,None
51 George Leigh Street,M4 5DR,77123185
55 George Leigh Street,M4 5DR,None
57 George Leigh Street,M4 5DR,None
"1a, Victoria Square",M4 5DX,77211153
2a Victoria Square ,M4 5DX,None
"4a, Victoria Square",M4 5DX,77211155
5a Victoria Square,M4 5DX,77211156
6a Victoria Square,M4 5DX,77211157
7a Victoria Square,M4 5DX,77211158
8a Victoria Square,M4 5DX,77211159
9a Victoria Square,M4 5DX,77211160
10a Victoria Square,M4 5DX,77211161
11a Victoria Square,M4 5DX,77211162
12a Victoria Square,M4 5DX,77211163
13a Victoria Square,M4 5DX,77211164
14a Victoria Square,M4 5DX,77211165
15a Victoria Square,M4 5DX,77211166
16a Victoria Square,M4 5DX,77211167
17a Victoria Square,M4 5DX,77211168
18a Victoria Square,M4 5DX,77211169
19a Victoria Square,M4 5DX,77211170
20a Victoria Square,M4 5DX,77211171
21a Victoria Square,M4 5DY,77211172
22a Victoria Square,M4 5DY,None
23a Victoria Square,M4 5DY,77211174
24a Victoria Square,M4 5DY,77211175
25a Victoria Square,M4 5DY,77211176
26a Victoria Square,M4 5DY,77211177
27a Victoria Square,M4 5DY,77211178
28a Victoria Square,M4 5DY,None
29a Victoria Square,M4 5DY,77211180
30a Victoria Square,M4 5DY,77211181
31a Victoria Square,M4 5DY,77211182
32a Victoria Square,M4 5DY,77211183
33a Victoria Square,M4 5DY,77211184
34a Victoria Square,M4 5DY,77211185
35a Victoria Square,M4 5DY,None
36a Victoria Square,M4 5DY,77211187
37a Victoria Square,M4 5DY,77211188
38a Victoria Square,M4 5DY,77211189
39a Victoria Square,M4 5DY,77211190
40a Victoria Square,M4 5DY,None
41a Victoria Square,M4 5DY,77211192
42a Victoria Square,M4 5DY,77211193
43a Victoria Square,M4 5DY,77211194
44a Victoria Square,M4 5DY,77211195
45a Victoria Square,M4 5DY,77211196
46a Victoria Square,M4 5DY,77211197
47a Victoria Square,M4 5DY,77211198
48a Victoria Square,M4 5DY,77211199
49a Victoria Square,M4 5DY,77211200
50a Victoria Square,M4 5DY,77211201
51a Victoria Square,M4 5DY,77211202
52a Victoria Square,M4 5DY,77211203
53a Victoria Square,M4 5DY,77211204
54a Victoria Square,M4 5DY,77211205
55a Victoria Square,M4 5DY,77211206
56a Victoria Square,M4 5DZ,77211207
57a Victoria Square,M4 5DZ,None
58a Victoria Square,M4 5DZ,77211209
59a Victoria Square,M4 5DZ,77211210
60a Victoria Square,M4 5DZ,77211211
61a Victoria Square,M4 5DZ,77211212
62a Victoria Square,M4 5DZ,77211213
63a Victoria Square,M4 5DZ,None
64a Victoria Square,M4 5DZ,77211215
65a Victoria Square,M4 5DZ,77211216
66a Victoria Square,M4 5DZ,None
67a Victoria Square,M4 5DZ,None
68a Victoria Square,M4 5DZ,77211219
69a Victoria Square,M4 5DZ,77211220
70a Victoria Square,M4 5DZ,77211221
71a Victoria Square,M4 5DZ,77211222
72a Victoria Square,M4 5DZ,77211223
73a Victoria Square,M4 5DZ,77211224
74a Victoria Square,M4 5DZ,None
75a Victoria Square,M4 5DZ,77211226
76a Victoria Square,M4 5DZ,77211227
77a Victoria Square,M4 5DZ,None
78a Victoria Square,M4 5DZ,77211229
79a Victoria Square,M4 5DZ,77211230
80a Victoria Square,M4 5DZ,77211231
81a Victoria Square,M4 5DZ,77211232
82 Victoria Square,M4 5DZ,None
83a Victoria Square,M4 5DZ,77211234
84a Victoria Square,M4 5DZ,None
85a Victoria Square,M4 5DZ,77211236
86a Victoria Square,M4 5DZ,77211237
87a Victoria Square,M4 5DZ,77211238
88a Victoria Square,M4 5DZ,None
89a Victoria Square,M4 5DZ,77211240
90a Victoria Square,M4 5DZ,77211241
91a Victoria Square,M4 5DZ,77211242
92a Victoria Square,M4 5DZ,77211243
93a Victoria Square,M4 5EA,77211244
94a Victoria Square,M4 5EA,None
95a Victoria Square,M4 5EA,77211246
96a Victoria Square,M4 5EA,77211247
97a Victoria Square,M4 5EA,77211248
98a Victoria Square,M4 5EA,77211249
99a Victoria Square,M4 5EA,77211250
100a Victoria Square,M4 5EA,77211251
101a Victoria Square,M4 5EA,None
102a Victoria Square,M4 5EA,None
103a Victoria Square,M4 5EA,77211254
104a Victoria Square,M4 5EA,77211255
105a Victoria Square,M4 5EA,None
106a Victoria Square,M4 5EA,77211257
107a Victoria Square,M4 5EA,77211258
108a Victoria Square,M4 5EA,77211259
109a Victoria Square,M4 5EA,77211260
110a Victoria Square,M4 5EA,77211261
111a Victoria Square,M4 5EA,77211262
112a Victoria Square,M4 5EA,None
113a Victoria Square,M4 5EA,77211264
114a Victoria Square,M4 5EA,77211265
115a Victoria Square,M4 5EA,77211266
116a Victoria Square,M4 5EA,77211267
117a Victoria Square,M4 5EA,None
118a Victoria Square,M4 5EA,None
119a Victoria Square,M4 5EA,77211270
120a Victoria Square,M4 5EA,77211271
121a Victoria Square,M4 5EA,77211272
122a Victoria Square,M4 5EA,77211273
123a Victoria Square,M4 5EA,77211274
124a Victoria Square,M4 5EA,None
125a Victoria Square,M4 5EA,77211276
126a Victoria Square,M4 5EA,77211277
127a Victoria Square,M4 5EA,77211278
128a Victoria Square,M4 5EA,77211279
129a Victoria Square,M4 5EA,77211280
130a Victoria Square,M4 5FA,77211281
131a Victoria Square,M4 5FA,77211282
132a Victoria Square,M4 5FA,77211283
133a Victoria Square,M4 5FA,None
134a Victoria Square,M4 5FA,77211285
135a Victoria Square,M4 5FA,77211286
136a Victoria Square,M4 5FA,77211287
137a Victoria Square,M4 5FA,77211288
138a Victoria Square,M4 5FA,77211289
139a Victoria Square,M4 5FA,77211290
140a Victoria Square,M4 5FA,77211291
141a Victoria Square,M4 5FA,77211292
142a Victoria Square,M4 5FA,77211293
143a Victoria Square,M4 5FA,77211294
144a Victoria Square,M4 5FA,77211295
145a Victoria Square,M4 5FA,None
146a Victoria Square,M4 5FA,77211297
147a Victoria Square,M4 5FA,77211298
148a Victoria Square,M4 5FA,77211299
149a Victoria Square,M4 5FA,77211300
150a Victoria Square,M4 5FA,77211301
151a Victoria Square,M4 5FA,None
152a Victoria Square,M4 5FA,77211303
153a Victoria Square,M4 5FA,None
154a Victoria Square,M4 5FA,77211305
155a Victoria Square,M4 5FA,None
156a Victoria Square,M4 5FA,77211307
157a Victoria Square,M4 5FA,77211308
158a Victoria Square,M4 5FA,77211309
159a Victoria Square,M4 5FA,None
160a Victoria Square,M4 5FA,77211311
161a Victoria Square,M4 5FA,None
162a Victoria Square,M4 5FA,None
163a Victoria Square,M4 5FA,77211314
164a Victoria Square,M4 5FA,77211315
165a Victoria Square,M4 5FA,77211316
166a Victoria Square,M4 5FA,None
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
1 User Input Postcode Manual UPRN Code
2 47 The Fairway OX16 0RR 100120771697
3 11 REGENT COURT SL1 3LG 100081041562
4 3/137a Windmill Road TW8 9NH 100021516998
5 Flat 33 SW18 4BE 100023328943
6 FLAT 1 Brendon Grove N2 8JE 200013412
7 Flat 15 KT8 2NE 100062123759
8 FLAT 5 Stonehill Road W4 3AH 100021589829
9 10 Douglas Court SL7 1UQ 100081278099
10 1 Windmill Road HP17 8JA 766034606
11 31 Denewood HP13 7LH 100081095964
12 10, Greenways Drive TW4 5DD 10091597009
13 Flat 10 W4 3AH 100021589834
14 Flat 11 TW4 5DD 10091597010
15 Flat 11 W4 3AH 100021589835
16 12, Greenways Drive TW4 5DD 10091597011
17 Flat 12, Forbes House W4 3AH 100021589836
18 FLAT 1 Goodstone Court HA1 4FL 10070269053
19 Flat 13 TW4 5DD 10091597012
20 Flat 13 W4 3AH 100021589837
21 Flat 14 TW4 5DD 10091597013
22 Flat 14 W4 3AH 100021589838
23 Flat 15 TW4 5DD 10091597014
24 Flat 15 W4 3AH 100021589839
25 Flat 16 TW4 5DD 10091597015
26 Flat 16 W4 3AH 100021589840
27 Flat 17 TW4 5DD 10091597016
28 Flat 17 W4 3AH 100021589841
29 Flat 18 TW4 5DD 10091597017
30 Flat 19 W4 3AH 100021589843
31 Flat 20 W4 3AH 100021589844
32 Flat 21 W4 3AH 100021589845
33 Flat 22 W4 3AH 100021589846
34 FLAT 2 Goodstone Court HA1 4FL 10070269054
35 Flat 23 W4 3AH 100021589847
36 Flat 24 W4 3AH 100021589848
37 30c, Bosanquet Close UB8 3PE 100021475316
38 30e, Bosanquet Close UB8 3PE 100021475318
39 FLAT 3 Goodstone Court HA1 4FL 10070269055
40 FLAT 4 Goodstone Court HA1 4FL 10070269056
41 FLAT 5 Goodstone Court HA1 4FL 10070269057
42 FLAT 6 Goodstone Court HA1 4FL 10070269058
43 FLAT 7 Goodstone Court HA1 4FL 10070269059
44 FLAT 8 Goodstone Court HA1 4FL 10070269060
45 FLAT 9 Goodstone Court HA1 4FL 10070269061
46 FLAT 10 Goodstone Court HA1 4FL 10070269062
47 FLAT 11 Goodstone Court HA1 4FL 10070269063
48 FLAT 12 Goodstone Court HA1 4FL 10070269064
49 FLAT 13 Goodstone Court HA1 4FL 10070269065
50 FLAT 14 Goodstone Court HA1 4FL 10070269066
51 FLAT 15 Goodstone Court HA1 4FL 10070269067
52 FLAT 16 Goodstone Court HA1 4FL 10070269068
53 FLAT 17 Goodstone Court HA1 4FL 10070269069
54 FLAT 18 Goodstone Court HA1 4FL 10070269070
55 FLAT 19 Goodstone Court HA1 4FL 10070269071
56 FLAT 20 Goodstone Court HA1 4FL 10070269072
57 FLAT 21 Goodstone Court HA1 4FL 10070269073
58 FLAT 22 Goodstone Court HA1 4FL 10070269074
59 FLAT 23 Goodstone Court HA1 4FL 10070269075
60 FLAT 24 Goodstone Court HA1 4FL 10070269076
61 FLAT 25 Goodstone Court HA1 4FL 10070269077
62 FLAT 26 Goodstone Court HA1 4FL 10070269078
63 FLAT 27 Goodstone Court HA1 4FL 10070269079
64 FLAT 28 Goodstone Court HA1 4FL 10070269080
65 FLAT 29 Goodstone Court HA1 4FL 10070269081
66 FLAT 30 Goodstone Court HA1 4FL 10070269082
67 FLAT 31 Goodstone Court HA1 4FL 10070269083
68 FLAT 32 Goodstone Court HA1 4FL 10070269084
69 FLAT 33 Goodstone Court HA1 4FL 10070269085
70 FLAT 34 Goodstone Court HA1 4FL 10070269086
71 FLAT 35 Goodstone Court HA1 4FL 10070269087
72 FLAT 36 Goodstone Court HA1 4FL 10070269088
73 FLAT 37 Goodstone Court HA1 4FL 10070269089
74 FLAT 38 Goodstone Court HA1 4FL 10070269090
75 FLAT 39 Goodstone Court HA1 4FL 10070269091
76 FLAT 40 Goodstone Court HA1 4FL 10070269092
77 FLAT 41 Goodstone Court HA1 4FL 10070269093
78 FLAT 42 Goodstone Court HA1 4FL 10070269094
79 FLAT 43 Goodstone Court HA1 4FL 10070269095
80 13 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778260
81 14 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778259
82 15 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778258
83 16 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778263
84 17 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778262
85 18 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778261
86 19 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778266
87 20 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778265
88 21 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778264
89 90a Murray Road W5 4DA 12135293
90 Flat 1, 6 Wolverton Gardens W5 3LJ 12119972
91 1, Monsted House UB1 1FG 12189944
92 10, Monsted House UB1 1FG 12189953
93 20, Monsted House UB1 1FG 12189963
94 2, Monsted House UB1 1FG 12189945
95 3, Monsted House UB1 1FG 12189946
96 4, Monsted House UB1 1FG 12189947
97 5, Monsted House UB1 1FG 12189948
98 6, Monsted House UB1 1FG 12189949
99 7, Monsted House UB1 1FG 12189950
100 8, Monsted House UB1 1FG 12189951
101 9, Monsted House UB1 1FG 12189952
102 1 Cullis House, 1, Accolade Avenue UB1 1FH 12189904
103 2 Cullis House, 1, Accolade Avenue UB1 1FH 12189905
104 3 Cullis House, 1, Accolade Avenue UB1 1FH 12189906
105 4 Cullis House, 1, Accolade Avenue UB1 1FH 12189907
106 5 Cullis House, 1, Accolade Avenue UB1 1FH 12189908
107 6 Cullis House, 1, Accolade Avenue UB1 1FH 12189909
108 1 Genteel House Samara Drive UB1 1FJ 12189835
109 2 Genteel House Samara Drive UB1 1FJ 12189836
110 3 Genteel House Samara Drive UB1 1FJ 12189837
111 4 Genteel House Samara Drive UB1 1FJ 12189838
112 5 Genteel House Samara Drive UB1 1FJ 12189839
113 6 Genteel House Samara Drive UB1 1FJ 12189840
114 7 Genteel House Samara Drive UB1 1FJ 12189841
115 8 Genteel House Samara Drive UB1 1FJ 12189842
116 9 Genteel House Samara Drive UB1 1FJ 12189843
117 10 Genteel House Samara Drive UB1 1FJ 12189844
118 1 ASH TREE HOUSE SE5 0TE None
119 Flat 1 Ash Tree House, 2, Thompson Avenue SE5 0TE 10009803979
120 3 ASH TREE HOUSE SE5 0TE None
121 Flat 3 ASH TREE HOUSE SE5 0TE 10009803981
122 5 ASH TREE HOUSE SE5 0TE None
123 Flat 5 ASH TREE HOUSE SE5 0TE 10009803983
124 Flat 8 ASH TREE HOUSE SE5 0TE 10009803986
125 8 ASH TREE HOUSE SE5 0TE None
126 Flat 12 ASH TREE HOUSE SE5 0TE 10009803990
127 12 ASH TREE HOUSE SE5 0TE None
128 FLAT 1 599 HARROW ROAD W10 4RA 217113930
129 FLAT 2 599 HARROW ROAD W10 4RA 217113931
130 FLAT 3 599 HARROW ROAD W10 4RA None
131 FLAT 4 599 HARROW ROAD W10 4RA None
132 FLAT 5 599 HARROW ROAD W10 4RA 217113934
133 FLAT 6 599 HARROW ROAD W10 4RA None
134 FLAT 7 599 HARROW ROAD W10 4RA None
135 FLAT 8 599 HARROW ROAD W10 4RA None
136 Flat 1, Ohio Building SE13 7RX 10023226256
137 Flat 2, Ohio Building SE13 7RX 10023226257
138 Apartment 1 Block B, 105, Benwell Road N7 7BW 10012792307
139 Apartment 2 Block B, 105, Benwell Road N7 7BW 10012792308
140 Apartment 3 Block B, 105, Benwell Road N7 7BW 10012792309
141 Apartment 4 Block B, 105, Benwell Road N7 7BW 10012792310
142 Apartment 5 Block B, 105, Benwell Road N7 7BW 10012792311
143 Apartment 6 Block B, 105, Benwell Road N7 7BW 10012792312
144 Apartment 7 Block B, 105, Benwell Road N7 7BW 10012792313
145 Apartment 8 Block B, 105, Benwell Road N7 7BW 10012792314
146 Apartment 9 Block B, 105, Benwell Road N7 7BW 10012792315
147 Apartment 10 Block B, 105, Benwell Road N7 7BW 10012792316
148 Apartment 11 Block B, 105, Benwell Road N7 7BW 10012792317
149 Apartment 12 Block B, 105, Benwell Road N7 7BW 10012792318
150 Apartment 13 Block B, 105, Benwell Road N7 7BW 10012792319
151 Apartment 1 Block D, 32, Hornsey Road N7 7AT 10012792366
152 Apartment 2 Block D, 32, Hornsey Road N7 7AT 10012792367
153 Apartment 3 Block D, 32, Hornsey Road N7 7AT 10012792368
154 Apartment 4 Block D, 32, Hornsey Road N7 7AT 10012792369
155 Apartment 5 Block D, 32, Hornsey Road N7 7AT 10012792370
156 Apartment 6 Block D, 32, Hornsey Road N7 7AT 10012792371
157 Apartment 7 Block D, 32, Hornsey Road N7 7AT 10012792372
158 Apartment 8 Block D, 32, Hornsey Road N7 7AT 10012792373
159 Apartment 9 Block D, 32, Hornsey Road N7 7AT 10012792374
160 Apartment 10 Block D, 32, Hornsey Road N7 7AT 10012792375
161 Apartment 11 Block D, 32, Hornsey Road N7 7AT 10012792376
162 Apartment 12 Block D, 32, Hornsey Road N7 7AT 10012792377
163 Apartment 13 Block D, 32, Hornsey Road N7 7AT 10012792378
164 Apartment 14 Block D, 32, Hornsey Road N7 7AT 10012792379
165 Apartment 15 Block D, 32, Hornsey Road N7 7AT 10012792380
166 Apartment 16 Block D, 32, Hornsey Road N7 7AT 10012792381
167 Apartment 17Block D, 32, Hornsey Road N7 7AT 10012792382
168 Apartment 18 Block D, 32, Hornsey Road N7 7AT 10012792383
169 24b Honley Road SE6 2HZ None
170 FLAT B 158 LEAHURST ROAD SE13 5NL 100021976974
171 2 COLLEGE HOUSE CM7 1JS 100091449870
172 3 COLLEGE HOUSE CM7 1JS 100091449871
173 1 Anita Street M4 5DU None
174 2 Anita Street M4 5DU 77123061
175 5 Anita Street M4 5DU 77123081
176 6 Anita Street M4 5DU 77123082
177 8 Anita Street M4 5DU None
178 9 Anita Street M4 5DU None
179 10 Anita Street M4 5DU 77123051
180 12 Anita Street M4 5DU 77123053
181 19 Anita Street M4 5DU None
182 22 Anita Street M4 5DU None
183 26 Anita Street M4 5DU 77123068
184 28 Anita Street M4 5DU None
185 30 Anita Street M4 5DU None
186 32 Anita Street M4 5DU None
187 33 Anita Street M4 5DU 77123076
188 34 Anita Street M4 5DU None
189 35 Anita Street M4 5DU 77123078
190 36 Anita Street M4 5DU 77123079
191 23 George Leigh Street M4 5DR 77123171
192 25 George Leigh Street M4 5DR None
193 35 George Leigh Street M4 5DR 77123177
194 39 George Leigh Street M4 5DR 77123179
195 41 George Leigh Street M4 5DR None
196 43 George Leigh Street M4 5DR None
197 49 George Leigh Street M4 5DR None
198 51 George Leigh Street M4 5DR 77123185
199 55 George Leigh Street M4 5DR None
200 57 George Leigh Street M4 5DR None
201 1a, Victoria Square M4 5DX 77211153
202 2a Victoria Square M4 5DX None
203 4a, Victoria Square M4 5DX 77211155
204 5a Victoria Square M4 5DX 77211156
205 6a Victoria Square M4 5DX 77211157
206 7a Victoria Square M4 5DX 77211158
207 8a Victoria Square M4 5DX 77211159
208 9a Victoria Square M4 5DX 77211160
209 10a Victoria Square M4 5DX 77211161
210 11a Victoria Square M4 5DX 77211162
211 12a Victoria Square M4 5DX 77211163
212 13a Victoria Square M4 5DX 77211164
213 14a Victoria Square M4 5DX 77211165
214 15a Victoria Square M4 5DX 77211166
215 16a Victoria Square M4 5DX 77211167
216 17a Victoria Square M4 5DX 77211168
217 18a Victoria Square M4 5DX 77211169
218 19a Victoria Square M4 5DX 77211170
219 20a Victoria Square M4 5DX 77211171
220 21a Victoria Square M4 5DY 77211172
221 22a Victoria Square M4 5DY None
222 23a Victoria Square M4 5DY 77211174
223 24a Victoria Square M4 5DY 77211175
224 25a Victoria Square M4 5DY 77211176
225 26a Victoria Square M4 5DY 77211177
226 27a Victoria Square M4 5DY 77211178
227 28a Victoria Square M4 5DY None
228 29a Victoria Square M4 5DY 77211180
229 30a Victoria Square M4 5DY 77211181
230 31a Victoria Square M4 5DY 77211182
231 32a Victoria Square M4 5DY 77211183
232 33a Victoria Square M4 5DY 77211184
233 34a Victoria Square M4 5DY 77211185
234 35a Victoria Square M4 5DY None
235 36a Victoria Square M4 5DY 77211187
236 37a Victoria Square M4 5DY 77211188
237 38a Victoria Square M4 5DY 77211189
238 39a Victoria Square M4 5DY 77211190
239 40a Victoria Square M4 5DY None
240 41a Victoria Square M4 5DY 77211192
241 42a Victoria Square M4 5DY 77211193
242 43a Victoria Square M4 5DY 77211194
243 44a Victoria Square M4 5DY 77211195
244 45a Victoria Square M4 5DY 77211196
245 46a Victoria Square M4 5DY 77211197
246 47a Victoria Square M4 5DY 77211198
247 48a Victoria Square M4 5DY 77211199
248 49a Victoria Square M4 5DY 77211200
249 50a Victoria Square M4 5DY 77211201
250 51a Victoria Square M4 5DY 77211202
251 52a Victoria Square M4 5DY 77211203
252 53a Victoria Square M4 5DY 77211204
253 54a Victoria Square M4 5DY 77211205
254 55a Victoria Square M4 5DY 77211206
255 56a Victoria Square M4 5DZ 77211207
256 57a Victoria Square M4 5DZ None
257 58a Victoria Square M4 5DZ 77211209
258 59a Victoria Square M4 5DZ 77211210
259 60a Victoria Square M4 5DZ 77211211
260 61a Victoria Square M4 5DZ 77211212
261 62a Victoria Square M4 5DZ 77211213
262 63a Victoria Square M4 5DZ None
263 64a Victoria Square M4 5DZ 77211215
264 65a Victoria Square M4 5DZ 77211216
265 66a Victoria Square M4 5DZ None
266 67a Victoria Square M4 5DZ None
267 68a Victoria Square M4 5DZ 77211219
268 69a Victoria Square M4 5DZ 77211220
269 70a Victoria Square M4 5DZ 77211221
270 71a Victoria Square M4 5DZ 77211222
271 72a Victoria Square M4 5DZ 77211223
272 73a Victoria Square M4 5DZ 77211224
273 74a Victoria Square M4 5DZ None
274 75a Victoria Square M4 5DZ 77211226
275 76a Victoria Square M4 5DZ 77211227
276 77a Victoria Square M4 5DZ None
277 78a Victoria Square M4 5DZ 77211229
278 79a Victoria Square M4 5DZ 77211230
279 80a Victoria Square M4 5DZ 77211231
280 81a Victoria Square M4 5DZ 77211232
281 82 Victoria Square M4 5DZ None
282 83a Victoria Square M4 5DZ 77211234
283 84a Victoria Square M4 5DZ None
284 85a Victoria Square M4 5DZ 77211236
285 86a Victoria Square M4 5DZ 77211237
286 87a Victoria Square M4 5DZ 77211238
287 88a Victoria Square M4 5DZ None
288 89a Victoria Square M4 5DZ 77211240
289 90a Victoria Square M4 5DZ 77211241
290 91a Victoria Square M4 5DZ 77211242
291 92a Victoria Square M4 5DZ 77211243
292 93a Victoria Square M4 5EA 77211244
293 94a Victoria Square M4 5EA None
294 95a Victoria Square M4 5EA 77211246
295 96a Victoria Square M4 5EA 77211247
296 97a Victoria Square M4 5EA 77211248
297 98a Victoria Square M4 5EA 77211249
298 99a Victoria Square M4 5EA 77211250
299 100a Victoria Square M4 5EA 77211251
300 101a Victoria Square M4 5EA None
301 102a Victoria Square M4 5EA None
302 103a Victoria Square M4 5EA 77211254
303 104a Victoria Square M4 5EA 77211255
304 105a Victoria Square M4 5EA None
305 106a Victoria Square M4 5EA 77211257
306 107a Victoria Square M4 5EA 77211258
307 108a Victoria Square M4 5EA 77211259
308 109a Victoria Square M4 5EA 77211260
309 110a Victoria Square M4 5EA 77211261
310 111a Victoria Square M4 5EA 77211262
311 112a Victoria Square M4 5EA None
312 113a Victoria Square M4 5EA 77211264
313 114a Victoria Square M4 5EA 77211265
314 115a Victoria Square M4 5EA 77211266
315 116a Victoria Square M4 5EA 77211267
316 117a Victoria Square M4 5EA None
317 118a Victoria Square M4 5EA None
318 119a Victoria Square M4 5EA 77211270
319 120a Victoria Square M4 5EA 77211271
320 121a Victoria Square M4 5EA 77211272
321 122a Victoria Square M4 5EA 77211273
322 123a Victoria Square M4 5EA 77211274
323 124a Victoria Square M4 5EA None
324 125a Victoria Square M4 5EA 77211276
325 126a Victoria Square M4 5EA 77211277
326 127a Victoria Square M4 5EA 77211278
327 128a Victoria Square M4 5EA 77211279
328 129a Victoria Square M4 5EA 77211280
329 130a Victoria Square M4 5FA 77211281
330 131a Victoria Square M4 5FA 77211282
331 132a Victoria Square M4 5FA 77211283
332 133a Victoria Square M4 5FA None
333 134a Victoria Square M4 5FA 77211285
334 135a Victoria Square M4 5FA 77211286
335 136a Victoria Square M4 5FA 77211287
336 137a Victoria Square M4 5FA 77211288
337 138a Victoria Square M4 5FA 77211289
338 139a Victoria Square M4 5FA 77211290
339 140a Victoria Square M4 5FA 77211291
340 141a Victoria Square M4 5FA 77211292
341 142a Victoria Square M4 5FA 77211293
342 143a Victoria Square M4 5FA 77211294
343 144a Victoria Square M4 5FA 77211295
344 145a Victoria Square M4 5FA None
345 146a Victoria Square M4 5FA 77211297
346 147a Victoria Square M4 5FA 77211298
347 148a Victoria Square M4 5FA 77211299
348 149a Victoria Square M4 5FA 77211300
349 150a Victoria Square M4 5FA 77211301
350 151a Victoria Square M4 5FA None
351 152a Victoria Square M4 5FA 77211303
352 153a Victoria Square M4 5FA None
353 154a Victoria Square M4 5FA 77211305
354 155a Victoria Square M4 5FA None
355 156a Victoria Square M4 5FA 77211307
356 157a Victoria Square M4 5FA 77211308
357 158a Victoria Square M4 5FA 77211309
358 159a Victoria Square M4 5FA None
359 160a Victoria Square M4 5FA 77211311
360 161a Victoria Square M4 5FA None
361 162a Victoria Square M4 5FA None
362 163a Victoria Square M4 5FA 77211314
363 164a Victoria Square M4 5FA 77211315
364 165a Victoria Square M4 5FA 77211316
365 166a Victoria Square M4 5FA None
366 FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY CR2 7DL None

View file

@ -42,7 +42,7 @@ class Settings(BaseSettings):
AWS_DEFAULT_REGION: Optional[str] = None
class Config:
env_file = "backend/.env"
env_file = "backend/.env.local"
@lru_cache()

View file

@ -3,7 +3,9 @@ from contextlib import contextmanager
from backend.app.config import get_settings
from sqlmodel import Session
connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
connection_string = (
"postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
)
db_string = connection_string.format(
drivername="psycopg2", # You'll need to use psycopg2 driver for PostgreSQL
username=get_settings().DB_USERNAME,
@ -28,7 +30,9 @@ db_engine = create_engine(
def get_db_session():
if db_engine is None:
raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.")
raise RuntimeError(
"Database is not configured. Set DATABASE_URL in environment variables."
)
return Session(db_engine)

View file

@ -0,0 +1,12 @@
from typing import List
from sqlalchemy import insert, delete
from sqlalchemy.orm import Session
from backend.app.db.connection import db_session, db_read_session
from backend.app.db.models.condition import PropertyConditionSurveyModel
# Placeholder for bulk-inserting PropertyConditionSurveyModel rows using the
# supplied session. Not implemented yet: every call raises NotImplementedError,
# regardless of the arguments passed.
def bulk_insert_property_surveys(
session: Session, surveys: List[PropertyConditionSurveyModel]
) -> None:
raise NotImplementedError

View file

@ -0,0 +1,97 @@
from sqlalchemy import (
BigInteger,
Column,
Date,
ForeignKey,
Integer,
String,
Enum as SqlEnum,
)
from sqlalchemy.orm import declarative_base, relationship
from backend.condition.domain.aspect_type import AspectType
from backend.condition.domain.element_type import ElementType
Base = declarative_base()
# Column type for ElementType values. native_enum=True asks SQLAlchemy to use the
# database's native ENUM type (named "element_type") where the backend has one;
# values_callable makes the persisted labels the members' .value strings instead
# of the member names SQLAlchemy would store by default.
ElementTypeDb = SqlEnum(
ElementType,
name="element_type",
native_enum=True,
values_callable=lambda enum: [e.value for e in enum],
)
# Column type for AspectType values, configured the same way as ElementTypeDb
# but backed by an enum type named "aspect_type".
AspectTypeDb = SqlEnum(
AspectType,
name="aspect_type",
native_enum=True,
values_callable=lambda enum: [a.value for a in enum],
)
# ORM model for the "property_condition_survey" table: one row per survey.
# Each row has an auto-incrementing surrogate id plus three mandatory fields
# (uprn, date, source) and owns a collection of ElementModel rows.
class PropertyConditionSurveyModel(Base):
__tablename__ = "property_condition_survey"
id = Column(BigInteger, primary_key=True, autoincrement=True)
# uprn is stored as a plain BigInteger; no uniqueness constraint is declared,
# so several surveys may share the same uprn.
uprn = Column(BigInteger, nullable=False)
date = Column(Date, nullable=False)
source = Column(String, nullable=False)
# One-to-many link to ElementModel (paired with ElementModel.survey).
# "all, delete-orphan" cascades session operations to the elements and deletes
# an element once it is removed from this collection.
elements = relationship(
"ElementModel",
back_populates="survey",
cascade="all, delete-orphan",
)
# ORM model for the "element" table: one surveyed element belonging to a single
# PropertyConditionSurveyModel and owning a collection of AspectConditionModel rows.
class ElementModel(Base):
__tablename__ = "element" # TODO: rename to survey_element?
id = Column(BigInteger, primary_key=True, autoincrement=True)
# Mandatory foreign key to the owning survey row.
# NOTE(review): no index is declared on this column - confirm whether lookups
# by survey_id need one.
survey_id = Column(
BigInteger,
ForeignKey("property_condition_survey.id"),
nullable=False,
)
element_type = Column(ElementTypeDb, nullable=False)
element_instance = Column(BigInteger, nullable=False)
# Many-to-one back-reference paired with PropertyConditionSurveyModel.elements.
survey = relationship(
"PropertyConditionSurveyModel",
back_populates="elements",
)
# One-to-many link to AspectConditionModel (paired with AspectConditionModel.element);
# aspects are cascaded with the element and deleted when orphaned.
aspect_conditions = relationship(
"AspectConditionModel",
back_populates="element",
cascade="all, delete-orphan",
)
# ORM model for the "aspect_condition" table: one recorded aspect of an element.
# aspect_type and aspect_instance are mandatory; the remaining detail columns are
# declared without nullable=False and may therefore be left empty.
class AspectConditionModel(Base):
__tablename__ = "aspect_condition" # TODO: rename to survey_aspect?
id = Column(BigInteger, primary_key=True, autoincrement=True)
# Mandatory foreign key to the owning element row.
# NOTE(review): no index is declared on this column - confirm whether lookups
# by element_id need one.
element_id = Column(
BigInteger,
ForeignKey("element.id"),
nullable=False,
)
aspect_type = Column(AspectTypeDb, nullable=False)
aspect_instance = Column(BigInteger, nullable=False)
# Optional detail columns.
value = Column(String)
# NOTE(review): quantity is an Integer column - confirm upstream quantities are
# always whole numbers.
quantity = Column(Integer)
install_date = Column(Date)
renewal_year = Column(Integer)
comments = Column(String)
# Many-to-one back-reference paired with ElementModel.aspect_conditions.
element = relationship(
"ElementModel",
back_populates="aspect_conditions",
)

View file

@ -1,3 +1,4 @@
# fastapi
fastapi==0.115.2
sqlalchemy==2.0.36
@ -13,4 +14,3 @@ openpyxl==3.1.2
# Basic
pytz
sqlmodel

View file

@ -20,7 +20,7 @@ The processor currently supports file formats provided by **Peabody** and **LBWF
The `local_runner` script allows the processor to be executed in a local environment.
1. Copy a sample input file into the `sample_data/` directory.
1. Copy sample input file(s) into the `sample_data/` directory. If working with Peabody data, you'll need the Landlord Reference / UPRN lookup file as well.
2. Update `local_runner.py` as required, specifically the definitions of:
- `lbwf_path`
- `peabody_path`

View file

@ -21,6 +21,8 @@ def main() -> None:
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
)
filepaths = [lbwf_path, peabody_path]
# filepaths = [lbwf_path]
# filepaths = [peabody_path]
for fp in filepaths:
with fp.open("rb") as f:

View file

@ -1,4 +1,4 @@
from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
from typing import BinaryIO, Any, Dict, Iterator, List, Optional, Tuple
from openpyxl import Workbook, load_workbook
from collections import defaultdict
@ -15,7 +15,11 @@ logger = setup_logger()
class LbwfParser(Parser):
def parse(self, file_stream: BinaryIO) -> Any:
def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
wb: Workbook = load_workbook(file_stream)
address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(
wb

View file

@ -1,8 +1,13 @@
from abc import ABC, abstractmethod
from typing import BinaryIO, Any
from typing import BinaryIO, Any, Dict, Optional
class Parser(ABC):
@abstractmethod
def parse(self, file_stream: BinaryIO) -> Any:
def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
pass

View file

@ -1,26 +1,55 @@
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict
import csv
from pathlib import Path
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
from openpyxl import Workbook, load_workbook
from collections import defaultdict
from backend.condition.parsing.parser import Parser
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition,
)
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
from utils.logger import setup_logger
logger = setup_logger()
class PeabodyParser(Parser):
def parse(self, file_stream: BinaryIO) -> Any:
def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
wb: Workbook = load_workbook(file_stream)
address_to_uprn_map: Dict[str, int] = PeabodyParser._generate_address_to_uprn_dict(wb)
assets = self._parse_assets(wb)
return self._group_assets_into_properties(
assets=assets,
address_to_uprn_map=address_to_uprn_map,
if location_ref_to_uprn_map is None:
location_ref_to_uprn_map: Dict[str, int] = (
PeabodyParser._build_location_ref_to_uprn_map()
)
assets = PeabodyParser._parse_assets(wb)
return PeabodyParser._group_assets_into_properties(
assets=assets,
location_ref_to_uprn_map=location_ref_to_uprn_map,
)
@staticmethod
def _build_location_ref_to_uprn_map(
    filepath: Optional[Path] = None,
) -> Dict[str, int]:
    """Load the Location Reference -> UPRN lookup from a CSV file.

    The CSV must have a header row containing the columns ``reference`` and
    ``out_uprn``. Each data row contributes one ``reference -> int(out_uprn)``
    entry; if a reference appears more than once, the last row wins.

    Rows whose ``out_uprn`` is missing or not an integer (for example blank
    or the text ``None`` for an unmatched property) are skipped instead of
    aborting the whole load, and the number of skipped rows is logged.

    Args:
        filepath: CSV file to read. When omitted, the bundled
            ``sample_data/peabody/PeabodyPropertymatched_Dec25_propref_UPRN.csv``
            file (resolved relative to this module) is used.

    Returns:
        Mapping of location reference (exactly as written in the CSV) to UPRN.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
        KeyError: if the ``reference`` or ``out_uprn`` column is absent.
    """
    if filepath is None:
        filepath = (
            Path(__file__).resolve().parents[1]
            / "sample_data"
            / "peabody"
            / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
        )

    location_ref_to_uprn_map: Dict[str, int] = {}
    skipped_rows: int = 0

    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark,
    # which would otherwise become part of the first header name and make
    # row["reference"] raise KeyError.
    with filepath.open(newline="", encoding="utf-8-sig") as f:
        reader: Any = csv.DictReader(f)
        for row in reader:
            reference = row["reference"]
            try:
                uprn = int(row["out_uprn"])
            except (TypeError, ValueError):
                # TypeError: short row (DictReader fills the gap with None).
                # ValueError: blank or non-numeric text such as "None".
                skipped_rows += 1
                continue
            location_ref_to_uprn_map[reference] = uprn

    if skipped_rows:
        logger.warning(
            f"Skipped {skipped_rows} row(s) in {filepath} with a missing or non-numeric UPRN"
        )

    return location_ref_to_uprn_map
@staticmethod
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
@ -33,7 +62,9 @@ class PeabodyParser(Parser):
assets: List[PeabodyAssetCondition] = []
for row in asset_rows:
try:
asset = PeabodyParser._map_row_to_asset_record(row, asset_header_indexes)
asset = PeabodyParser._map_row_to_asset_record(
row, asset_header_indexes
)
if not asset.is_block_level:
# Block-level condition surveys are out of scope for now
# until we have a wider think on how to handle block
@ -48,24 +79,26 @@ class PeabodyParser(Parser):
@staticmethod
def _group_assets_into_properties(
assets: List[PeabodyAssetCondition],
address_to_uprn_map: Dict[str, int],
location_ref_to_uprn_map: Dict[str, int],
) -> List[PeabodyProperty]:
assets_by_address: DefaultDict[str, List[PeabodyAssetCondition]] = defaultdict(list)
assets_by_location_reference: DefaultDict[str, List[PeabodyAssetCondition]] = (
defaultdict(list)
)
for asset in assets:
if asset.full_address is None:
if asset.lo_reference is None:
continue
address = asset.full_address.strip()
assets_by_address[address].append(asset)
assets_by_location_reference[asset.lo_reference].append(asset)
properties: List[PeabodyProperty] = []
for address, grouped_assets in assets_by_address.items():
uprn = address_to_uprn_map.get(address)
for location_ref, grouped_assets in assets_by_location_reference.items():
uprn = location_ref_to_uprn_map.get(location_ref)
if uprn is None:
logger.warning(f"No UPRN found for address: {address}")
logger.warning(f"No UPRN found for Location Reference: {location_ref}")
continue
properties.append(
@ -77,7 +110,6 @@ class PeabodyParser(Parser):
return properties
@staticmethod
def _map_row_to_asset_record(
row: Any | Tuple[object | None, ...],
@ -102,39 +134,9 @@ class PeabodyParser(Parser):
condition_survey_date=row[header_indexes["condition_survey_date"]],
)
@staticmethod
def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]:
sheet = wb["Survey Records - D & Lower"]
rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)
headers = next(rows)
header_indexes: Dict[str, int] = PeabodyParser._get_column_indexes_by_name(headers)
address_idx = header_indexes["full_address"]
address_to_uprn: Dict[str, int] = {}
# Generate random UPRNs for now
next_uprn = 1 # TODO: get real UPRNs
for row in rows:
address = row[address_idx]
if address is None:
continue
address = address.strip()
if address not in address_to_uprn:
address_to_uprn[address] = next_uprn
next_uprn += 1
return address_to_uprn
@staticmethod
def _get_column_indexes_by_name(
headers: Tuple[object | None, ...]
headers: Tuple[object | None, ...],
) -> Dict[str, int]:
index: Dict[str, int] = {}

View file

@ -0,0 +1,86 @@
import time
from typing import List, Optional
from sqlmodel import Session
from utils.logger import setup_logger
from backend.app.db.models.condition import (
AspectConditionModel,
ElementModel,
PropertyConditionSurveyModel,
)
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
from backend.app.db.connection import db_session
logger = setup_logger()
class ConditionPostgres:
    """Writes property condition surveys to the database through the ORM models."""

    def bulk_insert_surveys(
        self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100
    ) -> None:
        """Map domain surveys to ORM models and insert them in batches.

        Every batch is added to the session and committed on its own, so if a
        later batch fails the batches committed before it remain in the
        database.

        Args:
            surveys: Domain surveys to persist. An empty list writes nothing.
            batch_size: Maximum number of surveys per commit. ``None`` means
                "do not batch": all surveys are written in a single commit.

        Raises:
            ValueError: if ``batch_size`` is zero or negative. A negative step
                would make the batching loop run zero times and silently
                insert nothing.
        """
        if batch_size is not None and batch_size <= 0:
            raise ValueError(
                f"batch_size must be a positive integer or None, got {batch_size}"
            )

        logger.info(
            f"Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
        )

        survey_models: List[PropertyConditionSurveyModel] = [
            ConditionPostgres.map_survey_to_model(s) for s in surveys
        ]
        total: int = len(survey_models)

        # None -> one batch holding everything. max(..., 1) keeps the range()
        # step valid when there are no surveys at all.
        effective_batch_size: int = (
            batch_size if batch_size is not None else max(total, 1)
        )

        logger.info(
            f"Finished mapping {total} surveys. Writing to database in batches of {effective_batch_size}..."
        )

        with db_session() as session:
            for start in range(0, total, effective_batch_size):
                end = min(start + effective_batch_size, total)
                batch = survey_models[start:end]

                t0: float = time.perf_counter()
                ConditionPostgres._insert_surveys_batch(batch, session)
                elapsed: float = time.perf_counter() - t0

                logger.info(
                    f"Inserted batch {start} - {end} ({len(batch)} surveys) in {elapsed} seconds",
                )

    @staticmethod
    def map_survey_to_model(
        survey: PropertyConditionSurvey,
    ) -> PropertyConditionSurveyModel:
        """Convert one domain survey into its ORM model graph.

        Builds a PropertyConditionSurveyModel with one ElementModel per domain
        element and one AspectConditionModel per aspect condition, preserving
        the order of both lists. Nothing is written to the database here.

        Args:
            survey: Domain survey to convert.

        Returns:
            A new, unsaved PropertyConditionSurveyModel with its elements and
            their aspect conditions attached.
        """
        survey_model = PropertyConditionSurveyModel(
            uprn=survey.uprn,
            date=survey.date,
            source=survey.source,
            elements=[],
        )

        for element in survey.elements:
            element_model = ElementModel(
                element_type=element.element_type,
                element_instance=element.element_instance,
                aspect_conditions=[],
            )

            for aspect in element.aspect_conditions:
                aspect_model = AspectConditionModel(
                    aspect_type=aspect.aspect_type,
                    aspect_instance=aspect.aspect_instance,
                    value=aspect.value,
                    quantity=aspect.quantity,
                    install_date=aspect.install_date,
                    renewal_year=aspect.renewal_year,
                    comments=aspect.comments,
                )
                element_model.aspect_conditions.append(aspect_model)

            survey_model.elements.append(element_model)

        return survey_model

    @staticmethod
    def _insert_surveys_batch(
        surveys: List[PropertyConditionSurveyModel], session: Session
    ) -> None:
        """Add one batch of survey models to the session and commit it.

        Args:
            surveys: Survey models (with their child rows attached) to insert.
            session: Open session used for the insert; it is committed before
                this method returns.
        """
        session.add_all(surveys)
        session.commit()

View file

@ -1,25 +1,33 @@
from typing import Any, BinaryIO, List
from datetime import datetime
from utils.logger import setup_logger
from backend.condition.domain.mapping.mapper import Mapper
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
from backend.condition.parsing.parser import Parser
from utils.logger import setup_logger
from backend.condition.persistence.condition_postgres import ConditionPostgres
from backend.condition.file_type import FileType, detect_file_type
from backend.condition.parsing.factory import select_parser, select_mapper
logger = setup_logger()
def process_file(file_stream: BinaryIO, source_key: str) -> None:
print(f"[processor] Received file: {source_key}")
logger.info(f"[processor] Received file: {source_key}")
# Instantiation
file_type: FileType = detect_file_type(source_key)
parser: Parser = select_parser(file_type)
mapper: Mapper = select_mapper(file_type)
persistence = ConditionPostgres()
# Orchestration
raw_properties: List[Any] = parser.parse(file_stream)
logger.info(
f"[processor] Finished loading customer survey data for {len(raw_properties)} properties. Mapping..."
)
survey_year = datetime.now().year # TODO: get this from filepath or elsewhere
property_condition_surveys: List[PropertyConditionSurvey] = []
@ -29,4 +37,10 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None:
mapper.map_asset_conditions_for_property(p, survey_year)
)
print("done") # temp
logger.info(
f"[processor] Finished mapping {len(property_condition_surveys)} properties. Writing to database..."
)
persistence.bulk_insert_surveys(property_condition_surveys)
logger.info(f"[processor] Finished loading surveys to database")

View file

@ -1,3 +1,4 @@
from backend.app.db.models.condition import PropertyConditionSurveyModel
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
@ -72,3 +73,41 @@ class CustomAsserts:
f"{actual_aspect.comments} != {expected_aspect.comments}"
)
return True
def assert_property_condition_survey_model_matches_expected(
    actual_model: PropertyConditionSurveyModel,
    expected: dict,
) -> None:
    """Assert that a survey model matches a plain-dict description of it.

    ``expected`` must provide ``uprn``, ``date``, ``source`` and ``elements``.
    Each entry of ``elements`` is a dict with ``element_type``,
    ``element_instance`` and ``aspects``; each aspect is a dict of
    attribute-name -> expected value. Only the attributes named in an aspect
    dict are compared. Elements and aspects are matched by position.

    Raises:
        AssertionError: on the first mismatch, with a message naming the
            field (and element/aspect index) that differs.
    """
    # Top-level survey fields.
    assert actual_model.uprn == expected["uprn"], "UPRN differs"
    assert actual_model.date == expected["date"], "Date differs"
    assert actual_model.source == expected["source"], "Source differs"

    # Element counts must agree before the pairwise comparison below.
    assert len(actual_model.elements) == len(expected["elements"]), (
        f"Expected {len(expected['elements'])} elements, "
        f"got {len(actual_model.elements)}"
    )

    element_pairs = zip(actual_model.elements, expected["elements"])
    for i, (got_element, want_element) in enumerate(element_pairs):
        assert (
            got_element.element_type == want_element["element_type"]
        ), f"Element[{i}].element_type differs"
        assert (
            got_element.element_instance == want_element["element_instance"]
        ), f"Element[{i}].element_instance differs"
        assert len(got_element.aspect_conditions) == len(
            want_element["aspects"]
        ), f"Element[{i}] aspect count differs"

        aspect_pairs = zip(got_element.aspect_conditions, want_element["aspects"])
        for j, (got_aspect, want_aspect) in enumerate(aspect_pairs):
            prefix = f"Element[{i}].Aspect[{j}]"
            # Compare only the attributes the expectation names.
            for key in want_aspect:
                value = want_aspect[key]
                assert getattr(got_aspect, key) == value, (
                    f"{prefix}.{key} differs: "
                    f"{getattr(got_aspect, key)} != {value}"
                )

View file

@ -1,19 +1,23 @@
import pytest
from typing import Any
from typing import Any, Dict
from io import BytesIO
from openpyxl import Workbook
from datetime import datetime
from backend.condition.parsing.peabody_parser import PeabodyParser
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition,
)
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
@pytest.fixture
def peabody_assets_xlsx_bytes() -> BytesIO:
wb = Workbook()
survey_records_d_and_lower = wb.active
survey_records_d_and_lower.title = "Survey Records - D & Lower"
survey_records_d_and_lower.append([
survey_records_d_and_lower.append(
[
"Lo_Reference",
"full_address",
"location_type_code",
@ -30,8 +34,10 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
"cloned",
"lo_type_code",
"condition_survey_date",
])
survey_records_d_and_lower.append([
]
)
survey_records_d_and_lower.append(
[
"B000RAND",
"1 RANDOM HOUSE LONDON",
3,
@ -47,9 +53,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
330,
"N",
3,
datetime(2025,12,4,9,17,0)
])
survey_records_d_and_lower.append([
datetime(2025, 12, 4, 9, 17, 0),
]
)
survey_records_d_and_lower.append(
[
"B000BLOCK",
"1100 BLOCK",
3,
@ -65,9 +73,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
330,
"N",
3,
datetime(2025,12,4,9,17,0)
])
survey_records_d_and_lower.append([
datetime(2025, 12, 4, 9, 17, 0),
]
)
survey_records_d_and_lower.append(
[
"B000FAKE",
"3 FAKE CLOSE LONDON",
3,
@ -83,9 +93,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
1500.7,
"N",
3,
datetime(2025,7,5,0,0,0)
])
survey_records_d_and_lower.append([
datetime(2025, 7, 5, 0, 0, 0),
]
)
survey_records_d_and_lower.append(
[
"B000MIS",
"99 MISC ROAD LONDON",
3,
@ -101,9 +113,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
None,
"N",
3,
None
])
survey_records_d_and_lower.append([
None,
]
)
survey_records_d_and_lower.append(
[
"B000MIS",
"99 MISC ROAD LONDON",
3,
@ -119,9 +133,9 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
3531,
"N",
3,
None
])
None,
]
)
stream = BytesIO()
wb.save(stream)
@ -129,18 +143,32 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
return stream
def test_peabody_parser_parses_conditions(peabody_assets_xlsx_bytes):
# Fixture: in-memory Location Reference -> UPRN lookup with one entry for each
# distinct Lo_Reference used by the workbook fixture in this module. Tests can
# pass it to the parser in place of a lookup loaded from a file.
@pytest.fixture
def location_ref_to_uprn_map() -> Dict[str, int]:
return {
"B000RAND": 1,
"B000BLOCK": 2,
"B000FAKE": 3,
"B000MIS": 4,
}
def test_peabody_parser_parses_conditions(
peabody_assets_xlsx_bytes, location_ref_to_uprn_map
):
# arrange
parser = PeabodyParser()
# act
result: Any = parser.parse(peabody_assets_xlsx_bytes)
result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map)
# assert
assert len(result) == 3
assert all(isinstance(item, PeabodyProperty) for item in result)
@pytest.fixture
def asset_condition_factory():
def _factory(full_address: str) -> PeabodyAssetCondition:
@ -165,6 +193,7 @@ def asset_condition_factory():
return _factory
@pytest.mark.parametrize(
"full_address, expected_block_level",
[
@ -175,7 +204,7 @@ def asset_condition_factory():
("81A-B GORE ROAD LONDON", True),
("73 & 74 HARVEST COURT ST. ALBANS", True),
("25 HAVERSHAM COURT GREENFORD", False),
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False)
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False),
],
)
def test_peabody_asset_is_block_level(

View file

@ -0,0 +1,164 @@
import pytest
from datetime import date
from backend.condition.persistence.condition_postgres import ConditionPostgres
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
from backend.condition.domain.element import Element
from backend.condition.domain.element_type import ElementType
from backend.condition.domain.aspect_condition import AspectCondition
from backend.condition.domain.aspect_type import AspectType
from backend.app.db.models.condition import PropertyConditionSurveyModel
from backend.condition.tests.custom_asserts import CustomAsserts
def test_map_survey_to_model() -> None:
    """
    ConditionPostgres.map_survey_to_model should carry a survey onto its ORM model.

    Builds a survey with three elements (windows, decoration, wall) and checks
    the mapped model against a plain-dict description of the same data using
    CustomAsserts.assert_property_condition_survey_model_matches_expected.
    """
    # arrange
    survey = PropertyConditionSurvey(
        uprn=1,
        elements=[
            Element(
                element_type=ElementType.EXTERNAL_WINDOWS,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.MATERIAL,
                        aspect_instance=1,
                        value="UPVC Double Glazed",
                        quantity=8,
                        install_date=None,
                        renewal_year=2036,
                        comments=None,
                    ),
                ],
            ),
            Element(
                element_type=ElementType.EXTERNAL_DECORATION,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.CONDITION,
                        aspect_instance=1,
                        value="Normal",
                        quantity=1,
                        install_date=None,
                        renewal_year=2029,
                        comments=None,
                    )
                ],
            ),
            Element(
                element_type=ElementType.EXTERNAL_WALL,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=1,
                        value="Pointed",
                        quantity=65,
                        install_date=None,
                        renewal_year=2045,
                        comments=None,
                    ),
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=1,
                        value="Pointing",
                        quantity=1,
                        install_date=None,
                        renewal_year=2069,
                        comments=None,
                    ),
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=2,
                        value="Tile Hung",
                        quantity=8,
                        install_date=None,
                        renewal_year=2049,
                        comments=None,
                    ),
                ],
            ),
        ],
        date=date(2000, 1, 1),
        source="Peabody",
    )

    # Expected values mirror the survey above, element by element and aspect by aspect.
    expected = {
        "uprn": 1,
        "date": date(2000, 1, 1),
        "source": "Peabody",
        "elements": [
            {
                "element_type": ElementType.EXTERNAL_WINDOWS,
                "element_instance": 1,
                "aspects": [
                    {
                        "aspect_type": AspectType.MATERIAL,
                        "aspect_instance": 1,
                        "value": "UPVC Double Glazed",
                        "quantity": 8,
                        "install_date": None,
                        "renewal_year": 2036,
                        "comments": None,
                    }
                ],
            },
            {
                "element_type": ElementType.EXTERNAL_DECORATION,
                "element_instance": 1,
                "aspects": [
                    {
                        "aspect_type": AspectType.CONDITION,
                        "aspect_instance": 1,
                        "value": "Normal",
                        "quantity": 1,
                        "install_date": None,
                        "renewal_year": 2029,
                        "comments": None,
                    }
                ],
            },
            {
                "element_type": ElementType.EXTERNAL_WALL,
                "element_instance": 1,
                "aspects": [
                    {
                        # Spelled out like every other expected aspect so the
                        # "Pointed" finish is described by the same key set.
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 1,
                        "value": "Pointed",
                        "quantity": 65,
                        "install_date": None,
                        "renewal_year": 2045,
                        "comments": None,
                    },
                    {
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 1,
                        "value": "Pointing",
                        "quantity": 1,
                        "install_date": None,
                        "renewal_year": 2069,
                        "comments": None,
                    },
                    {
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 2,
                        "value": "Tile Hung",
                        "quantity": 8,
                        "install_date": None,
                        "renewal_year": 2049,
                        "comments": None,
                    },
                ],
            },
        ],
    }

    # act
    model: PropertyConditionSurveyModel = ConditionPostgres.map_survey_to_model(survey)

    # assert (survey level)
    CustomAsserts.assert_property_condition_survey_model_matches_expected(
        model,
        expected,
    )

View file

@ -1,3 +1,4 @@
# Pandas and numpy
numpy==2.1.2
pandas==2.2.3

Binary file not shown.

View file

@ -0,0 +1,114 @@
import pandas as pd
import requests
from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode
from tqdm import tqdm
def sanitise_postcode(postcode: str) -> str | None:
"""
Normalise postcode for grouping.
- Uppercase
- Remove all whitespace
"""
if pd.isna(postcode):
return None
return postcode.upper().replace(" ", "")
def is_valid_postcode(postcode_clean: str) -> bool:
    """
    Validate postcode using postcodes.io.

    Expects a sanitised postcode (e.g. E84SQ).
    Returns True if valid, False otherwise. An empty/missing postcode, any
    request failure, or an unparseable response body is also reported as False.
    """
    from urllib.parse import quote

    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"

    if not postcode_clean:
        return False

    try:
        resp = requests.get(
            # Percent-encode the value so a stray "/", "?" or "#" in the data
            # cannot change which URL is requested. Alphanumeric postcodes
            # pass through unchanged.
            POSTCODES_IO_VALIDATE_URL.format(postcode=quote(postcode_clean, safe="")),
            timeout=5,
        )
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError):
        # Network issues, rate limits, etc. ValueError covers a body that is
        # not valid JSON (e.g. an HTML error page from a proxy).
        return False

    if not isinstance(payload, dict):
        return False
    # Coerce so the declared bool return type holds whatever the body contains.
    return bool(payload.get("result", False))
def main():
    """
    Resolve UPRNs for the first 500 rows of the "Sustainability" sheet of hackney.xlsx.

    Steps: sanitise postcodes, validate each unique postcode once, then for
    every valid postcode fetch EPC data and resolve UPRNs for that group of rows.
    Groups with no EPC results or that raise are kept, tagged via a "status"
    column, rather than aborting the run.
    """
    df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability")
    df = df.head(500)

    # Sanitise postcodes
    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)

    # --- validate once per unique postcode (save API calls) ---
    # Get unique, non-null postcodes
    unique_postcodes = df["postcode_clean"].dropna().unique()

    # Validate each postcode once (tqdm shows progress)
    postcode_validity = {
        pc: is_valid_postcode(pc)
        for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
    }

    # Map validity back onto dataframe. Rows whose postcode is missing map to
    # NaN; .eq(True) turns those into False so the column is a clean boolean
    # mask (boolean indexing rejects masks that contain NaN).
    df["postcode_valid"] = df["postcode_clean"].map(postcode_validity).eq(True)

    results = []

    for postcode, group_df in tqdm(
        df[df["postcode_valid"]].groupby("postcode_clean"),
        desc="Resolving UPRNs by postcode",
    ):
        try:
            epc_df = get_epc_data_with_postcode(postcode)

            if epc_df.empty:
                tmp = group_df.copy()
                tmp["found_uprn"] = None
                tmp["status"] = "no_epc_results"
                results.append(tmp)
                continue

            resolved = resolve_uprns_for_postcode_group(
                group_df=group_df,
                epc_df=epc_df,
            )
            results.append(resolved)

        except Exception as e:
            # Best effort: record the failure against the group and carry on.
            tmp = group_df.copy()
            tmp["found_uprn"] = None
            tmp["status"] = "exception"
            tmp["error"] = str(e)
            results.append(tmp)

    if not results:
        # No valid postcode groups: pd.concat([]) would raise, and there is
        # nothing to inspect.
        return

    final_df = pd.concat(results, ignore_index=True)

    view_columns = [
        "best_match_lexiscore", "Address 1",
        "best_match_address", "Postcode",
        "UPRN", "best_match_uprn",
    ]

    # Inspection views (handy under a debugger). reindex() yields NaN-filled
    # columns instead of raising KeyError when no group produced match columns.
    matches_view = final_df.reindex(columns=view_columns)
    positive_matches = matches_view[matches_view["best_match_lexiscore"] > 0]


if __name__ == "__main__":
    main()

View file

@ -1,5 +1,11 @@
import os
from backend.app.config import get_settings
import os
from dotenv import load_dotenv
import os
# Load .env in conftest.py directory for local development
load_dotenv()
DEFAULT_ENV = {
"API_KEY": "test",
@ -8,7 +14,10 @@ DEFAULT_ENV = {
"DATA_BUCKET": "test",
"PLAN_TRIGGER_BUCKET": "test",
"ENGINE_SQS_URL": "test",
"EPC_AUTH_TOKEN": "test", # overridden in GitHub Actions
"EPC_AUTH_TOKEN": os.getenv(
"EPC_AUTH_TOKEN",
"test",
), # overridden in GitHub Actions
"GOOGLE_SOLAR_API_KEY": "test",
"DB_HOST": "localhost",
"DB_USERNAME": "test",

View file

@ -1,111 +1,111 @@
import pandas as pd
epc_c_recommendations = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
"solid floor, ashp 3.0 - corrected.xlsx"
)
epc_b_recommendations = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
"solid floor, ashp 3.0 - corrected.xlsx"
)
# epc_c_recommendations = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
# "solid floor, ashp 3.0 - corrected.xlsx"
# )
# epc_b_recommendations = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
# "solid floor, ashp 3.0 - corrected.xlsx"
# )
epc_c_movers = epc_b_recommendations[
epc_b_recommendations["current_epc_rating"] == "Epc.C"
]
epc_c_movers["property_type"].value_counts()
# epc_c_movers = epc_b_recommendations[
# epc_b_recommendations["current_epc_rating"] == "Epc.C"
# ]
# epc_c_movers["property_type"].value_counts()
house_epc_c_movers = epc_c_movers[
epc_c_movers["property_type"] == "House"
]
house_epc_c_movers_with_solar = house_epc_c_movers[
~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
]
# house_epc_c_movers = epc_c_movers[
# epc_c_movers["property_type"] == "House"
# ]
# house_epc_c_movers_with_solar = house_epc_c_movers[
# ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
# ]
house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
]
# house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
# ~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
# ]
flat_epc_c_movers = epc_c_movers[
epc_c_movers["property_type"] == "Flat"
]
# flat_epc_c_movers = epc_c_movers[
# epc_c_movers["property_type"] == "Flat"
# ]
epc_c_recommendations["sap_points"].mean()
epc_c_recommendations["sap_points"].mean()
# epc_c_recommendations["sap_points"].mean()
# epc_c_recommendations["sap_points"].mean()
measure_cols = [
"air_source_heat_pump",
"boiler_upgrade",
"cavity_wall_insulation",
"double_glazing",
"external_wall_insulation",
"flat_roof_insulation",
"high_heat_retention_storage_heaters",
"internal_wall_insulation",
"loft_insulation",
"low_energy_lighting",
"mechanical_ventilation",
"room_roof_insulation",
"roomstat_programmer_trvs",
"sealing_open_fireplace",
"secondary_glazing",
"secondary_heating",
"solar_pv",
"solar_pv_with_battery",
"suspended_floor_insulation",
"time_temperature_zone_control",
]
# measure_cols = [
# "air_source_heat_pump",
# "boiler_upgrade",
# "cavity_wall_insulation",
# "double_glazing",
# "external_wall_insulation",
# "flat_roof_insulation",
# "high_heat_retention_storage_heaters",
# "internal_wall_insulation",
# "loft_insulation",
# "low_energy_lighting",
# "mechanical_ventilation",
# "room_roof_insulation",
# "roomstat_programmer_trvs",
# "sealing_open_fireplace",
# "secondary_glazing",
# "secondary_heating",
# "solar_pv",
# "solar_pv_with_battery",
# "suspended_floor_insulation",
# "time_temperature_zone_control",
# ]
epc_c_melted = (
epc_c_recommendations
.melt(
id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
value_vars=measure_cols,
var_name="measure_type",
value_name="value",
)
.dropna(subset=["value"])
)
epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
# epc_c_melted = (
# epc_c_recommendations
# .melt(
# id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
# value_vars=measure_cols,
# var_name="measure_type",
# value_name="value",
# )
# .dropna(subset=["value"])
# )
# epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
# epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
epc_b_melted = (
epc_b_recommendations
.melt(
id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
value_vars=measure_cols,
var_name="measure_type",
value_name="value",
)
.dropna(subset=["value"])
)
# epc_b_melted = (
# epc_b_recommendations
# .melt(
# id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
# value_vars=measure_cols,
# var_name="measure_type",
# value_name="value",
# )
# .dropna(subset=["value"])
# )
epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
# epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
# epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
measures_compared = epc_c_measures.merge(
epc_b_measures,
left_on="measure_type",
right_on="measure_type",
suffixes=("_epc_c", "_epc_b"),
)
# measures_compared = epc_c_measures.merge(
# epc_b_measures,
# left_on="measure_type",
# right_on="measure_type",
# suffixes=("_epc_c", "_epc_b"),
# )
epc_c_retrofits = epc_c_recommendations[
epc_c_recommendations["total_retrofit_cost"] > 0
]
# epc_c_retrofits = epc_c_recommendations[
# epc_c_recommendations["total_retrofit_cost"] > 0
# ]
epc_b_retrofits = epc_b_recommendations[
epc_b_recommendations["total_retrofit_cost"] > 0
]
# epc_b_retrofits = epc_b_recommendations[
# epc_b_recommendations["total_retrofit_cost"] > 0
# ]
epc_c_retrofits["sap_points"].mean()
epc_b_retrofits["sap_points"].mean()
# epc_c_retrofits["sap_points"].mean()
# epc_b_retrofits["sap_points"].mean()
properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
# properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
properties_in_both["total_retrofit_cost_epc_c"].mean()
properties_in_both["sap_points_epc_c"].mean()
properties_in_both["total_retrofit_cost_epc_b"].mean()
properties_in_both["sap_points_epc_b"].mean()
# properties_in_both["total_retrofit_cost_epc_c"].mean()
# properties_in_both["sap_points_epc_c"].mean()
# properties_in_both["total_retrofit_cost_epc_b"].mean()
# properties_in_both["sap_points_epc_b"].mean()
# Solar PV savings - we need the amount of solar PV bill savings
from sqlalchemy.orm import sessionmaker
@ -114,16 +114,12 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from collections import defaultdict
PORTFOLIO_ID = 435 # Peabody
PORTFOLIO_ID = 485 # Peabody
SCENARIOS = [
908,
909,
910,
970
]
scenario_names = {
908: "EPC C - no solid floor, ashp 3.0",
909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
910: "EPC B - no solid floor, no EWI, ashp 3.0"
970: "EPC C - no solid floor, ashp 3.0",
}
@ -236,307 +232,266 @@ recommendations_df = pd.DataFrame(recommendations_data)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
s_id = 910
ps_w_a_plan = plans_df[plans_df["scenario_id"] == s_id].copy()
# Take the newest by scenario id
ps_w_a_plan = ps_w_a_plan.sort_values("created_at", ascending=False).drop_duplicates(
subset=["property_id"]
)
z = ps_w_a_plan[
ps_w_a_plan["cost_of_works"] > 0
].copy()
z2 = properties_df[properties_df["property_id"].isin(z["property_id"].values)]
# '', 'hot_water_cost_current',
# 'lighting_cost_current', 'appliances_cost_current',
# 'gas_standing_charge', 'electricity_standing_charge'
z2["total_bills"] = z2["heating_cost_current"] + z2["hot_water_cost_current"] + z2["lighting_cost_current"] + z2[
"appliances_cost_current"
] + z2["gas_standing_charge"] + z2["electricity_standing_charge"]
with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
properties_df.to_excel(writer, sheet_name="properties", index=False)
from tqdm import tqdm
# For a property ID, find a property where the no EWI/IWI approach is more expensive than the EWI approach
pids = properties_df["property_id"].unique()
for pid in tqdm(pids):
# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
if pid in [603272, 550550, 574493]:
continue
# get the plans
property_plan = plans_df[plans_df["property_id"] == int(pid)]
# Take the newest plan by scenario id
property_plan = property_plan.sort_values("created_at", ascending=False).drop_duplicates(
subset=["scenario_id"]
)
a = property_plan[property_plan["scenario_id"] == 909].squeeze() # no EWI/IWI
b = property_plan[property_plan["scenario_id"] == 908].squeeze() # EWI
if (a["cost_of_works"] > b["cost_of_works"]) and (
a["post_epc_rating"].value == "C") and (b["cost_of_works"] > 5000):
bah
solar_pv_recommendations = recommendations_df[
recommendations_df["measure_type"] == "solar_pv"
]
# # Check tenures
# initial_asset_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
# "- Data Extracts for Domna.xlsx",
# sheet_name="Properties"
# )
# sustainability_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
# "- Data Extracts for Domna.xlsx",
# sheet_name="Sustainability"
# )
solid_wall_recommendation = recommendations_df[
recommendations_df["scenario_id"].isin([908]) &
recommendations_df["measure_type"].isin(["internal_wall_insulation"]) &
recommendations_df["default"]
]
average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
# Add on scenarion names
average_savings["scenario_name"] = average_savings["scenario_id"].map(scenario_names)
# sustainability_sample = sustainability_data[
# sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
# ]
# Check tenures
initial_asset_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Properties"
)
sustainability_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
)
# sustainability_sample = sustainability_sample.merge(
# initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
# )
sustainability_sample = sustainability_data[
sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
]
# block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
# block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
sustainability_sample = sustainability_sample.merge(
initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
)
# initial_asset_data.columns
# initial_asset_data["LeaseType"].value_counts()
block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
# # sustainability_sample["Tenure Group"].value_counts()
# # Tenure Group
# # General Needs 57787
# # Home Ownership 25471
# # Care & Supported Housing 4239
# # Rental 2677
# # Other 188
initial_asset_data.columns
initial_asset_data["LeaseType"].value_counts()
# df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
# df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
# sustainability_sample["Tenure Group"].value_counts()
# Tenure Group
# General Needs 57787
# Home Ownership 25471
# Care & Supported Housing 4239
# Rental 2677
# Other 188
# tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
# tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
# sample_data = initial_asset_data[
# ~initial_asset_data["Ownership Type"].isin(
# [
# # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
# # Freeholder
# "FREEHOLDER", # 19517 properties
# # HOMEBUY / EQUITY LOAN
# "Rent to Homebuy", # 1 property
# # Leaseholder
# "LEASEHOLD 100%", # 8455 properties
# "Owned and Managed - 999 year lease", # 2076 properties
# "Managed but not Owned-Private Lease", # 159 properties
# "Owned and managed LEASEHOLD", # 26 properties
# # Outright Sale - can't find anything matching
# # SHARED EQUITY
# "Shared Ownership", # 4065 properties
# "Shared Ownership Owned Not Managed", # 23 properties
# # Extra categories which seem sensible to exclude
# "NOT MANAGED AND NOT OWNED"
# ]
# )
# ]
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
# sample_data["Ownership Type"].value_counts()
sample_data = initial_asset_data[
~initial_asset_data["Ownership Type"].isin(
[
# Commercial # Everything is resi - based on the Residential Indicator variable - all are true
# Freeholder
"FREEHOLDER", # 19517 properties
# HOMEBUY / EQUITY LOAN
"Rent to Homebuy", # 1 property
# Leaseholder
"LEASEHOLD 100%", # 8455 properties
"Owned and Managed - 999 year lease", # 2076 properties
"Managed but not Owned-Private Lease", # 159 properties
"Owned and managed LEASEHOLD", # 26 properties
# Outright Sale - can't find anything matching
# SHARED EQUITY
"Shared Ownership", # 4065 properties
"Shared Ownership Owned Not Managed", # 23 properties
# Extra categories which seem sensible to exclude
"NOT MANAGED AND NOT OWNED"
]
)
]
# sample_data = initial_asset_data[
# initial_asset_data["Ownership Type"].isin(
# [
# "Owned and Managed",
# "Owned and Managed - 999 year lease",
# "Owned and managed LEASEHOLD",
# "LEASEHOLD 100%",
# "DATALOAD DEFAULT"
# ]
# )
# ]
# dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
# dropped["Ownership Type"].value_counts()
sample_data["Ownership Type"].value_counts()
# for value in [
# # Commercial # Everything is resi, so should be fine. No matches
# # Freeholder
# "FREEHOLDER", # 19517 properties
# # HOMEBUY / EQUITY LOAN
# "Rent to Homebuy", # 1 property
# # Leaseholder
# "LEASEHOLD 100%", # 8455 properties
# "Owned and Managed - 999 year lease", # 2076 properties
# "Managed but not Owned-Private Lease", # 159 properties
# "Owned and managed LEASEHOLD", # 26 properties
# # Outright Sale - can't find anything matching
# # SHARED EQUITY
# "Shared Ownership", # 4065 properties
# "Shared Ownership Owned Not Managed", # 23 properties
# ]:
# print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
sample_data = initial_asset_data[
initial_asset_data["Ownership Type"].isin(
[
"Owned and Managed",
"Owned and Managed - 999 year lease",
"Owned and managed LEASEHOLD",
"LEASEHOLD 100%",
"DATALOAD DEFAULT"
]
)
]
dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
dropped["Ownership Type"].value_counts()
# house_types = [
# "HOUSE",
# "BUNGALOW",
# "MAISONETTE",
# "DUPLEX",
# ]
for value in [
# Commercial # Everything is resi, so should be fine. No matches
# Freeholder
"FREEHOLDER", # 19517 properties
# HOMEBUY / EQUITY LOAN
"Rent to Homebuy", # 1 property
# Leaseholder
"LEASEHOLD 100%", # 8455 properties
"Owned and Managed - 999 year lease", # 2076 properties
"Managed but not Owned-Private Lease", # 159 properties
"Owned and managed LEASEHOLD", # 26 properties
# Outright Sale - can't find anything matching
# SHARED EQUITY
"Shared Ownership", # 4065 properties
"Shared Ownership Owned Not Managed", # 23 properties
]:
print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
# guaranteed_control = [
# "Owned and Managed",
# "Owned and Managed - 999 year lease",
# "Owned and managed LEASEHOLD",
# "LEASEHOLD 100%",
# "DATALOAD DEFAULT",
# ]
house_types = [
"HOUSE",
"BUNGALOW",
"MAISONETTE",
"DUPLEX",
]
# sample_data = initial_asset_data[
# (
# initial_asset_data["Ownership Type"].isin(guaranteed_control)
# )
# |
# (
# (initial_asset_data["Ownership Type"] == "FREEHOLDER")
# &
# (initial_asset_data["Property Type"].isin(house_types))
# )
# ]
guaranteed_control = [
"Owned and Managed",
"Owned and Managed - 999 year lease",
"Owned and managed LEASEHOLD",
"LEASEHOLD 100%",
"DATALOAD DEFAULT",
]
# fabric_retrofit_sample = initial_asset_data[
# initial_asset_data["Ownership Type"].isin(
# [
# "Owned and Managed",
# "FREEHOLDER",
# "DATALOAD DEFAULT",
# ]
# )
# ]
sample_data = initial_asset_data[
(
initial_asset_data["Ownership Type"].isin(guaranteed_control)
)
|
(
(initial_asset_data["Ownership Type"] == "FREEHOLDER")
&
(initial_asset_data["Property Type"].isin(house_types))
)
]
# initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
fabric_retrofit_sample = initial_asset_data[
initial_asset_data["Ownership Type"].isin(
[
"Owned and Managed",
"FREEHOLDER",
"DATALOAD DEFAULT",
]
)
]
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
# z = initial_asset_data[
# ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
# ]
initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
# block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
# zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
z = initial_asset_data[
~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
]
# potential_sample = initial_asset_data[
# ~pd.isnull(initial_asset_data["BlockCode"])
# ]
block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
# compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
# initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
# left_on="Property Type",
# right_on="Property Type",
# suffixes=("_on_block_codes", "_overall")
# )
potential_sample = initial_asset_data[
~pd.isnull(initial_asset_data["BlockCode"])
]
# # Comparison of smaller sample vs overall
# new_asset_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
# "- Peabody "
# "- Data Extracts for Domna v2.xlsx",
# sheet_name="Properties"
# )
compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
left_on="Property Type",
right_on="Property Type",
suffixes=("_on_block_codes", "_overall")
)
# new_sustainability_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
# "- Peabody "
# "- Data Extracts for Domna v2.xlsx",
# sheet_name="Sustainability"
# )
# Comparison of smaller sample vs overall
new_asset_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
"- Peabody "
"- Data Extracts for Domna v2.xlsx",
sheet_name="Properties"
)
# sap_bands = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
# "08012026.xlsx",
# )
new_sustainability_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
"- Peabody "
"- Data Extracts for Domna v2.xlsx",
sheet_name="Sustainability"
)
# combined = new_asset_data.merge(
# new_sustainability_data,
# left_on="UPRN",
# right_on="Org Ref",
# suffixes=("_asset", "_sustainability")
# ).merge(
# sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
# )
# reduced_sample = combined[
# ~combined["AH Tenure"].isin(
# ["Commercial",
# "Freeholder",
# "HOMEBUY / EQUITY LOAN",
# "Leaseholder",
# "Outright Sale",
# "SHARED EQUITY",
# "Shared Ownership"]
# )
# ].copy()
sap_bands = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
"08012026.xlsx",
)
# # property types
# property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
# combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
# left_on="Property Type",
# right_on="Property Type",
# suffixes=("_reduced_sample", "_overall")
# )
combined = new_asset_data.merge(
new_sustainability_data,
left_on="UPRN",
right_on="Org Ref",
suffixes=("_asset", "_sustainability")
).merge(
sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
)
reduced_sample = combined[
~combined["AH Tenure"].isin(
["Commercial",
"Freeholder",
"HOMEBUY / EQUITY LOAN",
"Leaseholder",
"Outright Sale",
"SHARED EQUITY",
"Shared Ownership"]
)
].copy()
# # lodged ratings
# lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
# normalize=True).to_frame().reset_index().merge(
# combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
# left_on="Lodged EPC Band",
# right_on="Lodged EPC Band",
# suffixes=("_reduced_sample", "_overall")
# )
# property types
property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
left_on="Property Type",
right_on="Property Type",
suffixes=("_reduced_sample", "_overall")
)
# # modelled ratings
# modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
# normalize=True).to_frame().reset_index().merge(
# combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
# left_on="SAP Band",
# right_on="SAP Band",
# suffixes=("_reduced_sample", "_overall")
# )
# lodged ratings
lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
normalize=True).to_frame().reset_index().merge(
combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
left_on="Lodged EPC Band",
right_on="Lodged EPC Band",
suffixes=("_reduced_sample", "_overall")
)
# # Testing measures
# m1 = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
# "solid floor, ashp 3.0 - 20250113 final.xlsx"
# )
# m2 = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
# "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
# )
# modelled ratings
modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
normalize=True).to_frame().reset_index().merge(
combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
left_on="SAP Band",
right_on="SAP Band",
suffixes=("_reduced_sample", "_overall")
)
# compare = m1.merge(
# m2,
# left_on="uprn",
# right_on="uprn",
# suffixes=("_ewi_iwi", "_no_ewi_iwi")
# )
# Testing measures
m1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
"solid floor, ashp 3.0 - 20250113 final.xlsx"
)
m2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
"solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
)
# # Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
# only_no_ewi_iwi = compare[
# (compare["total_retrofit_cost_ewi_iwi"] == 0) &
# (compare["total_retrofit_cost_no_ewi_iwi"] != 0)
# ]
compare = m1.merge(
m2,
left_on="uprn",
right_on="uprn",
suffixes=("_ewi_iwi", "_no_ewi_iwi")
)
# (m1["total_retrofit_cost"] > 0).sum()
# (m2["total_retrofit_cost"] > 0).sum()
# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
only_no_ewi_iwi = compare[
(compare["total_retrofit_cost_ewi_iwi"] == 0) &
(compare["total_retrofit_cost_no_ewi_iwi"] != 0)
]
# with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
(m1["total_retrofit_cost"] > 0).sum()
(m2["total_retrofit_cost"] > 0).sum()
with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
# z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]

View file

@ -0,0 +1,51 @@
## Checklist for adding a new Lambda
### 1. Create the Lambda scaffold
- Copy the template:
cp -r lambda/_template lambda/<lambda_name>
---
### 2. Add infrastructure prerequisites (shared stack)
- Add a new ECR repository in:
infrastructure/terraform/shared/main.tf
- Apply the shared stack
- This requires commenting out the 'if env.stage == "prod"' condition in .github/workflows/deploy_terraform.yml
- Verify the ECR repository exists in AWS
---
### 3. Add Docker build configuration
- Create a `Dockerfile` for the Lambda
- Verify the Dockerfile path and build context
- Add a new image build job in `deploy_terraform.yml` using `_build_image.yml`
---
### 4. Wire the Lambda deploy job (CI)
- Add a deploy job using `_deploy_lambda.yml`
- Ensure the deploy job depends on the image build job
---
### 5. Deploy
- Push changes to GitHub
- CI will:
1. Build and push the Docker image
2. Deploy the Lambda
3. Verify everything deployed. Good things to check:
- ECR with image
- SQS
- Trigger SQS
- CloudWatch logs
---
### 6. Clean up
1. Delete the README copied into lambda/<lambda_name> if you created the scaffold with cp -r
---
## Please feel free to update this document to make it easier for the next person

View file

@ -0,0 +1,14 @@
# Template root module for a new SQS-triggered Lambda. It instantiates the shared
# lambda_with_sqs module and passes it the stage, the resolved image URI and the
# function's environment variables.
module "lambda" {
source = "../modules/lambda_with_sqs"
# Placeholder: replace with the quoted lambda name. As written this line is not
# valid HCL, so Terraform cannot run until it has been filled in.
name = REPLACE ME #"address2uprn" for example
stage = var.stage
image_uri = local.image_uri
# Environment variables handed to the function.
environment = {
STAGE = var.stage
LOG_LEVEL = "info"
}
}

View file

@ -0,0 +1,16 @@
# Template Terraform settings: AWS provider constraint, S3 remote state and the
# minimum Terraform version.
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 4.16"
}
}
backend "s3" {
# Placeholder: replace with the quoted name of this lambda's state bucket.
# The unquoted REPLACE_ME is not a usable value as written.
bucket = REPLACE_ME
key = "terraform.tfstate"
region = "eu-west-2"
}
required_version = ">= 1.2.0"
}

View file

@ -0,0 +1,27 @@
# Inputs for one Lambda deployment, plus the digest-pinned image URI derived from them.

variable "lambda_name" {
  type        = string
  description = "Logical name of the lambda (e.g. address2uprn)"
}

variable "stage" {
  type        = string
  description = "Deployment stage (e.g. dev, prod)"
}

variable "ecr_repo_url" {
  type        = string
  description = "ECR repository URL (no tag, no digest)"
}

variable "image_digest" {
  type        = string
  description = "Image digest (sha256:...)"

  # Reject empty or tag-style values at plan time; otherwise local.image_uri
  # becomes a malformed "repo@..." reference that only fails during apply.
  validation {
    condition     = can(regex("^sha256:[0-9a-f]{64}$", var.image_digest))
    error_message = "The image_digest value must have the form \"sha256:<64 lowercase hex characters>\"."
  }
}

locals {
  # Digest-pinned image reference: <repository URL>@<digest>.
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}

output "resolved_image_uri" {
  value = local.image_uri
}

View file

@ -0,0 +1,14 @@
# address2uprn Lambda, built from the shared lambda_with_sqs module.
module "address2uprn" {
  source = "../modules/lambda_with_sqs"

  name      = "address2uprn"
  stage     = var.stage
  image_uri = local.image_uri

  # Environment variables handed to the function.
  environment = {
    LOG_LEVEL = "info"
    STAGE     = var.stage
  }
}

View file

@ -0,0 +1,17 @@
# Terraform settings for the address2uprn stack.
terraform {
  required_version = ">= 1.2.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.16"
    }
  }

  # Remote state in S3.
  # NOTE(review): the key is fixed, so every stage shares one state object unless
  # workspaces or -backend-config overrides are used - confirm how CI handles this.
  backend "s3" {
    bucket = "address2uprn-terraform-state"
    key    = "terraform.tfstate"
    region = "eu-west-2"
  }
}

View file

@ -0,0 +1,27 @@
# Inputs for the address2uprn deployment, plus the digest-pinned image URI derived from them.

variable "lambda_name" {
  type        = string
  description = "Logical name of the lambda (e.g. address2uprn)"
}

variable "stage" {
  type        = string
  description = "Deployment stage (e.g. dev, prod)"
}

variable "ecr_repo_url" {
  type        = string
  description = "ECR repository URL (no tag, no digest)"
}

variable "image_digest" {
  type        = string
  description = "Image digest (sha256:...)"

  # Reject empty or tag-style values at plan time; otherwise local.image_uri
  # becomes a malformed "repo@..." reference that only fails during apply.
  validation {
    condition     = can(regex("^sha256:[0-9a-f]{64}$", var.image_digest))
    error_message = "The image_digest value must have the form \"sha256:<64 lowercase hex characters>\"."
  }
}

locals {
  # Digest-pinned image reference: <repository URL>@<digest>.
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}

output "resolved_image_uri" {
  value = local.image_uri
}

View file

@ -0,0 +1,44 @@
# lambda_with_sqs: composes an execution role, a queue, a container-image Lambda
# and the queue-to-Lambda trigger. All resource names are derived from
# var.name and var.stage.

# --- Execution role -----------------------------------------------------------
module "role" {
  source = "../../../modules/lambda_execution_role"

  name = "${var.name}-lambda-${var.stage}"
}

# --- Queue (the sqs_queue module also creates the DLQ) --------------------------
module "queue" {
  source = "../../../modules/sqs_queue"

  name = "${var.name}-queue-${var.stage}"
}

# --- Function -------------------------------------------------------------------
module "lambda" {
  source = "../../../modules/lambda_service"

  name        = "${var.name}-${var.stage}"
  image_uri   = var.image_uri
  role_arn    = module.role.role_arn
  memory_size = var.memory_size
  timeout     = var.timeout
  environment = var.environment
}

# --- Queue -> function trigger ----------------------------------------------------
module "sqs_trigger" {
  source = "../../../modules/lambda_sqs_trigger"

  queue_arn        = module.queue.queue_arn
  lambda_arn       = module.lambda.lambda_arn
  lambda_role_name = module.role.role_name
  batch_size       = var.batch_size
}

View file

@ -0,0 +1,11 @@
# Identifiers of the function and queue created by this module.

output "lambda_arn" {
  value = module.lambda.lambda_arn
}

output "queue_arn" {
  value = module.queue.queue_arn
}

output "queue_url" {
  value = module.queue.queue_url
}

View file

@ -0,0 +1,36 @@
# Inputs of the lambda_with_sqs module.

variable "name" {
  description = "Base name; combined with the stage to name the role, queue and function."
  type        = string
}

variable "stage" {
  description = "Deployment stage, appended to every resource name."
  type        = string
}

variable "image_uri" {
  description = "Container image URI passed to the Lambda function."
  type        = string
}

variable "region" {
  # NOTE(review): not referenced by this module's main.tf - confirm whether it is still needed.
  description = "AWS region."
  type        = string
  default     = "eu-west-2"
}

variable "timeout" {
  description = "Timeout passed to the Lambda function."
  type        = number
  default     = 60
}

variable "memory_size" {
  description = "Memory size passed to the Lambda function."
  type        = number
  default     = 1024
}

variable "environment" {
  description = "Environment variables passed to the Lambda function."
  type        = map(string)
  default     = {}
}

variable "batch_size" {
  description = "Batch size passed to the SQS trigger."
  type        = number
  default     = 10
}

View file

@ -0,0 +1,30 @@
# ECR repository named "<name>-<stage>", with scan-on-push enabled.
resource "aws_ecr_repository" "this" {
  name                 = "${var.name}-${var.stage}"
  image_tag_mutability = "MUTABLE"

  image_scanning_configuration {
    scan_on_push = true
  }
}

# Single lifecycle rule: once the repository holds more than var.retain_count
# images (tagged or not), the excess images are expired.
resource "aws_ecr_lifecycle_policy" "this" {
  repository = aws_ecr_repository.this.name

  policy = jsonencode({
    rules = [
      {
        rulePriority = 1
        description  = "Expire old images"
        selection = {
          tagStatus   = "any"
          countType   = "imageCountMoreThan"
          countNumber = var.retain_count
        }
        action = {
          type = "expire"
        }
      }
    ]
  })
}

View file

@ -0,0 +1,11 @@
# Name, URL and ARN of the repository created by this module.

output "repository_name" {
  value = aws_ecr_repository.this.name
}

output "repository_url" {
  value = aws_ecr_repository.this.repository_url
}

output "repository_arn" {
  value = aws_ecr_repository.this.arn
}

View file

@ -0,0 +1,15 @@
# Inputs of the container_registry module.

variable "name" {
  type        = string
  description = "Base name of the repository (without stage)"
}

variable "stage" {
  type        = string
  description = "Deployment stage (e.g. dev, prod)"
}

# Used as countNumber in the repository's lifecycle rule.
variable "retain_count" {
  type        = number
  default     = 10
  description = "Number of images to retain"
}

View file

@ -1,3 +1,6 @@
# This ecr works for things deployed by serverless.
# TODO: unify ecr and container_registry to one
resource "aws_ecr_repository" "my_repository" {
name = "${var.ecr_name}"
image_tag_mutability = "MUTABLE"

View file

@ -2,3 +2,9 @@ output "ecr_repository_name" {
description = "Name of the EPR repo in AWS"
value = aws_ecr_repository.my_repository.name
}
output "ecr_repository_url" {
description = "Full ECR repository URL"
value = aws_ecr_repository.my_repository.repository_url
}

View file

@ -0,0 +1,37 @@
# Trust policy: only the Lambda service may assume this role.
data "aws_iam_policy_document" "assume" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["lambda.amazonaws.com"]
    }
  }
}

# The execution role itself, named by the caller.
resource "aws_iam_role" "this" {
  name               = var.name
  assume_role_policy = data.aws_iam_policy_document.assume.json
}

# AWS-managed basic execution policy.
resource "aws_iam_role_policy_attachment" "basic_logs" {
  role       = aws_iam_role.this.name
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
}

# Inline policy allowing image pulls from ECR.
# NOTE(review): all three actions are granted on "*"; the two image-read actions
# could probably be scoped to the specific repository ARN - confirm.
resource "aws_iam_role_policy" "ecr_pull" {
  role = aws_iam_role.this.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect = "Allow"
      Action = [
        "ecr:GetAuthorizationToken",
        "ecr:BatchGetImage",
        "ecr:GetDownloadUrlForLayer"
      ]
      Resource = "*"
    }]
  })
}

View file

@ -0,0 +1,7 @@
# ARN and name of the execution role created by this module.

output "role_arn" {
  value = aws_iam_role.this.arn
}

output "role_name" {
  value = aws_iam_role.this.name
}

View file

@ -0,0 +1,4 @@
# Used verbatim as the name of the IAM role.
variable "name" {
  type        = string
  description = "IAM role name for the Lambda execution role"
}

View file

@ -0,0 +1,15 @@
# Container-image Lambda function. publish = true is set, so a version is
# published by this resource.
resource "aws_lambda_function" "this" {
  function_name = var.name
  role          = var.role_arn

  package_type = "Image"
  image_uri    = var.image_uri
  publish      = true

  memory_size = var.memory_size
  timeout     = var.timeout

  environment {
    variables = var.environment
  }
}

View file

@ -0,0 +1,3 @@
# ARN of the function created by this module.
output "lambda_arn" {
  value = aws_lambda_function.this.arn
}

View file

@ -0,0 +1,18 @@
# Inputs of the lambda_service module.

variable "name" {
  type = string
}

variable "role_arn" {
  type = string
}

variable "image_uri" {
  type = string
}

variable "timeout" {
  type    = number
  default = 30
}

variable "memory_size" {
  type    = number
  default = 512
}

variable "environment" {
  type    = map(string)
  default = {}
}

View file

@ -0,0 +1,23 @@
# Connects the SQS queue to the Lambda function as an event source.
resource "aws_lambda_event_source_mapping" "this" {
  event_source_arn = var.queue_arn
  function_name    = var.lambda_arn
  batch_size       = var.batch_size
  enabled          = true

  # The mapping can only be created once the execution role is allowed to read
  # from the queue. Nothing above references the policy, so without this explicit
  # dependency Terraform may create the mapping first and the apply fails.
  depends_on = [aws_iam_role_policy.allow_sqs]
}
# Inline policy on the Lambda's role: receive, delete and inspect messages on
# the one queue that triggers it.
resource "aws_iam_role_policy" "allow_sqs" {
  role = var.lambda_role_name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Effect   = "Allow"
      Resource = var.queue_arn
      Action = [
        "sqs:ReceiveMessage",
        "sqs:DeleteMessage",
        "sqs:GetQueueAttributes"
      ]
    }]
  })
}

View file

@ -0,0 +1,8 @@
# Inputs of the lambda_sqs_trigger module.

variable "lambda_arn" {
  type = string
}

variable "lambda_role_name" {
  type = string
}

variable "queue_arn" {
  type = string
}

variable "batch_size" {
  type    = number
  default = 10
}

View file

@ -0,0 +1,14 @@
# Visibility timeout of the main queue. It was previously hard-coded to 120; it is
# now an input, with the same default, so callers whose Lambda timeout differs can
# keep the two in step. It is declared here so this file is self-contained.
variable "visibility_timeout_seconds" {
  description = "Visibility timeout of the main queue in seconds. Should be at least the timeout of the Lambda consuming the queue."
  type        = number
  default     = 120
}

# Dead-letter queue that receives messages the main queue gives up on.
resource "aws_sqs_queue" "dlq" {
  name = "${var.name}-dlq"
}

# Main queue. A message is moved to the DLQ after var.max_receive_count receives.
resource "aws_sqs_queue" "this" {
  name                       = var.name
  visibility_timeout_seconds = var.visibility_timeout_seconds

  redrive_policy = jsonencode({
    deadLetterTargetArn = aws_sqs_queue.dlq.arn
    maxReceiveCount     = var.max_receive_count
  })
}

View file

@ -0,0 +1,7 @@
# ARN and URL of the main queue (not the DLQ).

output "queue_arn" {
  value = aws_sqs_queue.this.arn
}

output "queue_url" {
  value = aws_sqs_queue.this.url
}

View file

@ -0,0 +1,6 @@
# Inputs of the sqs_queue module.

variable "name" {
  type = string
}

# Used as maxReceiveCount in the main queue's redrive policy.
variable "max_receive_count" {
  type    = number
  default = 5
}

View file

@ -0,0 +1,30 @@
# S3 bucket intended to hold Terraform state: versioned, encrypted at rest and
# closed to public access.
resource "aws_s3_bucket" "this" {
  bucket = var.bucket_name
}

# Keep every object version.
resource "aws_s3_bucket_versioning" "this" {
  bucket = aws_s3_bucket.this.id

  versioning_configuration {
    status = "Enabled"
  }
}

# Default server-side encryption with AES256.
resource "aws_s3_bucket_server_side_encryption_configuration" "this" {
  bucket = aws_s3_bucket.this.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
  }
}

# All four public-access blocks switched on.
resource "aws_s3_bucket_public_access_block" "this" {
  bucket = aws_s3_bucket.this.id

  block_public_acls       = true
  ignore_public_acls      = true
  block_public_policy     = true
  restrict_public_buckets = true
}

View file

@ -0,0 +1,7 @@
# Name and ARN of the state bucket.

output "bucket_name" {
  value = aws_s3_bucket.this.bucket
}

output "bucket_arn" {
  value = aws_s3_bucket.this.arn
}

View file

@ -0,0 +1,3 @@
# Name given to the S3 bucket.
variable "bucket_name" {
  type = string
}

View file

@ -1,5 +1,4 @@
stage = "dev"
profile = "DevAdmin"
region = "eu-west-2"
# Domain

View file

@ -8,7 +8,6 @@ terraform {
backend "s3" {
bucket = "assessment-model-terraform-state"
region = "eu-west-2"
profile = "DevAdmin"
key = "terraform.tfstate"
}
@ -16,7 +15,6 @@ terraform {
}
provider "aws" {
profile = var.profile
region = var.region
}
@ -91,101 +89,101 @@ resource "aws_db_instance" "default" {
# Set up the bucket that receives the CSV uploads of EPCs to be retrofitted
module "s3_presignable_bucket" {
source = "./modules/s3_presignable_bucket"
source = "../modules/s3_presignable_bucket"
bucketname = "retrofit-plan-inputs-${var.stage}"
environment = var.stage
allowed_origins = var.allowed_origins
}
module "s3_due_considerations_bucket" {
source = "./modules/s3_presignable_bucket"
source = "../modules/s3_presignable_bucket"
bucketname = "retrofit-due-considerations-${var.stage}"
environment = var.stage
allowed_origins = var.allowed_origins
}
module "s3_eco_spreadseet_bucket" {
source = "./modules/s3_presignable_bucket"
source = "../modules/s3_presignable_bucket"
bucketname = "retrofit-eco-spreadsheet-${var.stage}"
environment = var.stage
allowed_origins = var.allowed_origins
}
module "s3" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-datalake-${var.stage}"
allowed_origins = var.allowed_origins
}
module "model_directory" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-model-directory-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_sap_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-sap-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_sap_data" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-data-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_carbon_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-carbon-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_heat_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-heat-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_lighting_cost_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-lighting-cost-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_heating_cost_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-heating-cost-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_hot_water_cost_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-hot-water-cost-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_heating_kwh_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-heating-kwh-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_hotwater_kwh_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-hotwater-kwh-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_sap_baseline_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-sap-baseline-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
// We make this bucket presignable, because we want to generate download links for the frontend
module "retrofit_energy_assessments" {
source = "./modules/s3_presignable_bucket"
source = "../modules/s3_presignable_bucket"
bucketname = "retrofit-energy-assessments-${var.stage}"
allowed_origins = var.allowed_origins
environment = var.stage
@ -193,7 +191,7 @@ module "retrofit_energy_assessments" {
# Set up the route53 record for the API
module "route53" {
source = "./modules/route53"
source = "../modules/route53"
domain_name = var.domain_name
api_url_prefix = var.api_url_prefix
providers = {
@ -201,75 +199,76 @@ module "route53" {
}
}
# Create an ECR repository for storage of the lambda's docker images
module "ecr" {
ecr_name = "fastapi-repository-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_sap_prediction_ecr" {
ecr_name = "lambda-sap-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "due_considerations_ecr" {
ecr_name = "due-considerations-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "eco_spreadsheet_ecr" {
ecr_name = "eco-spreadsheet-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_carbon_prediction_ecr" {
ecr_name = "lambda-carbon-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_heat_prediction_ecr" {
ecr_name = "lambda-heat-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
# ECR repos for lighting cost, heating cost and hot water cost models
module "lambda_lighting_cost_prediction_ecr" {
ecr_name = "lighting-cost-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_heating_cost_prediction_ecr" {
ecr_name = "heating-cost-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_hot_water_cost_prediction_ecr" {
ecr_name = "hot-water-cost-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
# For heating and hot water kwh models
module "lambda_heating_kwh_prediction_ecr" {
ecr_name = "heating-kwh-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_hotwater_kwh_prediction_ecr" {
ecr_name = "hotwater-kwh-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
# Baselining models
module "sap_baseline_ecr" {
ecr_name = "sap-baseline-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
##############################################
# CDN - Cloudfront
##############################################
module "cloudfront_distribution" {
source = "./modules/cloudfront"
source = "../modules/cloudfront"
bucket_name = module.s3.bucket_name
bucket_id = module.s3.bucket_id
bucket_arn = module.s3.bucket_arn
@ -281,7 +280,7 @@ module "cloudfront_distribution" {
# SES - Email sending
################################################
module "ses" {
source = "./modules/ses"
source = "../modules/ses"
domain_name = "domna.homes"
stage = var.stage
}
@ -289,3 +288,27 @@ module "ses" {
output "ses_dns_records" {
value = module.ses.dns_records
}
################################################
# Address2UPRN Lambda ECR
################################################
# State bucket for the address2uprn stack. The name must match the bucket that
# stack's S3 backend is configured with.
module "address2uprn_state_bucket" {
  source = "../modules/tf_state_bucket"

  bucket_name = "address2uprn-terraform-state"
}

output "address2uprn_state_bucket_name" {
  value = module.address2uprn_state_bucket.bucket_name
}

# Container registry for the address2uprn image; the module appends the stage
# to the repository name.
module "address2uprn_registry" {
  source = "../modules/container_registry"

  name  = "address2uprn"
  stage = var.stage
}

output "address2uprn_repository_url" {
  value = module.address2uprn_registry.repository_url
}

View file

@ -3,11 +3,6 @@ variable stage {
type = string
}
variable "profile" {
description = "AWS profile to use"
type = string
}
variable "region" {
description = "AWS region"
type = string

View file

@ -1,4 +1,4 @@
pydantic==2.9.2
pydantic>=1.10.7
pydantic-settings==2.6.0
epc-api-python==1.0.2
numpy==2.1.2

View file

@ -1,4 +1,4 @@
[pytest]
pythonpath = .
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/onboarders/tests
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests

View file

@ -7,24 +7,29 @@ import numpy as np
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine, db_read_session
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial
from backend.app.db.models.recommendations import (
Recommendation,
Plan,
PlanRecommendations,
RecommendationMaterials,
)
from backend.app.db.models.portfolio import (
PropertyModel,
PropertyDetailsEpcModel,
PropertyDetailsSpatial,
)
from backend.app.db.functions.materials_functions import get_materials
from collections import defaultdict
from sqlalchemy import func
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 435 # Peabody
PORTFOLIO_ID = 502 # Peabody
SCENARIOS = [
908,
909,
910,
986,
]
scenario_names = {
908: "EPC C - no solid floor, ashp 3.0",
909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
910: "EPC B - no solid floor, no EWI, ashp 3.0"
986: "EPC C",
}
@ -35,22 +40,26 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Properties
# --------------------
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id
).all()
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
)
properties_data = [
{
**{col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns},
**{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns},
**{
col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for p in properties_query
]
@ -62,13 +71,10 @@ def get_data(portfolio_id, scenario_ids):
session.query(
Plan.scenario_id,
Plan.property_id,
func.max(Plan.created_at).label("latest_created_at")
func.max(Plan.created_at).label("latest_created_at"),
)
.filter(Plan.scenario_id.in_(scenario_ids))
.group_by(
Plan.scenario_id,
Plan.property_id
)
.group_by(Plan.scenario_id, Plan.property_id)
.subquery()
)
@ -80,9 +86,9 @@ def get_data(portfolio_id, scenario_ids):
session.query(Plan)
.join(
latest_plans_subq,
(Plan.scenario_id == latest_plans_subq.c.scenario_id) &
(Plan.property_id == latest_plans_subq.c.property_id) &
(Plan.created_at == latest_plans_subq.c.latest_created_at)
(Plan.scenario_id == latest_plans_subq.c.scenario_id)
& (Plan.property_id == latest_plans_subq.c.property_id)
& (Plan.created_at == latest_plans_subq.c.latest_created_at),
)
.all()
)
@ -107,28 +113,29 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendations (NO materials yet)
# --------------------
recommendations_query = session.query(
Recommendation,
Plan.scenario_id,
PlanRecommendations.plan_id
).join(
recommendations_query = (
session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan,
Plan.id == PlanRecommendations.plan_id
).filter(
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False)
).all()
Recommendation.already_installed.is_(False),
)
.all()
)
recommendations_data = [
{
**{col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns},
**{
col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns
},
"scenario_id": r.scenario_id,
"materials": [] # placeholder
"materials": [], # placeholder
}
for r in recommendations_query
]
@ -138,23 +145,25 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendation materials (SEPARATE QUERY)
# --------------------
materials_query = session.query(
RecommendationMaterials
).filter(
RecommendationMaterials.recommendation_id.in_(recommendation_ids)
).all()
materials_query = (
session.query(RecommendationMaterials)
.filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
.all()
)
# Group materials by recommendation_id
materials_by_recommendation = defaultdict(list)
for m in materials_query:
materials_by_recommendation[m.recommendation_id].append({
materials_by_recommendation[m.recommendation_id].append(
{
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
})
}
)
# Attach materials safely (no filtering side effects)
for r in recommendations_data:
@ -165,7 +174,9 @@ def get_data(portfolio_id, scenario_ids):
return properties_data, plans_data, recommendations_data
properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)
properties_data, plans_data, recommendations_data = get_data(
portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS
)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
@ -176,10 +187,8 @@ with db_read_session() as session:
materials = pd.DataFrame(materials)
material_lookup = (
materials
.set_index("id")[["type", "includes_battery"]]
.to_dict("index")
material_lookup = materials.set_index("id")[["type", "includes_battery"]].to_dict(
"index"
)
@ -193,14 +202,14 @@ def has_solar_with_battery(materials_list):
return False
recommendations_df["has_solar_with_battery"] = (
recommendations_df["materials"].apply(has_solar_with_battery)
recommendations_df["has_solar_with_battery"] = recommendations_df["materials"].apply(
has_solar_with_battery
)
recommendations_df["measure_type"] = np.where(
recommendations_df["has_solar_with_battery"] == True,
recommendations_df["measure_type"] + "_with_battery",
recommendations_df["measure_type"]
recommendations_df["measure_type"],
)
# Adjust material type to indicate if there is a battery included
@ -215,50 +224,67 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3
for scenario_id in SCENARIOS:
# Get recs for this scenario
recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
["property_id", "measure_type", "estimated_cost", "default"]
recommended_measures_df = recommendations_df[
recommendations_df["scenario_id"] == scenario_id
][["property_id", "measure_type", "estimated_cost", "default"]]
recommended_measures_df = recommended_measures_df[
recommended_measures_df["default"]
]
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
["property_id", "default", "sap_points"]]
post_install_sap = recommendations_df[
recommendations_df["scenario_id"] == scenario_id
][["property_id", "default", "sap_points"]]
post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
post_install_sap = (
post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
)
# Find dupes by property id and measure type
dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False)
dupes = recommended_measures_df.duplicated(
subset=["property_id", "measure_type"], keep=False
)
dupe_df = recommended_measures_df[dupes]
if dupe_df.shape:
# Drop dupes - happened due to a funny bug
recommended_measures_df = recommended_measures_df.drop_duplicates(
subset=["property_id", "measure_type"], keep='first'
subset=["property_id", "measure_type"], keep="first"
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
index="property_id", columns="measure_type", values="estimated_cost"
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
# Total cost is the row sum, excluding the property_id column
recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
columns=["property_id"]
).sum(axis=1)
recommendations_measures_pivot["total_retrofit_cost"] = (
recommendations_measures_pivot.drop(columns=["property_id"]).sum(axis=1)
)
df = properties_df[
df = (
properties_df[
[
"landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
"heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
"id"
"landlord_property_id",
"property_id",
"uprn",
"address",
"postcode",
"property_type",
"walls",
"roof",
"heating",
"windows",
"current_epc_rating",
"current_sap_points",
"total_floor_area",
"number_of_rooms",
"id",
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
post_install_sap, how="left", on="property_id"
]
.merge(recommendations_measures_pivot, how="left", on="property_id")
.merge(post_install_sap, how="left", on="property_id")
)
# df = df.drop(columns=["property_id"])
@ -266,20 +292,24 @@ for scenario_id in SCENARIOS:
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
df["predicted_post_works_sap"] = df["predicted_post_works_sap"]
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(
lambda x: sap_to_epc(x)
)
df["uprn"] = df["uprn"].astype(str)
relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id]
df2 = df.merge(
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id",
suffixes=("", "_plan")
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]],
how="left",
on="property_id",
suffixes=("", "_plan"),
)
print(df2["predicted_post_works_epc"].value_counts())
print(df2["post_epc_rating"].value_counts())
z = df2[
(df2["predicted_post_works_epc"] != "D") &
(df2["post_epc_rating"].astype(str) == "Epc.D")
(df2["predicted_post_works_epc"] != "D")
& (df2["post_epc_rating"].astype(str) == "Epc.D")
]
df2["predicted_post_works_epc"].value_counts()
@ -295,183 +325,6 @@ for scenario_id in SCENARIOS:
df[df["predicted_post_works_sap"] == ""]
# Create excel to store to
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx")
filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx"
with pd.ExcelWriter(filename) as writer:
df.to_excel(writer, sheet_name="properties", index=False)
# asset_list = pd.DataFrame(asset_list)
# asset_list = asset_list.rename(
# columns={
# "postcode": "domna_postcode"
# }
# )
# if "domna_full_address":
# # For Peabody
# asset_list["domna_full_address"] = asset_list["domna_address_1"]
#
# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
# asset_list = asset_list.merge(
# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
# how="left",
# on="uprn"
# )
# Get conservation area data from property details spatial. based on the UPRNs
def get_conservation_area_data(uprns):
    """
    Fetch the PropertyDetailsSpatial rows for the given UPRNs.

    :param uprns: iterable of UPRN values used in the IN (...) filter.
    :return: DataFrame with one row per matching record and one column per column of the
        PropertyDetailsSpatial table. The columns are present even when nothing matches.
    """
    column_names = [col.name for col in PropertyDetailsSpatial.__table__.columns]

    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()
        spatial_query = (
            session.query(PropertyDetailsSpatial)
            .filter(PropertyDetailsSpatial.uprn.in_(uprns))  # Filter by UPRNs
            .all()
        )
        # Copy every mapped column off each ORM object while the session is still open.
        spatial_data = [
            {name: getattr(spatial, name) for name in column_names}
            for spatial in spatial_query
        ]
    finally:
        # Always release the connection, including when the query raises.
        session.close()

    # Explicit columns keep the frame's shape stable for callers when there are no rows.
    return pd.DataFrame(spatial_data, columns=column_names)
# Collect the distinct UPRNs as ints, dropping missing values and the literal
# string "<NA>".
# NOTE(review): the code that builds asset_list is commented out above, so
# asset_list must already exist from elsewhere - confirm.
uprns = asset_list[
~pd.isna(asset_list["uprn"]) & (asset_list["uprn"] != "<NA>")
]["uprn"].astype(int).unique().tolist()
conservation_area_data = get_conservation_area_data(uprns)
# Cast to str so the key is a string on the spatial side of the merge below.
conservation_area_data["uprn"] = conservation_area_data["uprn"].astype(str)
# Attach the three heritage/conservation flags to each asset.
asset_list = asset_list.merge(
conservation_area_data[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]],
how="left",
on="uprn"
)
# For exporting
df.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - "
"with ID.xlsx",
index=False
)
# asset_list.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx",
# index=False
# )
# Load the condition price table from the "Prices - Khalim" sheet; header=35
# selects the row used for column names.
# NOTE(review): absolute path into one user's home directory.
condition_costs = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
sheet_name="Prices - Khalim",
header=35
)
# Remove unnamed columns and reset index
condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')]
condition_costs = condition_costs.reset_index(drop=True)
# We now estimate condition cost
def simulate_condition(asset_list, condition_costs):
    """
    This function is for testing, and will simulate the condition cost for every condition from 10 down to 1 for
    each property to see what the costing array looks like.

    For each condition, every unit cost is multiplied by the property's "total_floor_area"; the "Bathroom" cost is
    additionally multiplied by the property's "bathrooms" value, and the scaled costs are summed.

    The simulated costs are attached to the asset list by row position rather than by merging on "uprn", so rows
    with a duplicated or missing UPRN are not multiplied.

    :param asset_list: DataFrame with "total_floor_area" and "bathrooms" columns, one row per property
    :param condition_costs: DataFrame with a "Condition" column holding 1-10 and one unit-cost column per item,
        including "Bathroom"
    :return: asset_list with a fresh 0..n-1 index and the columns "Condition 10" ... "Condition 1" appended
    :raises IndexError: if condition_costs has no row for one of the conditions 1-10
    """
    scores = list(reversed(range(1, 11)))
    labels = ["Condition " + str(score) for score in scores]
    # The unit costs depend only on the condition, so look each one up once instead of once per property
    unit_costs = {
        score: condition_costs[condition_costs["Condition"] == score].drop(columns=["Condition"]).iloc[0]
        for score in scores
    }
    simulated = []
    for _, row in asset_list.iterrows():
        # NOTE(review): the real costing loop further down reads "n_bathrooms" — confirm which column name
        # the asset list actually carries before re-enabling this function
        n_bathrooms = row["bathrooms"]
        totals = {}
        for score, label in zip(scores, labels):
            # Each cost is scaled by floor area (the multiplication yields a new Series, unit_costs is untouched)
            scaled = unit_costs[score] * row["total_floor_area"]
            scaled["Bathroom"] = scaled["Bathroom"] * n_bathrooms
            totals[label] = scaled.sum()
        simulated.append(totals)
    # columns= keeps the "Condition N" columns present even when asset_list has no rows
    condition_df = pd.DataFrame(simulated, columns=labels)
    # Both frames now share a 0..n-1 index, so the join lines every property up with its own simulated costs
    return asset_list.reset_index(drop=True).join(condition_df)
# asset_list = simulate_condition(asset_list, condition_costs)
# We calculate the condition cost based on the condition score of each property.
# The result is written back by row label rather than by matching on "uprn", so rows with a missing or
# duplicated UPRN each receive their own cost and no full-column comparison is needed per row.
for row_label, row in asset_list.iterrows():
    condition = row["condition_score"]
    # Skip properties without a score: None, NaN/NA (how pandas represents blanks) or an empty string
    if pd.isna(condition) or condition == "":
        continue
    condition = int(float(condition))
    # Unit costs for this condition; raises IndexError if the score has no row in condition_costs
    condition_cost = condition_costs[
        condition_costs["Condition"] == condition
    ].drop(columns=["Condition"]).iloc[0]
    # Each cost is scaled by floor area
    condition_cost = condition_cost * float(row["total_floor_area"])
    # The bathroom cost is additionally multiplied by the number of bathrooms
    n_bathrooms = row["n_bathrooms"]
    condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)
    total_condition_cost = condition_cost.sum()
    asset_list.loc[row_label, "domna_condition_cost"] = total_condition_cost
# Store output
portfolio_output_path = (
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx"
)
asset_list.to_excel(portfolio_output_path, index=False)

# Slice out the condition score, the decoration sums and the computed condition cost for comparison
# NOTE(review): "decoration_sum_min " carries a trailing space — confirm it matches the actual column name
comparison_columns = ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
condition_cost_comparison = asset_list[comparison_columns]
# Testing
plans_df.head()
peabody_example_path = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
    "SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx"
)
example = pd.read_excel(peabody_example_path)

# Attach landlord_property_id to every plan via property_id
landlord_ids = properties_df[["property_id", "landlord_property_id"]]
plans_df2 = plans_df.merge(landlord_ids, how="left", left_on="property_id", right_on="property_id")

# Keep only scenario 909, then pull out the rows whose property_id has already appeared
in_scenario = plans_df2["scenario_id"] == 909
plans_df2 = plans_df2[in_scenario]
is_repeat = plans_df2["property_id"].duplicated()
dupes = plans_df2[is_repeat]

# merge on plans (no key is given, so pandas joins on the columns the two frames have in common)
example = example.merge(plans_df, how="left")