Merge pull request #700 from Hestia-Homes/main

Hot fixes for ara bugs + rolling out new terraform & github workflows structure
This commit is contained in:
KhalimCK 2026-02-10 14:48:41 +00:00 committed by GitHub
commit 44f4185a7a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
156 changed files with 6806 additions and 1275 deletions

View file

@ -0,0 +1,40 @@
# Dev-container image for the asset-list environment: Python 3.11 on Debian
# bullseye with libpostal built from source and a passwordless-sudo dev user.
# Paths in COPY are relative to the build context chosen by the caller.
FROM python:3.11.10-bullseye

# Login name of the developer account created in step 3.
ARG USER=vscode
# Build-time only (ARG, not ENV) so apt never prompts and the setting does not
# leak into the running container.
ARG DEBIAN_FRONTEND=noninteractive

# 1) Toolchain + utilities for building libpostal
RUN apt-get update && apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        build-essential \
        ca-certificates \
        curl \
        git \
        jq \
        libtool \
        pkg-config \
        sudo \
        vim \
    && rm -rf /var/lib/apt/lists/*

# 2) Build and install libpostal from source
#    The clone is deleted in the same layer so the source tree never persists in the image.
#    NOTE(review): this builds whatever the default branch head is at build time;
#    pin a commit or tag if reproducible builds matter.
RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
    && cd /tmp/libpostal \
    && ./bootstrap.sh \
    && ./configure --datadir=/usr/local/share/libpostal \
    && make -j"$(nproc)" \
    && make install \
    && ldconfig \
    && rm -rf /tmp/libpostal

# 3) Create the user and grant sudo privileges
RUN useradd -m -s /usr/bin/bash ${USER} \
    && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
    && chmod 0440 /etc/sudoers.d/${USER}

# 4) Python deps - needed if you want to run the asset list
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# COPY (not ADD): these are plain local files, no archive extraction or URL fetch.
COPY .devcontainer/asset_list/requirements.txt requirements2.txt
COPY asset_list/requirements.txt requirements1.txt
# Hand both files to pip directly instead of concatenating them: a missing
# trailing newline in the first file would otherwise fuse two requirement lines.
RUN pip install -r requirements1.txt -r requirements2.txt

# 5) Workdir
WORKDIR /workspaces/model

# 6) Make Python find your package
# Add project root to PYTHONPATH for all processes. PYTHONPATH is not set
# earlier in this file, so appending ":${PYTHONPATH}" would only leave a
# trailing ":" (an empty path entry).
ENV PYTHONPATH=/workspaces/model

View file

@ -1,7 +1,7 @@
{
"name": "Basic Python",
"name": "SAL ENV",
"dockerComposeFile": "docker-compose.yml",
"service": "model",
"service": "model-sal",
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/model",
"postStartCommand": "bash .devcontainer/post-install.sh",

View file

@ -1,14 +1,14 @@
version: '3.8'
services:
model:
model-sal:
user: "${UID}:${GID}"
build:
context: ..
dockerfile: .devcontainer/Dockerfile
context: ../..
dockerfile: .devcontainer/asset_list/Dockerfile
command: sleep infinity
volumes:
- ..:/workspaces/model
- ../../:/workspaces/model
networks:
- model-net

View file

@ -11,4 +11,4 @@ if os.path.exists(env_path):
print("✔ Loaded .env into Jupyter kernel")
else:
print("⚠ No .env file found to load")
EOF
EOF

View file

@ -0,0 +1,23 @@
# Python packages installed into the dev container (pip requirements format).
fastapi==0.115.2
sqlalchemy==2.0.36
psycopg2-binary==2.9.10
python-jose==3.3.0
cryptography==43.0.3
mangum==0.19.0
# AWS
boto3==1.35.44
# Data
openpyxl==3.1.2
# Basic
pytz
uvicorn[standard]
# Testing
pytest==9.0.2
pytest-cov==7.0.0
ipykernel>=6.25,<7
pyyaml>=6.0.1
sqlmodel
# Formatting
black==26.1.0
# `from dotenv import load_dotenv` is provided by the python-dotenv distribution;
# depend on it directly rather than on the similarly named `dotenv` package.
python-dotenv
pydantic-settings

View file

@ -34,7 +34,7 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD backend/engine/requirements.txt requirements1.txt
ADD backend/app/requirements/requirements.txt requirements2.txt
ADD .devcontainer/requirements.txt requirements3.txt
ADD .devcontainer/backend/requirements.txt requirements3.txt
RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt
RUN pip install -r requirements.txt

View file

@ -0,0 +1,40 @@
// Dev-container definition for the backend environment. It attaches to the
// "model-backend" service from the docker-compose.yml next to this file and
// opens /workspaces/model as the workspace, connecting as the "vscode" user.
{
"name": "Backend Model Env",
"dockerComposeFile": "docker-compose.yml",
"service": "model-backend",
"remoteUser": "vscode",
"workspaceFolder": "/workspaces/model",
// Runs on every container start; the path is relative to the workspace folder.
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
// Bind-mounts the host user's home directory at /workspaces/home.
// NOTE(review): this exposes the entire host home (SSH keys, cloud credentials)
// inside the container - confirm that breadth is intended.
"mounts": [
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
],
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-toolsai.jupyter",
"mechatroner.rainbow-csv",
"ms-toolsai.datawrangler",
"lindacong.vscode-book-reader",
"4ops.terraform",
"fabiospampinato.vscode-todo-plus",
"jgclark.vscode-todo-highlight",
"corentinartaud.pdfpreview",
"ms-python.vscode-python-envs",
"ms-python.black-formatter",
"waderyan.gitblame"
],
"settings": {
"files.defaultWorkspace": "/workspaces/model",
// Python files are formatted on save by the Black formatter extension.
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
},
"python.formatting.provider": "none"
}
}
},
"containerEnv": {
// NOTE(review): PYTHONFLAGS does not appear to be an environment variable the
// Python interpreter reads - confirm this actually applies -Xfrozen_modules=off.
"PYTHONFLAGS": "-Xfrozen_modules=off"
}
}

View file

@ -0,0 +1,28 @@
# Compose stack for the backend dev container: the "model-backend" workspace
# container plus a local PostgreSQL instance ("db").
# NOTE(review): recent Docker Compose releases treat the top-level `version`
# key as obsolete and ignore it - confirm whether it can be dropped.
version: '3.8'
services:
# Workspace container; kept alive with `sleep infinity` so an editor can attach.
model-backend:
# presumably UID/GID are exported by the developer's shell or an .env file - verify
user: "${UID}:${GID}"
build:
# Build context is two levels up from this file, so Dockerfile paths resolve from there.
context: ../..
dockerfile: .devcontainer/backend/Dockerfile
command: sleep infinity
volumes:
- ../../:/workspaces/model
# Local development database.
db:
image: postgres:17.4
restart: unless-stopped
# Publishes PostgreSQL on host port 5432.
ports:
- 5432:5432
environment:
# NOTE(review): the official postgres image creates its initial database from
# POSTGRES_DB; PGDATABASE is a client-side default - confirm that
# tech_team_local_db is actually created somewhere.
- PGDATABASE=tech_team_local_db
- POSTGRES_USER=postgres
# NOTE(review): hard-coded credential committed to the repo; acceptable only
# for a throwaway local database - confirm it is not reused anywhere else.
- POSTGRES_PASSWORD=makingwarmerhomes
volumes:
- postgres-data-two:/var/lib/postgresql/data
# Named volume that keeps the database files across container restarts.
volumes:
postgres-data-two:

View file

@ -0,0 +1,14 @@
#!/usr/bin/env bash
# Dev-container post-start hook: writes an IPython startup script so that every
# IPython/Jupyter kernel started by this user loads the backend .env file.
# Strict mode makes a failed mkdir/write abort the hook instead of being ignored.
set -euo pipefail

startup_dir=~/.ipython/profile_default/startup
mkdir -p "${startup_dir}"

# The quoted delimiter ('EOF') keeps the Python below literal: no shell expansion.
cat << 'EOF' > "${startup_dir}/00-load-env.py"
from dotenv import load_dotenv
import os

# Adjust path as needed
env_path = "/workspaces/model/backend/.env"

if os.path.exists(env_path):
    load_dotenv(env_path)
    print("✔ Loaded .env into Jupyter kernel")
else:
    print("⚠ No .env file found to load")
EOF

View file

@ -1,4 +1,4 @@
# fastapi
fastapi==0.115.2
sqlalchemy==2.0.36
pydantic-settings==2.6.0

107
.github/workflows/_build_image.yml vendored Normal file
View file

@ -0,0 +1,107 @@
# Reusable workflow: builds a Docker image, pushes it to ECR tagged with the
# commit SHA, and reports the pushed image digest and repository URL.
#
# Inputs:
#   ecr_repo        - ECR repository name (not the full URL).
#   dockerfile_path - Dockerfile to build, relative to the repository root.
#   build_context   - Docker build context (default ".").
#   build_args      - optional, one KEY=VALUE per line; $VAR / ${VAR} references
#                     are expanded from the job environment (e.g. DEV_DB_HOST).
# NOTE(review): build-arg values are recorded in the image history, so anything
# passed this way should not be treated as secret inside the image.
name: Build Docker image

on:
  workflow_call:
    inputs:
      ecr_repo:
        required: true
        type: string
      dockerfile_path:
        required: true
        type: string
      build_context:
        required: false
        default: "."
        type: string
      build_args:
        required: false
        type: string
    outputs:
      image_digest:
        description: "Pushed image digest"
        value: ${{ jobs.build.outputs.image_digest }}
      ecr_repo_url:
        description: "ECR repository URL"
        value: ${{ jobs.build.outputs.ecr_repo_url }}
    secrets:
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AWS_REGION:
        required: true
      DEV_DB_HOST:
        required: false
      DEV_DB_PORT:
        required: false
      DEV_DB_NAME:
        required: false

jobs:
  build:
    runs-on: ubuntu-latest
    # Exposed to every step so build_args lines can reference them as $DEV_DB_*.
    env:
      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
    outputs:
      image_digest: ${{ steps.digest.outputs.image_digest }}
      ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }}
    steps:
      - uses: actions/checkout@v4

      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - uses: aws-actions/amazon-ecr-login@v2

      - name: Resolve ECR repo URL
        id: repo
        # Inputs reach the script through env vars rather than being pasted into
        # the script text, so their content is never parsed as shell code.
        env:
          AWS_REGION: ${{ secrets.AWS_REGION }}
          ECR_REPO: ${{ inputs.ecr_repo }}
        run: |
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
          ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}"

          echo "Resolved ECR repo URL (local var):"
          echo "$ECR_REPO_URL"

          echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"

      - name: Build & push image
        env:
          IMAGE_REPO: ${{ steps.repo.outputs.ecr_repo_url }}
          DOCKERFILE_PATH: ${{ inputs.dockerfile_path }}
          BUILD_CONTEXT: ${{ inputs.build_context }}
          BUILD_ARGS_INPUT: ${{ inputs.build_args }}
        run: |
          IMAGE_URI="${IMAGE_REPO}:${GITHUB_SHA}"

          # Turn each non-empty build_args line into one --build-arg option.
          # An array keeps every value a single word even if it contains spaces.
          build_args=()
          while IFS= read -r line; do
            # skip empty lines
            [ -n "$line" ] || continue
            # envsubst expands $VAR / ${VAR} from the environment without
            # executing the line as shell code (unlike eval).
            build_args+=(--build-arg "$(envsubst <<< "$line")")
          done <<< "${BUILD_ARGS_INPUT}"

          docker build \
            -f "${DOCKERFILE_PATH}" \
            "${build_args[@]}" \
            -t "${IMAGE_URI}" \
            "${BUILD_CONTEXT}"

          docker push "${IMAGE_URI}"

      - name: Resolve image digest
        id: digest
        env:
          ECR_REPO: ${{ inputs.ecr_repo }}
        run: |
          # Look the digest up by the tag pushed above (the commit SHA).
          DIGEST=$(aws ecr describe-images \
            --repository-name "${ECR_REPO}" \
            --image-ids imageTag="${GITHUB_SHA}" \
            --query 'imageDetails[0].imageDigest' \
            --output text)

          echo "image_digest=$DIGEST" >> "$GITHUB_OUTPUT"

91
.github/workflows/_deploy_lambda.yml vendored Normal file
View file

@ -0,0 +1,91 @@
# Reusable workflow: runs Terraform in `lambda_path` for the given stage
# (workspace), passing the Lambda name, the ECR repository URL and the image
# digest as Terraform variables, then applies the saved plan.
#
# Inputs:
#   lambda_name  - value for the Terraform `lambda_name` variable.
#   lambda_path  - directory containing the Terraform configuration.
#   stage        - Terraform workspace and `stage` variable.
#   ecr_repo     - ECR repository name; the full URL is resolved in this job.
#   image_digest - value for the Terraform `image_digest` variable.
name: Deploy Lambda (Terraform)

on:
  workflow_call:
    inputs:
      lambda_name:
        required: true
        type: string
      lambda_path:
        required: true
        type: string
      stage:
        required: true
        type: string
      ecr_repo:
        required: true
        type: string
      image_digest:
        required: true
        type: string
    secrets:
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AWS_REGION:
        required: true

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Debug inputs
        run: |
          echo "lambda_name=${{ inputs.lambda_name }}"
          echo "lambda_path=${{ inputs.lambda_path }}"
          echo "stage=${{ inputs.stage }}"
          # The declared input is `ecr_repo`; there is no `ecr_repo_url` input.
          echo "ecr_repo=${{ inputs.ecr_repo }}"
          echo "image_digest=${{ inputs.image_digest }}"

      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - uses: hashicorp/setup-terraform@v3

      - uses: aws-actions/amazon-ecr-login@v2

      - name: Resolve ECR repo URL
        id: repo
        env:
          AWS_REGION: ${{ secrets.AWS_REGION }}
        run: |
          # <account>.dkr.ecr.<region>.amazonaws.com/<repo>, built from the caller identity.
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
          ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${{ inputs.ecr_repo }}"
          echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"

      - name: Terraform Init
        working-directory: ${{ inputs.lambda_path }}
        run: terraform init -reconfigure

      - name: Terraform Workspace
        working-directory: ${{ inputs.lambda_path }}
        run: |
          # Select the stage's workspace, creating it on first use.
          terraform workspace select "${{ inputs.stage }}" \
            || terraform workspace new "${{ inputs.stage }}"

      - name: Terraform Plan
        working-directory: ${{ inputs.lambda_path }}
        run: |
          terraform plan \
            -var="stage=${{ inputs.stage }}" \
            -var="lambda_name=${{ inputs.lambda_name }}" \
            -var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
            -var="image_digest=${{ inputs.image_digest }}" \
            -out=lambdaplan

      - name: Terraform Apply
        working-directory: ${{ inputs.lambda_path }}
        # Applies exactly the plan saved by the previous step.
        run: terraform apply -auto-approve lambdaplan

View file

@ -1,80 +1,172 @@
name: Deploy terraform stack
name: Deploy infrastructure
on:
push:
branches:
- dev
- prod
- "**"
jobs:
deploy:
determine_stage:
runs-on: ubuntu-latest
outputs:
stage: ${{ steps.set-stage.outputs.stage }}
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Setup AWS credentials file
- name: Determine stage from branch
id: set-stage
shell: bash
run: |
mkdir -p ~/.aws
echo "[DevAdmin]" > ~/.aws/credentials
echo "aws_access_key_id = ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
echo "aws_secret_access_key = ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
echo "[ProdAdmin]" >> ~/.aws/credentials
echo "aws_access_key_id = ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
echo "aws_secret_access_key = ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
env
BRANCH="${GITHUB_REF_NAME}"
- name: Setup AWS config file
run: |
echo "[profile DevAdmin]" > ~/.aws/config
echo "region = eu-west-2" >> ~/.aws/config
echo "[profile ProdAdmin]" >> ~/.aws/config
echo "region = eu-west-2" >> ~/.aws/config
if [[ "$BRANCH" == "prod" ]]; then
echo "stage=prod" >> "$GITHUB_OUTPUT"
- name: Setup Terraform
uses: hashicorp/setup-terraform@v1
with:
terraform_version: 1.5.2
elif [[ "$BRANCH" == "dev" ]]; then
echo "stage=dev" >> "$GITHUB_OUTPUT"
- name: Configure AWS credentials (DevAdmin)
uses: aws-actions/configure-aws-credentials@v1
else
echo "stage=dev" >> "$GITHUB_OUTPUT"
fi
# ============================================================
# 1⃣ Shared Terraform (infra)
# ============================================================
shared_terraform:
needs: determine_stage
runs-on: ubuntu-latest
env:
STAGE: ${{ needs.determine_stage.outputs.stage }}
steps:
- uses: actions/checkout@v4
- uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-2
env:
AWS_PROFILE: "DevAdmin"
aws-region: ${{ secrets.DEV_AWS_REGION }}
- uses: hashicorp/setup-terraform@v3
- name: Terraform Init
run: cd infrastructure/terraform && terraform init
working-directory: infrastructure/terraform/shared
run: terraform init -reconfigure
- name: Terraform Workspace
run: |
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
cd infrastructure/terraform
terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
working-directory: infrastructure/terraform/shared
run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}
- name: Terraform Plan
run: |
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
cd infrastructure/terraform && terraform plan -var-file=${BRANCH_NAME}.tfvars
working-directory: infrastructure/terraform/shared
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
- name: Deploy to Dev
if: github.ref == 'refs/heads/dev'
run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
env:
name: dev
- name: Terraform Apply
if: env.STAGE == 'prod'
working-directory: infrastructure/terraform/shared
run: terraform apply -auto-approve tfplan
- name: Configure AWS credentials (ProdAdmin)
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-2
env:
AWS_PROFILE: "ProdAdmin"
# ============================================================
# 2⃣ Build Address 2 UPRN image and Push
# ============================================================
address2uprn_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/address2UPRN/handler/Dockerfile
build_context: .
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 3⃣ Deploy Address 2 UPRN Lambda
# ============================================================
address2uprn_lambda:
needs: [address2uprn_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: address2uprn
lambda_path: infrastructure/terraform/lambda/address2UPRN
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 2⃣ Build Postcode Splitter image and Push
# ============================================================
postcodeSplitter_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/postcode_splitter/handler/Dockerfile
build_context: .
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 3⃣ Deploy Postcode Splitter Lambda
# ============================================================
postcodeSplitter_lambda:
needs: [postcodeSplitter_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: postcodeSplitter
lambda_path: infrastructure/terraform/lambda/postcodeSplitter
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# Condition ETL image and Push
# ============================================================
condition_etl_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/condition/handler/Dockerfile
build_context: .
build_args: |
DEV_DB_HOST=$DEV_DB_HOST
DEV_DB_PORT=$DEV_DB_PORT
DEV_DB_NAME=$DEV_DB_NAME
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
# ============================================================
# Deploy Condition ETL Lambda
# ============================================================
condition_etl_lambda:
needs: [condition_etl_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: condition-etl
lambda_path: infrastructure/terraform/lambda/condition-etl
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
- name: Deploy to Prod
if: github.ref == 'refs/heads/prod'
run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
env:
name: prod

View file

@ -2,6 +2,12 @@ name: Run unit tests
on:
pull_request:
branches:
- "**"
push:
branches:
- "**"
jobs:
test:

View file

@ -9,9 +9,12 @@
"path": "/bin/bash"
}
},
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
// Hot reload setting that needs to be in user settings
// "jupyter.runStartupCommands": [

View file

@ -34,7 +34,8 @@ from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
logger = setup_logger()
# OpenAI API key: read from the environment only (None when the variable is unset).
# Never commit a fallback value here; any key that has appeared in source
# control must be treated as compromised, revoked and rotated.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
class DataRemapper:
@ -1159,13 +1160,17 @@ class AssetList:
),
axis=1
)
col = self.EPC_API_DATA_NAMES["roof-description"]
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply(
lambda x: RoofAttributes(description=x[self.EPC_API_DATA_NAMES["roof-description"]]).process()[
lambda x: RoofAttributes(description=x[col]).process()[
"insulation_thickness"] if not pd.isnull(
x[self.EPC_API_DATA_NAMES["roof-description"]]) else None,
x[col]) else None,
axis=1
)
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = (
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace("+", "")
)

View file

@ -1,5 +1,5 @@
# OpenAI API key: read from the environment only (None when the variable is unset).
# Never commit a fallback value here; any key that has appeared in source
# control must be treated as compromised, revoked and rotated.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
class DataRemapper:

0
asset_list/__init__.py Normal file
View file

View file

@ -14,22 +14,32 @@ from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
EPC_AUTH_TOKEN = os.getenv(
"EPC_AUTH_TOKEN",
)
def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
def extract_address1(
asset_list, full_address_col, postcode_col, method="first_two_words"
):
if method == "first_two_words":
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
asset_list["address1_extracted"] = (
asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
)
return asset_list
if method == "first_word":
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
asset_list["address1_extracted"] = (
asset_list[full_address_col].str.split(" ").str[0]
)
return asset_list
if method == "house_number_extraction":
asset_list["address1_extracted"] = asset_list.apply(
lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]),
axis=1
lambda x: SearchEpc.get_house_number(
address=x[full_address_col], postcode=x[postcode_col]
),
axis=1,
)
return asset_list
@ -59,24 +69,24 @@ def app():
Property UPRN
"""
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
data_filename = "Domna SHF Wave 3 (3).xlsx"
sheet_name = "Domna Wave 3"
postcode_column = 'Postcode'
address1_column = "Address 1"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["Address 1"]
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Aspire"
data_filename = "ASPIRE ASSET LIST.xlsx"
sheet_name = "Asset List"
postcode_column = "Postcode"
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Construction Years"
landlord_os_uprn = "UPRN"
landlord_property_type = "Type"
landlord_built_form = "Attachment"
landlord_wall_construction = "Wall type"
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Row ID"
landlord_property_id = "LLUPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -93,25 +103,27 @@ def app():
landlord_block_reference = None
# Peabody data for cleaning
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation")
data_folder = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation"
)
data_filename = "to_standardise_uprns.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
address1_column = "Address 1"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["Address 1", "Address 2", "Address 3"]
postcode_column = "Postcode"
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = None
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Type"
landlord_built_form = "Attachment"
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Org Ref"
landlord_property_id = "LLUPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -127,40 +139,6 @@ def app():
asset_list_header = 0
landlord_block_reference = None
# Lambeth:
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
# data_filename = "lambeth_sw2_leigham court estate.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# address1_column = "Address"
# address1_method = None
# fulladdress_column = None
# address_cols_to_concat = ["Address"]
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "row_id"
# landlord_sap = None
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_id_colnames = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = None
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
@ -185,49 +163,62 @@ def app():
landlord_existing_pv=landlord_existing_pv,
landlord_sap=landlord_sap,
landlord_block_reference=landlord_block_reference,
phase=phase
phase=phase,
)
asset_list.init_standardise()
# We produce the new maps, which can be saved for future useage
new_property_type_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_property_type] if
asset_list.landlord_property_type else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_property_type]
if asset_list.landlord_property_type
else {}
).items()
if k not in PROPERTY_MAPPING
}
new_built_form_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_built_form] if
asset_list.landlord_built_form else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_built_form]
if asset_list.landlord_built_form
else {}
).items()
if k not in BUILT_FORM_MAPPINGS
}
new_wall_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
asset_list.landlord_wall_construction else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_wall_construction]
if asset_list.landlord_wall_construction
else {}
).items()
if k not in WALL_CONSTRUCTION_MAPPINGS
}
new_heating_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_heating_system] if
asset_list.landlord_heating_system else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_heating_system]
if asset_list.landlord_heating_system
else {}
).items()
if k not in HEATING_MAPPINGS
}
new_existing_pv_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_existing_pv]
if asset_list.landlord_existing_pv
else {}
).items()
if k not in EXISTING_PV_MAPPINGS
}
new_roof_construction_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_roof_construction] if
asset_list.landlord_roof_construction else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_roof_construction]
if asset_list.landlord_roof_construction
else {}
).items()
if k not in ROOF_CONSTRUCTION_MAPPINGS
}
@ -241,7 +232,7 @@ def app():
outcomes_address=outcomes_address,
outcomes_postcode=outcomes_postcode,
outcomes_houseno=outcomes_houseno,
outcomes_id=outcomes_id
outcomes_id=outcomes_id,
)
asset_list.flag_survey_master(
@ -275,14 +266,16 @@ def app():
skip = max(chunk_indexes)
if any(x in folder_contents for x in downloaded_files):
skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
skip = max(
[i for i in chunk_indexes if filename.format(i=i) in folder_contents]
)
for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
print(f"Processing chunk {i} to {i + chunk_size}")
if skip is not None and not force_retrieve_data:
if i <= skip:
continue
chunk = asset_list.standardised_asset_list[i:i + chunk_size]
chunk = asset_list.standardised_asset_list[i: i + chunk_size]
epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
df=chunk,
row_id_name=asset_list.DOMNA_PROPERTY_ID,
@ -294,7 +287,7 @@ def app():
built_form_column=AssetList.STANDARD_BUILT_FORM,
manual_uprn_map=manual_uprn_map,
epc_api_only=epc_api_only,
epc_auth_token=EPC_AUTH_TOKEN
epc_auth_token=EPC_AUTH_TOKEN,
)
# We now retrieve any failed properties
@ -317,7 +310,9 @@ def app():
# Append the failed data to the main data
# Store the chunk locally as a csv
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
pd.DataFrame(epc_data_chunk).to_csv(
os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False
)
# Store the errors and no-data locally
with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
json.dump(errors_chunk, f)
@ -348,7 +343,9 @@ def app():
unique_recommendations = set()
for _, row in recommendations_df.iterrows():
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
unique_recommendations.update(
[rec["improvement-summary-text"] for rec in row["recommendations"]]
)
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
transformed_data = []
@ -368,20 +365,24 @@ def app():
transformed_df = pd.DataFrame(transformed_data)
for col in [
"Floor insulation (solid floor)",
"Floor insulation", "Floor insulation (suspended floor)"
"Floor insulation",
"Floor insulation (suspended floor)",
]:
if col not in transformed_df.columns:
transformed_df[col] = False
transformed_df = transformed_df[
[
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
"Floor insulation", "Floor insulation (suspended floor)"
asset_list.DOMNA_PROPERTY_ID,
"Floor insulation (solid floor)",
"Floor insulation",
"Floor insulation (suspended floor)",
]
]
transformed_df["epc_has_floor_recommendation"] = (
transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
transformed_df["Floor insulation (suspended floor)"]
transformed_df["Floor insulation (solid floor)"]
| transformed_df["Floor insulation"]
| transformed_df["Floor insulation (suspended floor)"]
)
# Get the find my epc data
@ -394,21 +395,20 @@ def app():
find_my_epc_data.append(
{
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
**x["find_my_epc_data"]
**x["find_my_epc_data"],
}
)
else:
find_my_epc_data.append(
{
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
}
{asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]}
)
find_my_epc_data = pd.DataFrame(find_my_epc_data)
find_my_epc_data = find_my_epc_data.merge(
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
how="left", on=asset_list.DOMNA_PROPERTY_ID
how="left",
on=asset_list.DOMNA_PROPERTY_ID,
)
# We check if we get the solar pv column:
@ -418,27 +418,33 @@ def app():
# Retrieve just the data we need
epc_df = epc_df[
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
].rename(
columns=asset_list.EPC_API_DATA_NAMES
)
].rename(columns=asset_list.EPC_API_DATA_NAMES)
# Look for columns not in the find my EPC data, which will have happened if we didn't
# retrieve it in the first place
missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
missed_find_epc_cols = [
c
for c in list(asset_list.FIND_EPC_DATA_NAMES.keys())
if c not in find_my_epc_data.columns
]
if missed_find_epc_cols:
for c in missed_find_epc_cols:
find_my_epc_data[c] = None
epc_df = epc_df.merge(
find_my_epc_data[
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
]
.rename(columns=asset_list.FIND_EPC_DATA_NAMES),
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
+ list(asset_list.FIND_EPC_DATA_NAMES.keys())
].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
how="left",
on=asset_list.DOMNA_PROPERTY_ID
on=asset_list.DOMNA_PROPERTY_ID,
)
asset_list.merge_data(epc_df)
# asset_list.standardised_asset_list = asset_list.standardised_asset_list[
# asset_list.standardised_asset_list["domna_full_address"]
# != "120 Airdrie Crescent, Burnley, Lancashire"
# ]
asset_list.extract_attributes()
asset_list.identify_worktypes()
@ -448,7 +454,10 @@ def app():
asset_list.get_work_figures()
# Store as an excel
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
filename = (
os.path.join(data_folder, ".".join(data_filename.split(".")[:-1]))
+ " - Standardised.xlsx"
)
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
# Determine inspections priority
@ -472,26 +481,42 @@ def app():
# )
with pd.ExcelWriter(filename) as writer:
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
asset_list.standardised_asset_list.to_excel(
writer, sheet_name="Standardised Asset List", index=False
)
if asset_list.block_analysis_df is not None:
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
asset_list.block_analysis_df.to_excel(
writer, sheet_name="Block Analysis", index=False
)
# If we have outcomes, we add a tab with the outcomes
if not asset_list.outcomes_for_output.empty:
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
asset_list.outcomes_for_output.to_excel(
writer, sheet_name="Outcomes", index=False
)
if not asset_list.unmatched_submissions.empty:
asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
asset_list.unmatched_submissions.to_excel(
writer, sheet_name="Unmatched Submissions", index=False
)
if not asset_list.outcomes_no_match.empty:
asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
asset_list.outcomes_no_match.to_excel(
writer, sheet_name="Unmatched Outcomes", index=False
)
if not asset_list.ecosurv_no_match.empty:
asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
asset_list.ecosurv_no_match.to_excel(
writer, sheet_name="Unmatched Ecosurv", index=False
)
if not asset_list.geographical_areas.empty:
asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
asset_list.geographical_areas.to_excel(
writer, sheet_name="Geographical Areas", index=False
)
# Store dupes
if asset_list.duplicated_addresses is not None:
if not asset_list.duplicated_addresses.empty:
asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False)
asset_list.duplicated_addresses.to_excel(
writer, sheet_name="Duplicate Properties", index=False
)

View file

@ -520,4 +520,14 @@ BUILT_FORM_MAPPINGS = {
'2.EXT.WALL FLAT': 'mid-terrace',
'2 EXT. WALL FLAT': 'mid-terrace',
'Maisonette: Detached: Ground Floor': 'detached',
'Maisonette: Enclosed End Terrace: Top Floor': 'enclosed end-terrace',
'Flat: End Terrace: Basement': 'end-terrace',
'Flat: Mid Terrace: Basement': 'mid-terrace',
'Flat: Enclosed Mid Terrace: Basement': 'enclosed mid-terrace',
'House: Semi Detached: Top Floor': 'semi-detached',
'House: End Terrace: Ground Floor': 'end-terrace',
'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace',
'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace'
}

View file

@ -17,5 +17,10 @@ EXISTING_PV_MAPPINGS = {
'PV: 10% roof area, PV: 2kWp array': 'already has PV',
'PV: 50% roof area': 'already has PV',
'Solar PV': 'already has PV',
'SOLAR PV': 'already has PV'
'SOLAR PV': 'already has PV',
'PV: 40% roof area, PV: 2kWp array': 'already has PV',
'PV: 33% roof area, PV: 2kWp array': 'already has PV',
'PV: 30% roof area': 'already has PV'
}

View file

@ -494,6 +494,10 @@ HEATING_MAPPINGS = {
'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, '
'and sealed to, fireplace opening': 'room heaters',
'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel',
'Boiler: G rated Combi': 'gas condensing combi'
'Boiler: G rated Combi': 'gas condensing combi',
'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler',
'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators',
'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler'
}

View file

@ -427,6 +427,23 @@ PROPERTY_MAPPING = {
'End Terrace': 'unknown',
'Detached': 'unknown',
'Mid-terrace': 'unknown',
'MID - TERRACE': 'unknown'
'MID - TERRACE': 'unknown',
'COMOFF': 'unknown',
'LOTS': 'unknown',
'Maisonette: Detached: Ground Floor': 'maisonette',
'Maisonette: Enclosed End Terrace: Top Floor': 'maisonette',
'Flat: End Terrace: Basement': 'flat',
'Bungalow: EnclosedEndTerrace': 'bungalow',
'Flat: Mid Terrace: Basement': 'flat',
'House: Semi Detached: Top Floor': 'house',
'House: End Terrace: Ground Floor': 'house',
'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette',
'Flat: Enclosed Mid Terrace: Basement': 'flat',
'Warden Bungalow': 'bungalow',
'Warden Flat': 'flat',
'Upper Floor Flat': 'flat',
'Extracare Scheme': 'other'
}

View file

@ -301,4 +301,13 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'PitchedWithSlopingCeiling: As Built': 'pitched insulated',
'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation',
'Flat: 150mm, Flat: Unknown': 'flat insulated',
'AnotherDwellingAbove: Unknown, Flat: Unknown': 'another dwelling above',
'AnotherDwellingAbove, AnotherDwellingAbove: Unknown': 'another dwelling above',
'PitchedNormalNoLoftAccess: Unknown, PitchedWithSlopingCeiling: As Built': 'pitched unknown access to loft',
'Flat: No Insulation': 'flat uninsulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 250mm': 'another dwelling above',
'PitchedNormalLoftAccess: 175mm': 'pitched insulated',
'AnotherDwellingAbove: 300mm': 'another dwelling above'
}

View file

@ -354,6 +354,15 @@ WALL_CONSTRUCTION_MAPPINGS = {
'System built Internal': 'insulated system built',
'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation',
'Cavity: FilledCavityPlusExternal': 'filled cavity'
'Cavity: FilledCavityPlusExternal': 'filled cavity',
'Cavity, Filled Cavity': 'filled cavity',
'Solid Brick, As Built': 'solid brick unknown insulation',
'Cavity, As Built': 'cavity unknown insulation',
'Sandstone, As Built': 'sandstone or limestone unknown insulation',
'Timber Frame, As Built': 'timber frame unknown insulation',
'Solid Brick, Internal Insulation': 'insulated solid brick',
'Granite or Whinstone, As Built': 'granite or whinstone unknown insulation',
'Solid Brick, External': 'insulated solid brick'
}

View file

@ -1,7 +1,6 @@
postal
pandas
usaddress
pydantic-settings==2.6.0
epc-api-python==1.0.2
thefuzz
boto3
@ -10,6 +9,5 @@ openai>=1.3.5
tiktoken
msgpack
beautifulsoup4
pydantic>=1.10.7
typing-extensions>=4.5.0
requests>=2.28.2
requests>=2.28.2

22
backend/.env.test Normal file
View file

@ -0,0 +1,22 @@
DB_HOST=db
DB_PORT=5432
DB_NAME=tech_team_local_db
DB_USERNAME=postgres
DB_PASSWORD=makingwarmerhomes
#not used
GOOGLE_SOLAR_API_KEY=test
SAP_PREDICTIONS_BUCKET=test
CARBON_PREDICTIONS_BUCKET=test
HEAT_PREDICTIONS_BUCKET=test
HEATING_KWH_PREDICTIONS_BUCKET=test
HOTWATER_KWH_PREDICTIONS_BUCKET=test
API_KEY=test
ENVIRONMENT=test
SECRET_KEY=test
PLAN_TRIGGER_BUCKET=test
DATA_BUCKET=test
EPC_AUTH_TOKEN=test
ENGINE_SQS_URL=test
ENERGY_ASSESSMENTS_BUCKET=test

View file

@ -1256,7 +1256,8 @@ class Property:
"biodiesel": "Smokeless Fuel",
"b30d": "B30K Biofuel",
"coal": "Coal",
"oil": "Oil"
"oil": "Oil",
"unknown": None # Handle - anything post 2020 is electricity else gas
}
self.heating_energy_source = list({
@ -1326,7 +1327,16 @@ class Property:
if self.heating_energy_source == "Varied (Community Scheme)":
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]]
mapped_to = fuel_map[self.main_fuel["fuel_type"]]
if mapped_to is None and self.main_fuel["fuel_type"] == "unknown":
# Handle logic based on age band
if self.year_built >= 2020:
self.heating_energy_source = "Electricity"
else:
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
self.heating_energy_source = mapped_to
else:
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")

View file

@ -0,0 +1,20 @@
We have a list of addresses as input.
They will arrive in batches that share the same postcode, and from there we want to convert each address into a UPRN.
If this lambda/function can do that, we'll be speeding ahead.
Energy Performance Information: https://epc.opendatacommunities.org/
guidance page: https://epc.opendatacommunities.org/docs/guidance#field_domestic_LMK_KEY
Example of Khalim's earlier code, which he wrote some tests for: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/tests/test_search_epc.py#L11
Example of EPC search: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/SearchEpc.py#L118
Khalim has made a Python package to help scrape data: https://github.com/KhalimCK/epc-api-python

View file

View file

@ -0,0 +1,23 @@
# Container image for the address -> UPRN Lambda function.
# All COPY sources are relative to the build context, so the context must
# contain both `backend/` and `utils/` at its top level (presumably the
# repository root -- confirm against the build workflow).
FROM public.ecr.aws/lambda/python:3.10
# Set working directory (Lambda task root); later relative COPY targets land here
WORKDIR /var/task
# -----------------------------
# Copy requirements FIRST (for Docker layer caching):
# the dependency layer below is rebuilt only when this file changes
# -----------------------------
COPY backend/address2UPRN/handler/requirements.txt .
# Install dependencies into Lambda runtime (--no-cache-dir keeps pip's cache out of the layer)
RUN pip install --no-cache-dir -r requirements.txt
# -----------------------------
# Copy application code:
# the shared `utils` package plus the single-module handler
# -----------------------------
COPY utils/ utils/
COPY backend/address2UPRN/main.py .
# -----------------------------
# Lambda handler, given as <module>.<function>: `handler` in main.py
# NOTE(review): main.py appears to require EPC_AUTH_TOKEN at import time --
# confirm it is set in the function's environment.
# -----------------------------
CMD ["main.handler"]

View file

@ -0,0 +1,3 @@
epc-api-python==1.0.2
tqdm
pandas

View file

@ -0,0 +1,571 @@
from epc_api.client import EpcClient
import os
from urllib.parse import urlencode
import pandas as pd
from difflib import SequenceMatcher
from tqdm import tqdm
from utils.logger import setup_logger
logger = setup_logger()
import re
# Auth token for the EPC API client, read from the environment once at import time.
EPC_AUTH_TOKEN = os.getenv(
    "EPC_AUTH_TOKEN",
)
# Fail fast: importing this module without the token raises immediately,
# so anything that imports it (including tests) needs EPC_AUTH_TOKEN set.
if EPC_AUTH_TOKEN is None:
    raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
import re
from difflib import SequenceMatcher
from typing import Set
def levenshtein(a: str, b: str) -> float:
    """
    Address similarity score in [0, 1].

    Despite the name this is not an edit distance; it is a blended score.

    Strategy:
    - Normalise both addresses with ``normalise_address``
    - Return 0.0 outright when house/flat numbers cannot agree
    - Otherwise combine token overlap (Jaccard, weight 0.65) with
      character similarity (``SequenceMatcher`` ratio, weight 0.35)

    Args:
        a: The user-supplied address.
        b: The candidate (EPC) address.

    Returns:
        The score rounded to 4 decimal places; 0.0 means "impossible match".
    """
    # normalise_address maps apt/apartment/unit to "flat"; the other spellings
    # are kept so the check still holds if that mapping changes.
    flat_tokens = ("flat", "apt", "apartment", "unit")

    def extract_number_sequence(s: str) -> list[str]:
        # Numbers in order of appearance, e.g. "flat 3 42 x road" -> ["3", "42"].
        return re.findall(r"\d+[a-z]?", s)

    def extract_numbers(s: str) -> Set[str]:
        return set(extract_number_sequence(s))

    def tokenise(s: str) -> Set[str]:
        return set(s.split())

    def extract_building_number(s: str) -> str | None:
        """
        Extract the main building number (NOT flat/unit).

        Assumes formats like:
        - '42 moreton road'
        - 'flat 3 42 moreton road'
        """
        # Remove flat/unit context: drop the marker and the token after it.
        cleaned = []
        skip_next = False
        for t in s.split():
            if t in flat_tokens:
                skip_next = True
                continue
            if skip_next:
                skip_next = False
                continue
            cleaned.append(t)

        # First remaining number is the building number.
        for t in cleaned:
            if re.fullmatch(r"\d+[a-z]?", t):
                return t
        return None

    a_norm = normalise_address(a)
    b_norm = normalise_address(b)

    # --- hard signal: numbers ---
    nums_a = extract_numbers(a_norm)
    nums_b = extract_numbers(b_norm)

    # User gave a number but the candidate has none.
    if nums_a and not nums_b:
        return 0.0

    # No shared numbers at all -> impossible match
    if nums_a and nums_b and nums_a.isdisjoint(nums_b):
        return 0.0

    # HARD GUARD: building number must match
    bld_a = extract_building_number(a_norm)
    bld_b = extract_building_number(b_norm)
    if bld_a and bld_b and bld_a != bld_b:
        return 0.0

    # --- order-sensitive flat/building guard ---
    seq_a = extract_number_sequence(a_norm)
    seq_b = extract_number_sequence(b_norm)

    toks_a = tokenise(a_norm)
    toks_b = tokenise(b_norm)

    # Whole-token membership, not substring search: a substring test treats
    # "community" as containing "unit" and "flatley" as containing "flat".
    has_flat_token_user = any(tok in toks_a for tok in flat_tokens)
    has_flat_token_epc = "flat" in toks_b

    if (
        len(seq_a) == 2
        and len(seq_b) >= 2
        and has_flat_token_epc
        and not has_flat_token_user
        and seq_a != seq_b[:2]
    ):
        return 0.0

    # --- token similarity (order-independent) ---
    if not toks_a or not toks_b:
        token_score = 0.0
    else:
        token_score = len(toks_a & toks_b) / len(toks_a | toks_b)

    # --- character similarity (soft signal) ---
    char_score = SequenceMatcher(None, a_norm, b_norm).ratio()

    # --- weighted blend ---
    return round(
        0.65 * token_score + 0.35 * char_score,
        4,
    )
def normalise_address(s: str) -> str:
    """
    Canonical UK-focused address normalisation.

    - Lowercases
    - Splits digit-letter suffixes ("28a" -> "28 a")
    - Removes punctuation (keeps / for flats)
    - Normalises whitespace
    - Applies synonym compression at token level

    Args:
        s: Raw address. ``None``, empty values and float NaN (e.g. a missing
           spreadsheet cell) normalise to ""; other non-string values are
           converted with ``str`` first.

    Returns:
        The normalised address as a single space-separated string.
    """
    # NaN is truthy, so it must be caught explicitly before the emptiness check;
    # otherwise it reaches .lower() and raises AttributeError.
    if isinstance(s, float) and s != s:
        return ""
    if not s:
        return ""

    # Keys never contain "." because punctuation is stripped before lookup,
    # so dotted spellings such as "rd." arrive here as plain "rd".
    ADDRESS_SYNONYMS = {
        # street types
        "rd": "road",
        "st": "street",
        "ave": "avenue",
        "ln": "lane",
        "cres": "crescent",
        "ct": "court",
        "dr": "drive",
        # flats / units
        "apt": "flat",
        "apartment": "flat",
        "unit": "flat",
        "ste": "suite",
        # numbering noise (empty replacement drops the token)
        "no": "",
    }

    # 1. lowercase (str() tolerates numeric inputs such as a bare house number)
    s = str(s).lower()

    # 1.5 split digit-letter suffixes
    s = re.sub(r"(\d+)([a-z])\b", r"\1 \2", s)

    # 2. remove punctuation except /
    s = re.sub(r"[^\w\s/]", " ", s)

    # 3. normalise whitespace
    s = re.sub(r"\s+", " ", s).strip()

    # 4. tokenise + synonym normalisation
    tokens = []
    for tok in s.split():
        replacement = ADDRESS_SYNONYMS.get(tok, tok)
        if replacement:
            tokens.append(replacement)

    return " ".join(tokens)
def score_addresses(
    df: pd.DataFrame,
    user_address: str,
    column: str = "address",
) -> pd.Series:
    """
    Score every address in ``df[column]`` against ``user_address``.

    Returns a Series of ``levenshtein`` scores aligned with ``df``.
    Raises ValueError when ``column`` is not present in ``df``.
    """
    if column in df.columns:

        def _score(candidate):
            return levenshtein(user_address, candidate)

        return df[column].apply(_score)

    raise ValueError(f"Missing column: {column}")
def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
    """
    Recursively fetch EPC data by postcode.

    If the number of rows equals the requested ``size`` the result may be
    truncated, so the search is retried with double the size, up to
    ``max_attempts`` attempts in total.

    Args:
        postcode: Postcode to search for.
        size: Page size sent to the search endpoint; falsy omits the parameter.
        attempt: 1-based attempt counter (used by the recursion).
        max_attempts: Maximum number of attempts before giving up on growing.

    Returns:
        DataFrame of search rows, or an empty DataFrame when the response is
        empty or has no "rows". After the final attempt the (possibly
        truncated) result is returned and an error is logged.
    """
    client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Join URL segments as strings: os.path.join is a filesystem helper and
    # would use the OS path separator (a backslash on Windows).
    url = f"{client.domestic.host.rstrip('/')}/search"
    if size:
        url += "?" + urlencode({"size": size})

    search_resp = client.domestic.call(
        url=url,
        method="get",
        params={"postcode": postcode},
    )

    if not search_resp or "rows" not in search_resp:
        return pd.DataFrame()

    results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])
    row_count = len(results_df)

    # If we hit the size limit, there *may* be more results
    if row_count == size:
        logger.warning(
            f"⚠️ Warning: hit size limit ({size}) for postcode '{postcode}'. "
            f"Attempt {attempt}/{max_attempts}."
        )

        if attempt < max_attempts:
            logger.warning(f"🔁 Retrying with size={size * 2}")
            return get_epc_data_with_postcode(
                postcode=postcode,
                size=size * 2,
                attempt=attempt + 1,
                max_attempts=max_attempts,
            )

        logger.error(
            "🚨 Max attempts reached. Results may be truncated. "
            "(Please do a manual review by the tech team.)"
        )

    return results_df
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """
    Check that ``df[column]`` agrees on exactly one UPRN and that it is ``uprn``.

    Null values are ignored; the rest are compared as whitespace-stripped
    strings. Returns False when the column is missing, when no non-null
    values remain, or when more than one distinct value is present.
    """
    if column not in df.columns:
        return False

    # Distinct non-null values, normalised to stripped strings.
    distinct = set(df[column].dropna().astype(str).str.strip())

    # True only for a single distinct value equal to the target;
    # an empty set never equals a one-element set.
    return distinct == {str(uprn)}
def get_uprn_candidates(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Annotate EPC results with similarity scores and ranks.

    Adds two columns to a copy of ``df``:
    - ``lexiscore``: ``levenshtein`` score of each address against the user address
    - ``lexirank``: dense rank of the score, 1 = best match (ties share a rank)

    The UPRN column is normalised to strings with any trailing ".0" removed;
    missing UPRNs become "" rather than the literal text "nan"/"None".

    Returns a DataFrame sorted by descending lexiscore.
    DOES NOT choose or return a UPRN.

    Raises:
        ValueError: if ``address_column`` or ``uprn_column`` is not in ``df``.
    """
    if address_column not in df.columns:
        raise ValueError(f"Missing column: {address_column}")

    if uprn_column not in df.columns:
        raise ValueError(f"Missing column: {uprn_column}")

    out = df.copy()

    user_norm = normalise_address(user_address)
    out["lexiscore"] = out[address_column].apply(lambda x: levenshtein(user_norm, x))

    # Normalise UPRN to string. astype(str) would stringify nulls into
    # "nan"/"None", which then look like real UPRNs, so nulls map to "".
    raw_uprns = out[uprn_column]
    uprn_text = raw_uprns.astype(str).str.replace(r"\.0$", "", regex=True)
    out[uprn_column] = uprn_text.where(raw_uprns.notna(), "")

    # Rank: 1 = best match
    out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int)

    return out.sort_values(
        ["lexirank", "lexiscore"],
        ascending=[True, False],
    )
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
    """
    Resolve a free-text address within a postcode to a single UPRN.

    Args:
        user_inputed_address: Address as typed by the user.
        postcode: Postcode used to fetch the EPC candidates.
        return_address: When True, a successful match is returned as
            ``(uprn, matched_epc_address)`` instead of just the UPRN.

    Returns:
        - the UPRN as a string (or the tuple above) on an unambiguous match
        - None in every failure case: no EPC rows for the postcode, no
          candidate scoring above 0, top-ranked candidates disagreeing on the
          UPRN, or the matched EPC row having no UPRN.
        Note that failures return a bare None even when ``return_address`` is True.
    """
    df = get_epc_data_with_postcode(postcode=postcode)

    if df.empty:
        return None

    scored_df = get_uprn_candidates(
        df,
        user_address=user_inputed_address,
    )

    # Best score (scored_df is sorted best-first)
    best_score = scored_df.iloc[0]["lexiscore"]
    if best_score <= 0:
        return None

    # All rank-1 rows (possible draw)
    top_rank_df = scored_df[scored_df["lexirank"] == 1]

    # If rank-1 rows do not agree on a single UPRN -> ambiguous
    if not df_has_single_uprn(top_rank_df, uprn=top_rank_df.iloc[0]["uprn"]):
        return None

    address = top_rank_df["address"].values[0]
    lexiscore = float(top_rank_df["lexiscore"].values[0])
    logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")

    # Safe to return the agreed UPRN
    found_uprn = top_rank_df.iloc[0]["uprn"]

    # A matched EPC row can still lack a UPRN. Besides "", also reject the
    # strings produced by stringifying a null value.
    if pd.isna(found_uprn) or str(found_uprn).strip() in ("", "nan", "None", "<NA>"):
        return None

    if return_address:
        return found_uprn, address
    return found_uprn
def resolve_uprns_for_postcode_group(
    group_df: pd.DataFrame,
    epc_df: pd.DataFrame,
    address_col: str = "Address 1",
) -> pd.DataFrame:
    """
    Resolve a UPRN for every row of a same-postcode group.

    Given:
    - group_df: rows sharing the same postcode
    - epc_df: EPC search results for that postcode (may be empty)

    Returns:
        group_df (index reset) with these columns appended:
        found_uprn, best_match_uprn, best_match_address,
        best_match_lexiscore, status.

        ``status`` is one of:
        - "no_epc_candidates": there were no EPC rows to compare against
        - "zero_score": the best candidate scored 0 or less
        - "ambiguous": the top-ranked candidates disagree on the UPRN
        - "matched": a single UPRN was agreed

        The diagnostic columns are present even when ``group_df`` is empty.
    """
    result_columns = [
        "found_uprn",
        "best_match_uprn",
        "best_match_address",
        "best_match_lexiscore",
        "status",
    ]

    def _result(status, found=None, best_uprn=None, best_address=None, score=None):
        # One diagnostics record; everything defaults to "nothing found".
        return {
            "found_uprn": found,
            "best_match_uprn": best_uprn,
            "best_match_address": best_address,
            "best_match_lexiscore": score,
            "status": status,
        }

    # An empty EPC frame has no address/uprn columns, so scoring it would
    # raise ValueError before the "no candidates" case could be reported.
    no_candidates = epc_df is None or epc_df.empty

    results = []

    for _, row in group_df.iterrows():
        user_address = str(row[address_col]).strip()

        if no_candidates:
            results.append(_result("no_epc_candidates"))
            continue

        scored_df = get_uprn_candidates(
            epc_df,
            user_address=user_address,
        )

        if scored_df.empty:
            results.append(_result("no_epc_candidates"))
            continue

        best_score = scored_df.iloc[0]["lexiscore"]

        if best_score <= 0:
            results.append(_result("zero_score", score=best_score))
            continue

        top_rank_df = scored_df[scored_df["lexirank"] == 1]
        best_row = top_rank_df.iloc[0]

        if not df_has_single_uprn(top_rank_df, best_row["uprn"]):
            results.append(
                _result(
                    "ambiguous",
                    best_uprn=best_row["uprn"],
                    best_address=best_row["address"],
                    score=best_score,
                )
            )
            continue

        results.append(
            _result(
                "matched",
                found=str(best_row["uprn"]),
                best_uprn=str(best_row["uprn"]),
                best_address=best_row["address"],
                score=best_score,
            )
        )

    # Explicit columns keep the output schema stable when there are no rows.
    return pd.concat(
        [
            group_df.reset_index(drop=True),
            pd.DataFrame(results, columns=result_columns),
        ],
        axis=1,
    )
def test(a, b):
assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
def run_all_test():
    """
    Ad-hoc checks against the EPC API.

    Every call goes through ``get_epc_data_with_postcode``, so this needs
    EPC credentials and network access. The expected row counts and UPRNs
    presumably reflect the EPC register when they were written -- verify
    before relying on them.

    ``get_uprn`` signals "no confident match" with None (it never returns
    False), so the negative cases compare against None.
    """
    # Basic usage with different postcode styles
    test(get_epc_data_with_postcode("b93 8sy").shape[0], 63)
    test(get_epc_data_with_postcode("B938sy").shape[0], 63)
    test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)

    test(get_uprn("68", "b93 8sy"), "100070989938")
    test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938")

    test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633")
    test(get_uprn("28 A", "se6 4tf"), "100023278633")
    test(get_uprn("28A", "se6 4tf"), "100023278633")

    test(get_uprn("6 Aitken Close", "E8 4SQ"), None)

    # unique case
    test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("5 , 1 Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("1, 5 Semley Gate", "e9 5nh"), None)
    test(
        get_uprn("1 Semley Gate", "e9 5nh"), "10008238188"
    )  # this one return "flat 1, in 1 semley gate"

    # Oswald Street: no confident match is expected for any of these
    test(get_uprn("48 Oswald Street", "E5 0BT"), None)
    test(get_uprn("42 Oswald Street", "E5 0BT"), None)
    test(get_uprn("46 Oswald Street", "E5 0BT"), None)

    # Smoke calls: results are discarded, these only check that scoring runs
    get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
    get_uprn_candidates(
        get_epc_data_with_postcode("Cr2 7dl"),
        "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
    )
# Audit script: compares the UPRN resolved for each spreadsheet row against the
# UPRN recorded in the spreadsheet and writes every disagreement to an Excel file.
if __name__ == "__main__":
    INPUT_FILE = "hackney.xlsx"
    ADDRESS_COL = "Address 1"
    POSTCODE_COL = "Postcode"
    UPRN_COL = "UPRN"

    df = pd.read_excel(INPUT_FILE)

    # One dict per problematic row: the original row plus diagnostics.
    failures = []

    for _, row in tqdm(
        df.iterrows(),
        total=len(df),
        desc="Auditing UPRNs",
    ):
        input_address = str(row[ADDRESS_COL]).strip()
        postcode = str(row[POSTCODE_COL]).strip()

        # int() then str() strips the ".0" of a float cell; blank cells stay None.
        expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL]))

        try:
            epc_df = get_epc_data_with_postcode(postcode)

            # Recorded as a failure regardless of whether a UPRN was expected.
            if epc_df.empty:
                failures.append(
                    {
                        **row.to_dict(),
                        "found_uprn": None,
                        "best_match_uprn": None,
                        "best_match_address": None,
                        "best_match_lexiscore": None,
                        "status": "no_epc_results",
                    }
                )
                continue

            scored_df = get_uprn_candidates(
                epc_df,
                user_address=input_address,
            )

            # Best candidate, kept for diagnostics even if it is not accepted.
            best_row = scored_df.iloc[0]
            best_match_uprn = str(best_row["uprn"])
            best_match_address = best_row["address"]
            best_match_lexiscore = round(float(best_row["lexiscore"]), 4)

            # NOTE(review): get_uprn fetches the postcode's EPC data again,
            # so each row costs two API searches.
            found_uprn = get_uprn(input_address, postcode)

        except Exception as e:
            # Any error for a row is recorded and the audit carries on.
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": None,
                    "best_match_uprn": None,
                    "best_match_address": None,
                    "best_match_lexiscore": None,
                    "status": "exception",
                    "error": str(e),
                }
            )
            continue

        # Falsy results (None, "") all count as "not found".
        found_uprn_norm = None if not found_uprn else str(found_uprn)

        if found_uprn_norm != expected_uprn:
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": found_uprn_norm,
                    "best_match_uprn": best_match_uprn,
                    "best_match_address": best_match_address,
                    "best_match_lexiscore": best_match_lexiscore,
                    "status": ("no_match" if found_uprn_norm is None else "mismatch"),
                }
            )

    failures_df = pd.DataFrame(failures)

    print("===================================")
    print(f"Total rows : {len(df)}")
    print(f"Failures : {len(failures_df)}")
    print("===================================")

    failures_df.to_excel(
        "hackney_uprn_failures.xlsx",
        index=False,
    )
def handler(event, context):
    """
    Lambda entry point (placeholder).

    Ignores ``event`` and ``context``, prints a greeting and returns a
    fixed 200 response.
    """
    greeting = "hello world"
    print(greeting)
    response = {"statusCode": 200, "body": greeting}
    return response
# TO do function dispatcher,
# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)
# fix that
# Look again at flat 1
# pandas reader the separate postcode_splitter
# dump into s3

View file

@ -0,0 +1,24 @@
import pandas as pd
from tqdm import tqdm
from backend.address2UPRN.main import get_uprn
# Enable tqdm for pandas
tqdm.pandas()
df = pd.read_excel("address2.xlsx")
def extract_uprn(row):
    """
    Resolve one spreadsheet row to ``[uprn, matched_address]``.

    Prints the row's inputs for progress/debugging, then returns a
    two-element Series; both elements are None when ``get_uprn`` finds nothing.
    """
    user_input, postcode = row["User Input"], row["Postcode"]
    print(user_input, postcode)

    match = get_uprn(user_input, postcode, return_address=True)
    if match is not None:
        uprn, found_address = match
        return pd.Series([uprn, found_address])

    return pd.Series([None, None])
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
df.to_excel("outputs2.xlsx", index=False)

View file

@ -0,0 +1,40 @@
# tests/test_address_to_uprn_csv.py
import csv
import pytest
from pathlib import Path
from backend.address2UPRN.main import get_uprn
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
def load_test_cases():
    """
    Build one pytest parameter set per row of the CSV fixture.

    Each case is ``(User Input, Postcode, Manual UPRN Code)`` and is
    labelled ``"<User Input> [<Postcode>]"`` in the test report.
    """
    cases = []
    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            address = row["User Input"]
            postcode = row["Postcode"]
            cases.append(
                pytest.param(
                    address,
                    postcode,
                    row["Manual UPRN Code"],
                    id=f"{address} [{postcode}]",
                )
            )
    return cases
@pytest.mark.parametrize(
    "user_input,postcode,expected_uprn",
    load_test_cases(),
)
def test_uprn_resolution_matches_manual(
    user_input: str,
    postcode: str,
    expected_uprn: str,
):
    """
    The resolved UPRN must equal the manually verified one from the fixture.

    The fixture stores "no UPRN expected" as the text "None", so a falsy
    result from ``get_uprn`` is compared through ``str()``.
    """
    uprn = get_uprn(user_input, postcode)

    actual = uprn if uprn else str(uprn)
    assert actual == expected_uprn, (
        f"{user_input!r} [{postcode}]: got {actual!r}, expected {expected_uprn!r}"
    )

View file

@ -0,0 +1,366 @@
User Input,Postcode,Manual UPRN Code
47 The Fairway,OX16 0RR,100120771697
11 REGENT COURT,SL1 3LG,100081041562
3/137a Windmill Road,TW8 9NH,100021516998
Flat 33,SW18 4BE,100023328943
FLAT 1 Brendon Grove,N2 8JE,200013412
Flat 15,KT8 2NE,100062123759
FLAT 5 Stonehill Road,W4 3AH,100021589829
10 Douglas Court,SL7 1UQ,100081278099
1 Windmill Road,HP17 8JA,766034606
31 Denewood,HP13 7LH,100081095964
"10, Greenways Drive",TW4 5DD,10091597009
Flat 10,W4 3AH,"100021589834"
Flat 11,TW4 5DD,10091597010
Flat 11,W4 3AH,100021589835
"12, Greenways Drive",TW4 5DD,10091597011
"Flat 12, Forbes House",W4 3AH,100021589836
FLAT 1 Goodstone Court,HA1 4FL,10070269053
Flat 13,TW4 5DD,10091597012
Flat 13,W4 3AH,100021589837
Flat 14,TW4 5DD,10091597013
Flat 14,W4 3AH,100021589838
Flat 15,TW4 5DD,10091597014
Flat 15,W4 3AH,100021589839
Flat 16,TW4 5DD,"10091597015"
Flat 16,W4 3AH,100021589840
Flat 17,TW4 5DD,10091597016
Flat 17,W4 3AH,100021589841
Flat 18,TW4 5DD,10091597017
Flat 19,W4 3AH,100021589843
Flat 20,W4 3AH,100021589844
Flat 21,W4 3AH,100021589845
Flat 22,W4 3AH,100021589846
FLAT 2 Goodstone Court,HA1 4FL,10070269054
Flat 23,W4 3AH,100021589847
Flat 24,W4 3AH,100021589848
"30c, Bosanquet Close",UB8 3PE,100021475316
"30e, Bosanquet Close",UB8 3PE,100021475318
FLAT 3 Goodstone Court,HA1 4FL,10070269055
FLAT 4 Goodstone Court,HA1 4FL,10070269056
FLAT 5 Goodstone Court,HA1 4FL,10070269057
FLAT 6 Goodstone Court,HA1 4FL,10070269058
FLAT 7 Goodstone Court,HA1 4FL,10070269059
FLAT 8 Goodstone Court,HA1 4FL,10070269060
FLAT 9 Goodstone Court,HA1 4FL,10070269061
FLAT 10 Goodstone Court,HA1 4FL,10070269062
FLAT 11 Goodstone Court,HA1 4FL,10070269063
FLAT 12 Goodstone Court,HA1 4FL,10070269064
FLAT 13 Goodstone Court,HA1 4FL,10070269065
FLAT 14 Goodstone Court,HA1 4FL,10070269066
FLAT 15 Goodstone Court,HA1 4FL,10070269067
FLAT 16 Goodstone Court,HA1 4FL,10070269068
FLAT 17 Goodstone Court,HA1 4FL,10070269069
FLAT 18 Goodstone Court,HA1 4FL,10070269070
FLAT 19 Goodstone Court,HA1 4FL,10070269071
FLAT 20 Goodstone Court,HA1 4FL,10070269072
FLAT 21 Goodstone Court,HA1 4FL,10070269073
FLAT 22 Goodstone Court,HA1 4FL,10070269074
FLAT 23 Goodstone Court,HA1 4FL,10070269075
FLAT 24 Goodstone Court,HA1 4FL,10070269076
FLAT 25 Goodstone Court,HA1 4FL,10070269077
FLAT 26 Goodstone Court,HA1 4FL,10070269078
FLAT 27 Goodstone Court,HA1 4FL,10070269079
FLAT 28 Goodstone Court,HA1 4FL,10070269080
FLAT 29 Goodstone Court,HA1 4FL,10070269081
FLAT 30 Goodstone Court,HA1 4FL,10070269082
FLAT 31 Goodstone Court,HA1 4FL,10070269083
FLAT 32 Goodstone Court,HA1 4FL,10070269084
FLAT 33 Goodstone Court,HA1 4FL,10070269085
FLAT 34 Goodstone Court,HA1 4FL,10070269086
FLAT 35 Goodstone Court,HA1 4FL,10070269087
FLAT 36 Goodstone Court,HA1 4FL,10070269088
FLAT 37 Goodstone Court,HA1 4FL,10070269089
FLAT 38 Goodstone Court,HA1 4FL,10070269090
FLAT 39 Goodstone Court,HA1 4FL,10070269091
FLAT 40 Goodstone Court,HA1 4FL,10070269092
FLAT 41 Goodstone Court,HA1 4FL,10070269093
FLAT 42 Goodstone Court,HA1 4FL,10070269094
FLAT 43 Goodstone Court,HA1 4FL,10070269095
"13 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778260
"14 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778259
"15 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778258
"16 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778263
"17 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778262
"18 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778261
"19 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778266
"20 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778265
"21 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778264
90a Murray Road,W5 4DA,12135293
"Flat 1, 6 Wolverton Gardens",W5 3LJ,"12119972"
"1, Monsted House",UB1 1FG,12189944
"10, Monsted House",UB1 1FG,12189953
"20, Monsted House",UB1 1FG,12189963
"2, Monsted House",UB1 1FG,12189945
"3, Monsted House",UB1 1FG,12189946
"4, Monsted House",UB1 1FG,12189947
"5, Monsted House",UB1 1FG,12189948
"6, Monsted House",UB1 1FG,12189949
"7, Monsted House",UB1 1FG,12189950
"8, Monsted House",UB1 1FG,12189951
"9, Monsted House",UB1 1FG,12189952
"1 Cullis House, 1, Accolade Avenue",UB1 1FH,12189904
"2 Cullis House, 1, Accolade Avenue",UB1 1FH,12189905
"3 Cullis House, 1, Accolade Avenue",UB1 1FH,12189906
"4 Cullis House, 1, Accolade Avenue",UB1 1FH,12189907
"5 Cullis House, 1, Accolade Avenue",UB1 1FH,12189908
"6 Cullis House, 1, Accolade Avenue",UB1 1FH,12189909
1 Genteel House Samara Drive,UB1 1FJ,12189835
2 Genteel House Samara Drive,UB1 1FJ,12189836
3 Genteel House Samara Drive,UB1 1FJ,12189837
4 Genteel House Samara Drive,UB1 1FJ,12189838
5 Genteel House Samara Drive,UB1 1FJ,12189839
6 Genteel House Samara Drive,UB1 1FJ,12189840
7 Genteel House Samara Drive,UB1 1FJ,12189841
8 Genteel House Samara Drive,UB1 1FJ,12189842
9 Genteel House Samara Drive,UB1 1FJ,12189843
10 Genteel House Samara Drive,UB1 1FJ,12189844
1 ASH TREE HOUSE,SE5 0TE,None
"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979
3 ASH TREE HOUSE,SE5 0TE,None
Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981
5 ASH TREE HOUSE,SE5 0TE,None
Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983
Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986
8 ASH TREE HOUSE,SE5 0TE,None
Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990
12 ASH TREE HOUSE,SE5 0TE,None
FLAT 1 599 HARROW ROAD,W10 4RA,217113930
FLAT 2 599 HARROW ROAD,W10 4RA,217113931
FLAT 3 599 HARROW ROAD,W10 4RA,None
FLAT 4 599 HARROW ROAD,W10 4RA,None
FLAT 5 599 HARROW ROAD,W10 4RA,217113934
FLAT 6 599 HARROW ROAD,W10 4RA,None
FLAT 7 599 HARROW ROAD,W10 4RA,None
FLAT 8 599 HARROW ROAD,W10 4RA,None
"Flat 1, Ohio Building",SE13 7RX,10023226256
"Flat 2, Ohio Building",SE13 7RX,10023226257
"Apartment 1 Block B, 105, Benwell Road",N7 7BW,10012792307
"Apartment 2 Block B, 105, Benwell Road",N7 7BW,10012792308
"Apartment 3 Block B, 105, Benwell Road",N7 7BW,10012792309
"Apartment 4 Block B, 105, Benwell Road",N7 7BW,10012792310
"Apartment 5 Block B, 105, Benwell Road",N7 7BW,10012792311
"Apartment 6 Block B, 105, Benwell Road",N7 7BW,10012792312
"Apartment 7 Block B, 105, Benwell Road",N7 7BW,10012792313
"Apartment 8 Block B, 105, Benwell Road",N7 7BW,10012792314
"Apartment 9 Block B, 105, Benwell Road",N7 7BW,10012792315
"Apartment 10 Block B, 105, Benwell Road",N7 7BW,10012792316
"Apartment 11 Block B, 105, Benwell Road",N7 7BW,10012792317
"Apartment 12 Block B, 105, Benwell Road",N7 7BW,10012792318
"Apartment 13 Block B, 105, Benwell Road",N7 7BW,10012792319
"Apartment 1 Block D, 32, Hornsey Road",N7 7AT,10012792366
"Apartment 2 Block D, 32, Hornsey Road",N7 7AT,10012792367
"Apartment 3 Block D, 32, Hornsey Road",N7 7AT,10012792368
"Apartment 4 Block D, 32, Hornsey Road",N7 7AT,10012792369
"Apartment 5 Block D, 32, Hornsey Road",N7 7AT,10012792370
"Apartment 6 Block D, 32, Hornsey Road",N7 7AT,"10012792371"
"Apartment 7 Block D, 32, Hornsey Road",N7 7AT,10012792372
"Apartment 8 Block D, 32, Hornsey Road",N7 7AT,10012792373
"Apartment 9 Block D, 32, Hornsey Road",N7 7AT,10012792374
"Apartment 10 Block D, 32, Hornsey Road",N7 7AT,10012792375
"Apartment 11 Block D, 32, Hornsey Road",N7 7AT,10012792376
"Apartment 12 Block D, 32, Hornsey Road",N7 7AT,10012792377
"Apartment 13 Block D, 32, Hornsey Road",N7 7AT,10012792378
"Apartment 14 Block D, 32, Hornsey Road",N7 7AT,10012792379
"Apartment 15 Block D, 32, Hornsey Road",N7 7AT,10012792380
"Apartment 16 Block D, 32, Hornsey Road",N7 7AT,"10012792381"
"Apartment 17Block D, 32, Hornsey Road",N7 7AT,10012792382
"Apartment 18 Block D, 32, Hornsey Road",N7 7AT,10012792383
24b Honley Road,SE6 2HZ,None
FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
2 COLLEGE HOUSE,CM7 1JS,100091449870
3 COLLEGE HOUSE,CM7 1JS,100091449871
1 Anita Street,M4 5DU,None
2 Anita Street,M4 5DU,77123061
5 Anita Street,M4 5DU,77123081
6 Anita Street,M4 5DU,77123082
8 Anita Street,M4 5DU,None
9 Anita Street,M4 5DU,None
10 Anita Street,M4 5DU,77123051
12 Anita Street,M4 5DU,77123053
19 Anita Street,M4 5DU,None
22 Anita Street,M4 5DU,None
26 Anita Street,M4 5DU,77123068
28 Anita Street,M4 5DU,None
30 Anita Street,M4 5DU,None
32 Anita Street,M4 5DU,None
33 Anita Street,M4 5DU,77123076
34 Anita Street,M4 5DU,None
35 Anita Street,M4 5DU,77123078
36 Anita Street,M4 5DU,77123079
23 George Leigh Street,M4 5DR,77123171
25 George Leigh Street,M4 5DR,None
35 George Leigh Street,M4 5DR,77123177
39 George Leigh Street,M4 5DR,77123179
41 George Leigh Street,M4 5DR,None
43 George Leigh Street,M4 5DR,None
49 George Leigh Street,M4 5DR,None
51 George Leigh Street,M4 5DR,77123185
55 George Leigh Street,M4 5DR,None
57 George Leigh Street,M4 5DR,None
"1a, Victoria Square",M4 5DX,77211153
2a Victoria Square ,M4 5DX,None
"4a, Victoria Square",M4 5DX,77211155
5a Victoria Square,M4 5DX,77211156
6a Victoria Square,M4 5DX,77211157
7a Victoria Square,M4 5DX,77211158
8a Victoria Square,M4 5DX,77211159
9a Victoria Square,M4 5DX,77211160
10a Victoria Square,M4 5DX,77211161
11a Victoria Square,M4 5DX,77211162
12a Victoria Square,M4 5DX,77211163
13a Victoria Square,M4 5DX,77211164
14a Victoria Square,M4 5DX,77211165
15a Victoria Square,M4 5DX,77211166
16a Victoria Square,M4 5DX,77211167
17a Victoria Square,M4 5DX,77211168
18a Victoria Square,M4 5DX,77211169
19a Victoria Square,M4 5DX,77211170
20a Victoria Square,M4 5DX,77211171
21a Victoria Square,M4 5DY,77211172
22a Victoria Square,M4 5DY,None
23a Victoria Square,M4 5DY,77211174
24a Victoria Square,M4 5DY,77211175
25a Victoria Square,M4 5DY,77211176
26a Victoria Square,M4 5DY,77211177
27a Victoria Square,M4 5DY,77211178
28a Victoria Square,M4 5DY,None
29a Victoria Square,M4 5DY,77211180
30a Victoria Square,M4 5DY,77211181
31a Victoria Square,M4 5DY,77211182
32a Victoria Square,M4 5DY,77211183
33a Victoria Square,M4 5DY,77211184
34a Victoria Square,M4 5DY,77211185
35a Victoria Square,M4 5DY,None
36a Victoria Square,M4 5DY,77211187
37a Victoria Square,M4 5DY,77211188
38a Victoria Square,M4 5DY,77211189
39a Victoria Square,M4 5DY,77211190
40a Victoria Square,M4 5DY,None
41a Victoria Square,M4 5DY,77211192
42a Victoria Square,M4 5DY,77211193
43a Victoria Square,M4 5DY,77211194
44a Victoria Square,M4 5DY,77211195
45a Victoria Square,M4 5DY,77211196
46a Victoria Square,M4 5DY,77211197
47a Victoria Square,M4 5DY,77211198
48a Victoria Square,M4 5DY,77211199
49a Victoria Square,M4 5DY,77211200
50a Victoria Square,M4 5DY,77211201
51a Victoria Square,M4 5DY,77211202
52a Victoria Square,M4 5DY,77211203
53a Victoria Square,M4 5DY,77211204
54a Victoria Square,M4 5DY,77211205
55a Victoria Square,M4 5DY,77211206
56a Victoria Square,M4 5DZ,77211207
57a Victoria Square,M4 5DZ,None
58a Victoria Square,M4 5DZ,77211209
59a Victoria Square,M4 5DZ,77211210
60a Victoria Square,M4 5DZ,77211211
61a Victoria Square,M4 5DZ,77211212
62a Victoria Square,M4 5DZ,77211213
63a Victoria Square,M4 5DZ,None
64a Victoria Square,M4 5DZ,77211215
65a Victoria Square,M4 5DZ,77211216
66a Victoria Square,M4 5DZ,None
67a Victoria Square,M4 5DZ,None
68a Victoria Square,M4 5DZ,77211219
69a Victoria Square,M4 5DZ,77211220
70a Victoria Square,M4 5DZ,77211221
71a Victoria Square,M4 5DZ,77211222
72a Victoria Square,M4 5DZ,77211223
73a Victoria Square,M4 5DZ,77211224
74a Victoria Square,M4 5DZ,None
75a Victoria Square,M4 5DZ,77211226
76a Victoria Square,M4 5DZ,77211227
77a Victoria Square,M4 5DZ,None
78a Victoria Square,M4 5DZ,77211229
79a Victoria Square,M4 5DZ,77211230
80a Victoria Square,M4 5DZ,77211231
81a Victoria Square,M4 5DZ,77211232
82 Victoria Square,M4 5DZ,None
83a Victoria Square,M4 5DZ,77211234
84a Victoria Square,M4 5DZ,None
85a Victoria Square,M4 5DZ,77211236
86a Victoria Square,M4 5DZ,77211237
87a Victoria Square,M4 5DZ,77211238
88a Victoria Square,M4 5DZ,None
89a Victoria Square,M4 5DZ,77211240
90a Victoria Square,M4 5DZ,77211241
91a Victoria Square,M4 5DZ,77211242
92a Victoria Square,M4 5DZ,77211243
93a Victoria Square,M4 5EA,77211244
94a Victoria Square,M4 5EA,None
95a Victoria Square,M4 5EA,77211246
96a Victoria Square,M4 5EA,77211247
97a Victoria Square,M4 5EA,77211248
98a Victoria Square,M4 5EA,77211249
99a Victoria Square,M4 5EA,77211250
100a Victoria Square,M4 5EA,77211251
101a Victoria Square,M4 5EA,None
102a Victoria Square,M4 5EA,None
103a Victoria Square,M4 5EA,77211254
104a Victoria Square,M4 5EA,77211255
105a Victoria Square,M4 5EA,None
106a Victoria Square,M4 5EA,77211257
107a Victoria Square,M4 5EA,77211258
108a Victoria Square,M4 5EA,77211259
109a Victoria Square,M4 5EA,77211260
110a Victoria Square,M4 5EA,77211261
111a Victoria Square,M4 5EA,77211262
112a Victoria Square,M4 5EA,None
113a Victoria Square,M4 5EA,77211264
114a Victoria Square,M4 5EA,77211265
115a Victoria Square,M4 5EA,77211266
116a Victoria Square,M4 5EA,77211267
117a Victoria Square,M4 5EA,None
118a Victoria Square,M4 5EA,None
119a Victoria Square,M4 5EA,77211270
120a Victoria Square,M4 5EA,77211271
121a Victoria Square,M4 5EA,77211272
122a Victoria Square,M4 5EA,77211273
123a Victoria Square,M4 5EA,77211274
124a Victoria Square,M4 5EA,None
125a Victoria Square,M4 5EA,77211276
126a Victoria Square,M4 5EA,77211277
127a Victoria Square,M4 5EA,77211278
128a Victoria Square,M4 5EA,77211279
129a Victoria Square,M4 5EA,77211280
130a Victoria Square,M4 5FA,77211281
131a Victoria Square,M4 5FA,77211282
132a Victoria Square,M4 5FA,77211283
133a Victoria Square,M4 5FA,None
134a Victoria Square,M4 5FA,77211285
135a Victoria Square,M4 5FA,77211286
136a Victoria Square,M4 5FA,77211287
137a Victoria Square,M4 5FA,77211288
138a Victoria Square,M4 5FA,77211289
139a Victoria Square,M4 5FA,77211290
140a Victoria Square,M4 5FA,77211291
141a Victoria Square,M4 5FA,77211292
142a Victoria Square,M4 5FA,77211293
143a Victoria Square,M4 5FA,77211294
144a Victoria Square,M4 5FA,77211295
145a Victoria Square,M4 5FA,None
146a Victoria Square,M4 5FA,77211297
147a Victoria Square,M4 5FA,77211298
148a Victoria Square,M4 5FA,77211299
149a Victoria Square,M4 5FA,77211300
150a Victoria Square,M4 5FA,77211301
151a Victoria Square,M4 5FA,None
152a Victoria Square,M4 5FA,77211303
153a Victoria Square,M4 5FA,None
154a Victoria Square,M4 5FA,77211305
155a Victoria Square,M4 5FA,None
156a Victoria Square,M4 5FA,77211307
157a Victoria Square,M4 5FA,77211308
158a Victoria Square,M4 5FA,77211309
159a Victoria Square,M4 5FA,None
160a Victoria Square,M4 5FA,77211311
161a Victoria Square,M4 5FA,None
162a Victoria Square,M4 5FA,None
163a Victoria Square,M4 5FA,77211314
164a Victoria Square,M4 5FA,77211315
165a Victoria Square,M4 5FA,77211316
166a Victoria Square,M4 5FA,None
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
1 User Input Postcode Manual UPRN Code
2 47 The Fairway OX16 0RR 100120771697
3 11 REGENT COURT SL1 3LG 100081041562
4 3/137a Windmill Road TW8 9NH 100021516998
5 Flat 33 SW18 4BE 100023328943
6 FLAT 1 Brendon Grove N2 8JE 200013412
7 Flat 15 KT8 2NE 100062123759
8 FLAT 5 Stonehill Road W4 3AH 100021589829
9 10 Douglas Court SL7 1UQ 100081278099
10 1 Windmill Road HP17 8JA 766034606
11 31 Denewood HP13 7LH 100081095964
12 10, Greenways Drive TW4 5DD 10091597009
13 Flat 10 W4 3AH 100021589834
14 Flat 11 TW4 5DD 10091597010
15 Flat 11 W4 3AH 100021589835
16 12, Greenways Drive TW4 5DD 10091597011
17 Flat 12, Forbes House W4 3AH 100021589836
18 FLAT 1 Goodstone Court HA1 4FL 10070269053
19 Flat 13 TW4 5DD 10091597012
20 Flat 13 W4 3AH 100021589837
21 Flat 14 TW4 5DD 10091597013
22 Flat 14 W4 3AH 100021589838
23 Flat 15 TW4 5DD 10091597014
24 Flat 15 W4 3AH 100021589839
25 Flat 16 TW4 5DD 10091597015
26 Flat 16 W4 3AH 100021589840
27 Flat 17 TW4 5DD 10091597016
28 Flat 17 W4 3AH 100021589841
29 Flat 18 TW4 5DD 10091597017
30 Flat 19 W4 3AH 100021589843
31 Flat 20 W4 3AH 100021589844
32 Flat 21 W4 3AH 100021589845
33 Flat 22 W4 3AH 100021589846
34 FLAT 2 Goodstone Court HA1 4FL 10070269054
35 Flat 23 W4 3AH 100021589847
36 Flat 24 W4 3AH 100021589848
37 30c, Bosanquet Close UB8 3PE 100021475316
38 30e, Bosanquet Close UB8 3PE 100021475318
39 FLAT 3 Goodstone Court HA1 4FL 10070269055
40 FLAT 4 Goodstone Court HA1 4FL 10070269056
41 FLAT 5 Goodstone Court HA1 4FL 10070269057
42 FLAT 6 Goodstone Court HA1 4FL 10070269058
43 FLAT 7 Goodstone Court HA1 4FL 10070269059
44 FLAT 8 Goodstone Court HA1 4FL 10070269060
45 FLAT 9 Goodstone Court HA1 4FL 10070269061
46 FLAT 10 Goodstone Court HA1 4FL 10070269062
47 FLAT 11 Goodstone Court HA1 4FL 10070269063
48 FLAT 12 Goodstone Court HA1 4FL 10070269064
49 FLAT 13 Goodstone Court HA1 4FL 10070269065
50 FLAT 14 Goodstone Court HA1 4FL 10070269066
51 FLAT 15 Goodstone Court HA1 4FL 10070269067
52 FLAT 16 Goodstone Court HA1 4FL 10070269068
53 FLAT 17 Goodstone Court HA1 4FL 10070269069
54 FLAT 18 Goodstone Court HA1 4FL 10070269070
55 FLAT 19 Goodstone Court HA1 4FL 10070269071
56 FLAT 20 Goodstone Court HA1 4FL 10070269072
57 FLAT 21 Goodstone Court HA1 4FL 10070269073
58 FLAT 22 Goodstone Court HA1 4FL 10070269074
59 FLAT 23 Goodstone Court HA1 4FL 10070269075
60 FLAT 24 Goodstone Court HA1 4FL 10070269076
61 FLAT 25 Goodstone Court HA1 4FL 10070269077
62 FLAT 26 Goodstone Court HA1 4FL 10070269078
63 FLAT 27 Goodstone Court HA1 4FL 10070269079
64 FLAT 28 Goodstone Court HA1 4FL 10070269080
65 FLAT 29 Goodstone Court HA1 4FL 10070269081
66 FLAT 30 Goodstone Court HA1 4FL 10070269082
67 FLAT 31 Goodstone Court HA1 4FL 10070269083
68 FLAT 32 Goodstone Court HA1 4FL 10070269084
69 FLAT 33 Goodstone Court HA1 4FL 10070269085
70 FLAT 34 Goodstone Court HA1 4FL 10070269086
71 FLAT 35 Goodstone Court HA1 4FL 10070269087
72 FLAT 36 Goodstone Court HA1 4FL 10070269088
73 FLAT 37 Goodstone Court HA1 4FL 10070269089
74 FLAT 38 Goodstone Court HA1 4FL 10070269090
75 FLAT 39 Goodstone Court HA1 4FL 10070269091
76 FLAT 40 Goodstone Court HA1 4FL 10070269092
77 FLAT 41 Goodstone Court HA1 4FL 10070269093
78 FLAT 42 Goodstone Court HA1 4FL 10070269094
79 FLAT 43 Goodstone Court HA1 4FL 10070269095
80 13 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778260
81 14 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778259
82 15 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778258
83 16 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778263
84 17 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778262
85 18 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778261
86 19 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778266
87 20 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778265
88 21 Stubwick Court, Old Saw Mill Place HP6 6FF 10013778264
89 90a Murray Road W5 4DA 12135293
90 Flat 1, 6 Wolverton Gardens W5 3LJ 12119972
91 1, Monsted House UB1 1FG 12189944
92 10, Monsted House UB1 1FG 12189953
93 20, Monsted House UB1 1FG 12189963
94 2, Monsted House UB1 1FG 12189945
95 3, Monsted House UB1 1FG 12189946
96 4, Monsted House UB1 1FG 12189947
97 5, Monsted House UB1 1FG 12189948
98 6, Monsted House UB1 1FG 12189949
99 7, Monsted House UB1 1FG 12189950
100 8, Monsted House UB1 1FG 12189951
101 9, Monsted House UB1 1FG 12189952
102 1 Cullis House, 1, Accolade Avenue UB1 1FH 12189904
103 2 Cullis House, 1, Accolade Avenue UB1 1FH 12189905
104 3 Cullis House, 1, Accolade Avenue UB1 1FH 12189906
105 4 Cullis House, 1, Accolade Avenue UB1 1FH 12189907
106 5 Cullis House, 1, Accolade Avenue UB1 1FH 12189908
107 6 Cullis House, 1, Accolade Avenue UB1 1FH 12189909
108 1 Genteel House Samara Drive UB1 1FJ 12189835
109 2 Genteel House Samara Drive UB1 1FJ 12189836
110 3 Genteel House Samara Drive UB1 1FJ 12189837
111 4 Genteel House Samara Drive UB1 1FJ 12189838
112 5 Genteel House Samara Drive UB1 1FJ 12189839
113 6 Genteel House Samara Drive UB1 1FJ 12189840
114 7 Genteel House Samara Drive UB1 1FJ 12189841
115 8 Genteel House Samara Drive UB1 1FJ 12189842
116 9 Genteel House Samara Drive UB1 1FJ 12189843
117 10 Genteel House Samara Drive UB1 1FJ 12189844
118 1 ASH TREE HOUSE SE5 0TE None
119 Flat 1 Ash Tree House, 2, Thompson Avenue SE5 0TE 10009803979
120 3 ASH TREE HOUSE SE5 0TE None
121 Flat 3 ASH TREE HOUSE SE5 0TE 10009803981
122 5 ASH TREE HOUSE SE5 0TE None
123 Flat 5 ASH TREE HOUSE SE5 0TE 10009803983
124 Flat 8 ASH TREE HOUSE SE5 0TE 10009803986
125 8 ASH TREE HOUSE SE5 0TE None
126 Flat 12 ASH TREE HOUSE SE5 0TE 10009803990
127 12 ASH TREE HOUSE SE5 0TE None
128 FLAT 1 599 HARROW ROAD W10 4RA 217113930
129 FLAT 2 599 HARROW ROAD W10 4RA 217113931
130 FLAT 3 599 HARROW ROAD W10 4RA None
131 FLAT 4 599 HARROW ROAD W10 4RA None
132 FLAT 5 599 HARROW ROAD W10 4RA 217113934
133 FLAT 6 599 HARROW ROAD W10 4RA None
134 FLAT 7 599 HARROW ROAD W10 4RA None
135 FLAT 8 599 HARROW ROAD W10 4RA None
136 Flat 1, Ohio Building SE13 7RX 10023226256
137 Flat 2, Ohio Building SE13 7RX 10023226257
138 Apartment 1 Block B, 105, Benwell Road N7 7BW 10012792307
139 Apartment 2 Block B, 105, Benwell Road N7 7BW 10012792308
140 Apartment 3 Block B, 105, Benwell Road N7 7BW 10012792309
141 Apartment 4 Block B, 105, Benwell Road N7 7BW 10012792310
142 Apartment 5 Block B, 105, Benwell Road N7 7BW 10012792311
143 Apartment 6 Block B, 105, Benwell Road N7 7BW 10012792312
144 Apartment 7 Block B, 105, Benwell Road N7 7BW 10012792313
145 Apartment 8 Block B, 105, Benwell Road N7 7BW 10012792314
146 Apartment 9 Block B, 105, Benwell Road N7 7BW 10012792315
147 Apartment 10 Block B, 105, Benwell Road N7 7BW 10012792316
148 Apartment 11 Block B, 105, Benwell Road N7 7BW 10012792317
149 Apartment 12 Block B, 105, Benwell Road N7 7BW 10012792318
150 Apartment 13 Block B, 105, Benwell Road N7 7BW 10012792319
151 Apartment 1 Block D, 32, Hornsey Road N7 7AT 10012792366
152 Apartment 2 Block D, 32, Hornsey Road N7 7AT 10012792367
153 Apartment 3 Block D, 32, Hornsey Road N7 7AT 10012792368
154 Apartment 4 Block D, 32, Hornsey Road N7 7AT 10012792369
155 Apartment 5 Block D, 32, Hornsey Road N7 7AT 10012792370
156 Apartment 6 Block D, 32, Hornsey Road N7 7AT 10012792371
157 Apartment 7 Block D, 32, Hornsey Road N7 7AT 10012792372
158 Apartment 8 Block D, 32, Hornsey Road N7 7AT 10012792373
159 Apartment 9 Block D, 32, Hornsey Road N7 7AT 10012792374
160 Apartment 10 Block D, 32, Hornsey Road N7 7AT 10012792375
161 Apartment 11 Block D, 32, Hornsey Road N7 7AT 10012792376
162 Apartment 12 Block D, 32, Hornsey Road N7 7AT 10012792377
163 Apartment 13 Block D, 32, Hornsey Road N7 7AT 10012792378
164 Apartment 14 Block D, 32, Hornsey Road N7 7AT 10012792379
165 Apartment 15 Block D, 32, Hornsey Road N7 7AT 10012792380
166 Apartment 16 Block D, 32, Hornsey Road N7 7AT 10012792381
167 Apartment 17Block D, 32, Hornsey Road N7 7AT 10012792382
168 Apartment 18 Block D, 32, Hornsey Road N7 7AT 10012792383
169 24b Honley Road SE6 2HZ None
170 FLAT B 158 LEAHURST ROAD SE13 5NL 100021976974
171 2 COLLEGE HOUSE CM7 1JS 100091449870
172 3 COLLEGE HOUSE CM7 1JS 100091449871
173 1 Anita Street M4 5DU None
174 2 Anita Street M4 5DU 77123061
175 5 Anita Street M4 5DU 77123081
176 6 Anita Street M4 5DU 77123082
177 8 Anita Street M4 5DU None
178 9 Anita Street M4 5DU None
179 10 Anita Street M4 5DU 77123051
180 12 Anita Street M4 5DU 77123053
181 19 Anita Street M4 5DU None
182 22 Anita Street M4 5DU None
183 26 Anita Street M4 5DU 77123068
184 28 Anita Street M4 5DU None
185 30 Anita Street M4 5DU None
186 32 Anita Street M4 5DU None
187 33 Anita Street M4 5DU 77123076
188 34 Anita Street M4 5DU None
189 35 Anita Street M4 5DU 77123078
190 36 Anita Street M4 5DU 77123079
191 23 George Leigh Street M4 5DR 77123171
192 25 George Leigh Street M4 5DR None
193 35 George Leigh Street M4 5DR 77123177
194 39 George Leigh Street M4 5DR 77123179
195 41 George Leigh Street M4 5DR None
196 43 George Leigh Street M4 5DR None
197 49 George Leigh Street M4 5DR None
198 51 George Leigh Street M4 5DR 77123185
199 55 George Leigh Street M4 5DR None
200 57 George Leigh Street M4 5DR None
201 1a, Victoria Square M4 5DX 77211153
202 2a Victoria Square M4 5DX None
203 4a, Victoria Square M4 5DX 77211155
204 5a Victoria Square M4 5DX 77211156
205 6a Victoria Square M4 5DX 77211157
206 7a Victoria Square M4 5DX 77211158
207 8a Victoria Square M4 5DX 77211159
208 9a Victoria Square M4 5DX 77211160
209 10a Victoria Square M4 5DX 77211161
210 11a Victoria Square M4 5DX 77211162
211 12a Victoria Square M4 5DX 77211163
212 13a Victoria Square M4 5DX 77211164
213 14a Victoria Square M4 5DX 77211165
214 15a Victoria Square M4 5DX 77211166
215 16a Victoria Square M4 5DX 77211167
216 17a Victoria Square M4 5DX 77211168
217 18a Victoria Square M4 5DX 77211169
218 19a Victoria Square M4 5DX 77211170
219 20a Victoria Square M4 5DX 77211171
220 21a Victoria Square M4 5DY 77211172
221 22a Victoria Square M4 5DY None
222 23a Victoria Square M4 5DY 77211174
223 24a Victoria Square M4 5DY 77211175
224 25a Victoria Square M4 5DY 77211176
225 26a Victoria Square M4 5DY 77211177
226 27a Victoria Square M4 5DY 77211178
227 28a Victoria Square M4 5DY None
228 29a Victoria Square M4 5DY 77211180
229 30a Victoria Square M4 5DY 77211181
230 31a Victoria Square M4 5DY 77211182
231 32a Victoria Square M4 5DY 77211183
232 33a Victoria Square M4 5DY 77211184
233 34a Victoria Square M4 5DY 77211185
234 35a Victoria Square M4 5DY None
235 36a Victoria Square M4 5DY 77211187
236 37a Victoria Square M4 5DY 77211188
237 38a Victoria Square M4 5DY 77211189
238 39a Victoria Square M4 5DY 77211190
239 40a Victoria Square M4 5DY None
240 41a Victoria Square M4 5DY 77211192
241 42a Victoria Square M4 5DY 77211193
242 43a Victoria Square M4 5DY 77211194
243 44a Victoria Square M4 5DY 77211195
244 45a Victoria Square M4 5DY 77211196
245 46a Victoria Square M4 5DY 77211197
246 47a Victoria Square M4 5DY 77211198
247 48a Victoria Square M4 5DY 77211199
248 49a Victoria Square M4 5DY 77211200
249 50a Victoria Square M4 5DY 77211201
250 51a Victoria Square M4 5DY 77211202
251 52a Victoria Square M4 5DY 77211203
252 53a Victoria Square M4 5DY 77211204
253 54a Victoria Square M4 5DY 77211205
254 55a Victoria Square M4 5DY 77211206
255 56a Victoria Square M4 5DZ 77211207
256 57a Victoria Square M4 5DZ None
257 58a Victoria Square M4 5DZ 77211209
258 59a Victoria Square M4 5DZ 77211210
259 60a Victoria Square M4 5DZ 77211211
260 61a Victoria Square M4 5DZ 77211212
261 62a Victoria Square M4 5DZ 77211213
262 63a Victoria Square M4 5DZ None
263 64a Victoria Square M4 5DZ 77211215
264 65a Victoria Square M4 5DZ 77211216
265 66a Victoria Square M4 5DZ None
266 67a Victoria Square M4 5DZ None
267 68a Victoria Square M4 5DZ 77211219
268 69a Victoria Square M4 5DZ 77211220
269 70a Victoria Square M4 5DZ 77211221
270 71a Victoria Square M4 5DZ 77211222
271 72a Victoria Square M4 5DZ 77211223
272 73a Victoria Square M4 5DZ 77211224
273 74a Victoria Square M4 5DZ None
274 75a Victoria Square M4 5DZ 77211226
275 76a Victoria Square M4 5DZ 77211227
276 77a Victoria Square M4 5DZ None
277 78a Victoria Square M4 5DZ 77211229
278 79a Victoria Square M4 5DZ 77211230
279 80a Victoria Square M4 5DZ 77211231
280 81a Victoria Square M4 5DZ 77211232
281 82 Victoria Square M4 5DZ None
282 83a Victoria Square M4 5DZ 77211234
283 84a Victoria Square M4 5DZ None
284 85a Victoria Square M4 5DZ 77211236
285 86a Victoria Square M4 5DZ 77211237
286 87a Victoria Square M4 5DZ 77211238
287 88a Victoria Square M4 5DZ None
288 89a Victoria Square M4 5DZ 77211240
289 90a Victoria Square M4 5DZ 77211241
290 91a Victoria Square M4 5DZ 77211242
291 92a Victoria Square M4 5DZ 77211243
292 93a Victoria Square M4 5EA 77211244
293 94a Victoria Square M4 5EA None
294 95a Victoria Square M4 5EA 77211246
295 96a Victoria Square M4 5EA 77211247
296 97a Victoria Square M4 5EA 77211248
297 98a Victoria Square M4 5EA 77211249
298 99a Victoria Square M4 5EA 77211250
299 100a Victoria Square M4 5EA 77211251
300 101a Victoria Square M4 5EA None
301 102a Victoria Square M4 5EA None
302 103a Victoria Square M4 5EA 77211254
303 104a Victoria Square M4 5EA 77211255
304 105a Victoria Square M4 5EA None
305 106a Victoria Square M4 5EA 77211257
306 107a Victoria Square M4 5EA 77211258
307 108a Victoria Square M4 5EA 77211259
308 109a Victoria Square M4 5EA 77211260
309 110a Victoria Square M4 5EA 77211261
310 111a Victoria Square M4 5EA 77211262
311 112a Victoria Square M4 5EA None
312 113a Victoria Square M4 5EA 77211264
313 114a Victoria Square M4 5EA 77211265
314 115a Victoria Square M4 5EA 77211266
315 116a Victoria Square M4 5EA 77211267
316 117a Victoria Square M4 5EA None
317 118a Victoria Square M4 5EA None
318 119a Victoria Square M4 5EA 77211270
319 120a Victoria Square M4 5EA 77211271
320 121a Victoria Square M4 5EA 77211272
321 122a Victoria Square M4 5EA 77211273
322 123a Victoria Square M4 5EA 77211274
323 124a Victoria Square M4 5EA None
324 125a Victoria Square M4 5EA 77211276
325 126a Victoria Square M4 5EA 77211277
326 127a Victoria Square M4 5EA 77211278
327 128a Victoria Square M4 5EA 77211279
328 129a Victoria Square M4 5EA 77211280
329 130a Victoria Square M4 5FA 77211281
330 131a Victoria Square M4 5FA 77211282
331 132a Victoria Square M4 5FA 77211283
332 133a Victoria Square M4 5FA None
333 134a Victoria Square M4 5FA 77211285
334 135a Victoria Square M4 5FA 77211286
335 136a Victoria Square M4 5FA 77211287
336 137a Victoria Square M4 5FA 77211288
337 138a Victoria Square M4 5FA 77211289
338 139a Victoria Square M4 5FA 77211290
339 140a Victoria Square M4 5FA 77211291
340 141a Victoria Square M4 5FA 77211292
341 142a Victoria Square M4 5FA 77211293
342 143a Victoria Square M4 5FA 77211294
343 144a Victoria Square M4 5FA 77211295
344 145a Victoria Square M4 5FA None
345 146a Victoria Square M4 5FA 77211297
346 147a Victoria Square M4 5FA 77211298
347 148a Victoria Square M4 5FA 77211299
348 149a Victoria Square M4 5FA 77211300
349 150a Victoria Square M4 5FA 77211301
350 151a Victoria Square M4 5FA None
351 152a Victoria Square M4 5FA 77211303
352 153a Victoria Square M4 5FA None
353 154a Victoria Square M4 5FA 77211305
354 155a Victoria Square M4 5FA None
355 156a Victoria Square M4 5FA 77211307
356 157a Victoria Square M4 5FA 77211308
357 158a Victoria Square M4 5FA 77211309
358 159a Victoria Square M4 5FA None
359 160a Victoria Square M4 5FA 77211311
360 161a Victoria Square M4 5FA None
361 162a Victoria Square M4 5FA None
362 163a Victoria Square M4 5FA 77211314
363 164a Victoria Square M4 5FA 77211315
364 165a Victoria Square M4 5FA 77211316
365 166a Victoria Square M4 5FA None
366 FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY CR2 7DL None

View file

@ -1,8 +1,22 @@
import os
from functools import lru_cache
from pydantic_settings import BaseSettings
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional
def resolve_env_file() -> Optional[str]:
    """Pick the dotenv file that matches the current ``ENVIRONMENT``.

    ``ENVIRONMENT`` defaults to ``"local"`` when unset. ``"local"`` maps to
    ``backend/.env`` and ``"test"`` maps to ``backend/.env.test``; any other
    value returns ``None`` so that no env file is loaded.
    """
    env_files = {
        "local": "backend/.env",
        "test": "backend/.env.test",
    }
    # Anything not listed here (e.g. prod) deliberately gets no env file.
    return env_files.get(os.getenv("ENVIRONMENT", "local"))
class Settings(BaseSettings):
API_KEY: str
API_KEY_NAME: str = "X-API-KEY"
@ -41,8 +55,10 @@ class Settings(BaseSettings):
AWS_SECRET_KEY_ID: Optional[str] = None
AWS_DEFAULT_REGION: Optional[str] = None
class Config:
env_file = "backend/.env"
model_config = SettingsConfigDict(
env_file=resolve_env_file(),
env_file_encoding="utf-8",
)
@lru_cache()

View file

@ -3,7 +3,9 @@ from contextlib import contextmanager
from backend.app.config import get_settings
from sqlmodel import Session
connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
connection_string = (
"postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
)
db_string = connection_string.format(
drivername="psycopg2", # You'll need to use psycopg2 driver for PostgreSQL
username=get_settings().DB_USERNAME,
@ -28,7 +30,9 @@ db_engine = create_engine(
def get_db_session():
if db_engine is None:
raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.")
raise RuntimeError(
"Database is not configured. Set DATABASE_URL in environment variables."
)
return Session(db_engine)

View file

@ -0,0 +1,12 @@
from typing import List
from sqlalchemy import insert, delete
from sqlalchemy.orm import Session
from backend.app.db.connection import db_session, db_read_session
from backend.app.db.models.condition import PropertyConditionSurveyModel
def bulk_insert_property_surveys(
    session: Session,
    surveys: List[PropertyConditionSurveyModel],
) -> None:
    """Placeholder for bulk-inserting property condition surveys.

    Not implemented yet: every call raises ``NotImplementedError`` and neither
    ``session`` nor ``surveys`` is touched.
    """
    raise NotImplementedError()

View file

@ -0,0 +1,97 @@
from sqlalchemy import (
BigInteger,
Column,
Date,
ForeignKey,
Integer,
String,
Enum as SqlEnum,
)
from sqlalchemy.orm import declarative_base, relationship
from backend.condition.domain.aspect_type import AspectType
from backend.condition.domain.element_type import ElementType
# Declarative base shared by the condition-survey models defined in this module.
Base = declarative_base()


def _enum_values(enum_cls):
    """Return the ``.value`` of every member of ``enum_cls``, in declaration order."""
    return [member.value for member in enum_cls]


# Native database enum types; ``values_callable`` supplies the members'
# values (rather than their Python names) as the enum's string values.
ElementTypeDb = SqlEnum(
    ElementType,
    name="element_type",
    native_enum=True,
    values_callable=_enum_values,
)

AspectTypeDb = SqlEnum(
    AspectType,
    name="aspect_type",
    native_enum=True,
    values_callable=_enum_values,
)
class PropertyConditionSurveyModel(Base):
    """ORM model for the ``property_condition_survey`` table.

    A survey row carries a UPRN, a date and a source string (all required)
    and owns a collection of ``ElementModel`` rows.
    """

    __tablename__ = "property_condition_survey"
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    uprn = Column(BigInteger, nullable=False)
    date = Column(Date, nullable=False)
    source = Column(String, nullable=False)
    # Child elements. The "all, delete-orphan" cascade persists them with the
    # survey and deletes them when the survey is deleted or when they are
    # removed from this collection.
    elements = relationship(
        "ElementModel",
        back_populates="survey",
        cascade="all, delete-orphan",
    )
class ElementModel(Base):
    """ORM model for the ``element`` table: one element recorded on a survey.

    Each row belongs to exactly one ``PropertyConditionSurveyModel`` and owns
    a collection of ``AspectConditionModel`` rows.
    """

    __tablename__ = "element"  # TODO: rename to survey_element?
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Required foreign key to the parent survey.
    survey_id = Column(
        BigInteger,
        ForeignKey("property_condition_survey.id"),
        nullable=False,
    )
    element_type = Column(ElementTypeDb, nullable=False)
    # NOTE(review): presumably distinguishes repeated elements of the same
    # type within one survey — confirm against the processor.
    element_instance = Column(BigInteger, nullable=False)
    # Parent side of PropertyConditionSurveyModel.elements.
    survey = relationship(
        "PropertyConditionSurveyModel",
        back_populates="elements",
    )
    # Child aspect conditions; same "all, delete-orphan" cascade as on the survey.
    aspect_conditions = relationship(
        "AspectConditionModel",
        back_populates="element",
        cascade="all, delete-orphan",
    )
class AspectConditionModel(Base):
    """ORM model for the ``aspect_condition`` table: one aspect of an element.

    Only the owning element, the aspect type and the aspect instance are
    required; the remaining columns are nullable.
    """

    __tablename__ = "aspect_condition"  # TODO: rename to survey_aspect?
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Required foreign key to the parent element.
    element_id = Column(
        BigInteger,
        ForeignKey("element.id"),
        nullable=False,
    )
    aspect_type = Column(AspectTypeDb, nullable=False)
    # NOTE(review): presumably distinguishes repeated aspects of the same type
    # on one element — confirm against the processor.
    aspect_instance = Column(BigInteger, nullable=False)
    # Optional recorded details for this aspect.
    value = Column(String)
    quantity = Column(Integer)
    install_date = Column(Date)
    renewal_year = Column(Integer)
    comments = Column(String)
    # Parent side of ElementModel.aspect_conditions.
    element = relationship(
        "ElementModel",
        back_populates="aspect_conditions",
    )

View file

@ -24,7 +24,7 @@ def get_cleaned():
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-{environment}".format(environment=get_settings().ENVIRONMENT)
bucket_name=get_settings().DATA_BUCKET
)
cleaned = msgpack.unpackb(cleaned, raw=False)

View file

@ -1,3 +1,4 @@
# fastapi
fastapi==0.115.2
sqlalchemy==2.0.36
@ -12,5 +13,4 @@ boto3==1.35.44
openpyxl==3.1.2
# Basic
pytz
sqlmodel
sqlmodel

View file

@ -20,7 +20,7 @@ The processor currently supports file formats provided by **Peabody** and **LBWF
The `local_runner` script allows the processor to be executed in a local environment.
1. Copy a sample input file into the `sample_data/` directory.
1. Copy sample input file(s) into the `sample_data/` directory. If working with Peabody data, you'll need the Landlord Reference / UPRN lookup file as well.
2. Update `local_runner.py` as required, specifically the definitions of:
- `lbwf_path`
- `peabody_path`

View file

@ -0,0 +1,33 @@
from enum import Enum
from typing import Optional
from pydantic import BaseModel
class ConditionFileType(Enum):
    """Condition-file formats that a trigger request can name."""

    LBWF = "LBWF"
    Peabody = "Peabody"
    # TODO: make these asset management systems rather than client names
# Request payload naming one condition file to process.
# `file_type` and the trigger file's bucket/key are required; the UPRN lookup
# bucket/key are optional and default to None. The commented JSON examples
# below this class show a request with and without the lookup fields.
class ConditionTriggerRequest(BaseModel):
    file_type: ConditionFileType
    trigger_file_bucket: str  # TODO: get this from settings
    trigger_file_key: str
    uprn_lookup_file_bucket: Optional[str] = None  # TODO: get this from settings
    uprn_lookup_file_key: Optional[str] = None
# {
# "file_type": "Peabody",
# "trigger_file_bucket": "condition-data-dev",
# "trigger_file_key": "input/peabody/2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx",
# "uprn_lookup_file_bucket": "condition-data-dev",
# "uprn_lookup_file_key": "input/peabody/uprn-lookup/PeabodyPropertymatched_Dec25_propref_UPRN.csv"
# }
# {
# "file_type": "LBWF",
# "trigger_file_bucket": "condition-data-dev",
# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx"
# }

View file

@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, Optional, Tuple
from datetime import date
from backend.condition.domain.aspect_condition import AspectCondition

View file

@ -1,16 +0,0 @@
from enum import Enum
class FileType(Enum):
    """Known input file types; each value is the token looked for in a path."""

    LBWF = "lbwf"
    Peabody = "peabody"


def detect_file_type(filepath: str) -> FileType:
    """Infer the file type from a case-insensitive substring of ``filepath``.

    Raises:
        ValueError: if the path contains neither ``lbwf`` nor ``peabody``.
    """
    lowered = filepath.lower()
    # Members are tried in declaration order, so "lbwf" takes precedence when
    # a path happens to contain both tokens.
    for candidate in FileType:
        if candidate.value in lowered:
            return candidate
    raise ValueError("Unrecognised file path")

View file

@ -1,16 +0,0 @@
from typing import Mapping, Any
from io import BytesIO
from utils.logger import setup_logger
from backend.condition.processor import process_file
logger = setup_logger()
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Proof-of-concept entry point: run the processor on an empty stream.

    The event's ``source_key`` (or ``"unknown-source"`` when absent) is passed
    through as the second argument; ``context`` is unused.
    """
    # Temporary stub for PoC wiring
    process_file(BytesIO(b""), event.get("source_key", "unknown-source"))

View file

@ -0,0 +1,48 @@
FROM public.ecr.aws/lambda/python:3.11
# For local running:
# FROM python:3.11.10-bullseye

# Set working directory (Lambda task root)
WORKDIR /var/task

# -----------------------------
# Copy requirements FIRST (for Docker layer caching)
# -----------------------------
# Nothing that changes more often than the dependency list (build args, the
# env file, application source) may precede this step; otherwise the pip
# layer below is rebuilt on every such change.
COPY backend/condition/handler/requirements.txt .

# Install dependencies into Lambda runtime
RUN pip install --no-cache-dir -r requirements.txt

# -----------------------------
# Environment
# -----------------------------
# Declared after the dependency install: a changed build-arg value invalidates
# every RUN layer that follows its ARG declaration.
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME

ENV DB_HOST=${DEV_DB_HOST} \
    DB_PORT=${DEV_DB_PORT} \
    DB_NAME=${DEV_DB_NAME}

# The test env file is installed under the default env-file name.
# NOTE(review): this bakes the file's contents into an image layer; confirm it
# holds no credentials, or inject them at runtime instead.
COPY backend/.env.test backend/.env

# -----------------------------
# Copy application code
# -----------------------------
COPY utils/ utils/
COPY backend/condition/ backend/condition/
COPY backend/app/db/models/condition.py backend/app/db/models/condition.py
COPY backend/app/db/connection.py backend/app/db/connection.py
COPY backend/app/config.py backend/app/config.py
COPY backend/__init__.py backend/__init__.py
COPY backend/app/__init__.py backend/app/__init__.py
COPY backend/app/db/__init__.py backend/app/db/__init__.py

# -----------------------------
# Lambda handler
# -----------------------------
CMD ["backend/condition/handler/handler.handler"]
# For local running
# CMD ["python", "-m", "backend.condition.handler.handler"]

View file

@ -0,0 +1,51 @@
import json
from typing import Mapping, Any
from io import BytesIO
from backend.condition.condition_trigger_request import ConditionTriggerRequest
from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3
from backend.condition.processor import process_file
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3
logger = setup_logger()
def _process_record(record: Mapping[str, Any]) -> None:
    """Validate one event record and run the condition processor on its file.

    The record's ``body`` is parsed as JSON and validated as a
    ``ConditionTriggerRequest``. When both UPRN lookup fields are set, an
    S3-backed lookup is built; otherwise no lookup is passed on.
    """
    request_body = json.loads(record["body"])

    logger.debug("Validating request body")
    request = ConditionTriggerRequest.model_validate(request_body)
    logger.debug("Successfully validated request body")

    uprn_lookup = None
    if request.uprn_lookup_file_bucket and request.uprn_lookup_file_key:
        logger.debug("Getting UPRN lookup file from s3")
        uprn_lookup = UprnLookupS3(
            bucket=request.uprn_lookup_file_bucket,
            key=request.uprn_lookup_file_key,
        )  # TODO: replace with postgres implementation
        logger.debug("Successfully got UPRN lookup file from s3")

    logger.debug("Getting conditions data from s3")
    conditions_stream: BytesIO = read_io_from_s3(
        bucket_name=request.trigger_file_bucket,
        file_key=request.trigger_file_key,
    )
    logger.debug(
        "Successfully got conditions data from s3. Moving on to process file..."
    )

    process_file(
        file_stream=conditions_stream,
        file_type=request.file_type,
        uprn_lookup=uprn_lookup,
    )


def handler(event: Mapping[str, Any], context: Any) -> None:
    """Process every entry under the event's ``Records`` key.

    A failure in one record is logged and does not stop the remaining records
    from being processed; nothing is re-raised to the caller. ``context`` is
    unused.
    """
    for record in event.get("Records", []):
        try:
            _process_record(record)
        except Exception as e:
            logger.error(f"Failed to process record: {e}")

View file

@ -0,0 +1,9 @@
openpyxl
sqlmodel
pydantic-settings
psycopg2-binary==2.9.10
# pandas isn't used, but needed for importing from utils.s3
pandas==2.2.2
numpy==1.26.4

View file

@ -1,5 +1,7 @@
from pathlib import Path
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.processor import process_file
@ -20,13 +22,27 @@ def main() -> None:
/ "peabody"
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
)
filepaths = [lbwf_path, peabody_path]
peabody_uprn_lookup_path: Path = (
path / "peabody" / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
)
# filepaths = [lbwf_path, peabody_path]
filepaths = [lbwf_path]
# filepaths = [peabody_path]
uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix())
def get_file_type(file_path: str) -> ConditionFileType:
    """Infer the condition file type from markers contained in ``file_path``.

    The check is a plain substring test; "peabody" is tested before "lbwf".

    Args:
        file_path: Path of the condition file, as a string.

    Returns:
        ``ConditionFileType.Peabody`` or ``ConditionFileType.LBWF``.

    Raises:
        ValueError: If the path contains neither marker. Without this the
            function would fall through and return ``None`` despite its
            return annotation.
    """
    if "peabody" in file_path:
        return ConditionFileType.Peabody
    if "lbwf" in file_path:
        return ConditionFileType.LBWF
    raise ValueError(f"Unable to determine condition file type from path: {file_path}")
for fp in filepaths:
with fp.open("rb") as f:
process_file(
file_stream=f,
source_key=fp.as_posix(),
file_type=get_file_type(fp.as_posix()),
uprn_lookup=uprn_lookup,
)

View file

@ -0,0 +1,8 @@
from abc import ABC, abstractmethod
from typing import BinaryIO, Dict
class UprnLookup(ABC):
    """Abstract source of a property-reference to UPRN mapping."""

    @abstractmethod
    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Return a dict keyed by property reference whose values are UPRNs."""
        ...

View file

@ -0,0 +1,23 @@
import csv
from io import TextIOWrapper
from typing import BinaryIO, Dict, TextIO
from backend.condition.lookups.uprn_lookup import UprnLookup
class UprnLookupLocal(UprnLookup):
    """UPRN lookup backed by a CSV file on the local filesystem.

    The CSV must have a header row with ``reference`` and ``out_uprn`` columns.
    """

    def __init__(self, csv_path: str):
        """Store the path of the CSV file; nothing is read until lookup time."""
        self.csv_path = csv_path

    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Open ``csv_path`` in binary mode and parse it into a reference -> UPRN dict."""
        with open(self.csv_path, "rb") as f:
            return self.parse_csv(f)

    def parse_csv(self, file_stream: BinaryIO) -> Dict[str, int]:
        """Parse a binary CSV stream into a ``{reference: uprn}`` mapping.

        Rows whose ``reference`` or ``out_uprn`` cell is missing, empty or
        whitespace-only are skipped. Both values are stripped before use.

        Raises:
            KeyError: If the header lacks a ``reference`` or ``out_uprn`` column.
            ValueError: If a non-blank ``out_uprn`` is not an integer literal.
        """
        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise be glued onto the first header name.
        # newline="" lets the csv module do its own newline handling.
        text_stream: TextIO = TextIOWrapper(
            file_stream, encoding="utf-8-sig", newline=""
        )

        mapping: Dict[str, int] = {}
        for row in csv.DictReader(text_stream):
            # Short rows yield None for the missing cells, hence the `or ""`.
            reference = (row["reference"] or "").strip()
            uprn = (row["out_uprn"] or "").strip()

            # Strip *before* the emptiness check so that whitespace-only cells
            # are skipped instead of crashing int("") or producing a "" key.
            if not reference or not uprn:
                continue

            mapping[reference] = int(uprn)

        return mapping

View file

@ -0,0 +1,29 @@
import csv
from io import BytesIO, TextIOWrapper
from typing import BinaryIO, Dict, TextIO
from backend.condition.lookups.uprn_lookup import UprnLookup
from utils.s3 import read_io_from_s3
class UprnLookupS3(UprnLookup):
    """UPRN lookup backed by a CSV object stored in S3.

    The CSV must have a header row with ``reference`` and ``out_uprn`` columns.
    """

    def __init__(self, bucket: str = "", key: str = ""):
        """Store the S3 location; nothing is downloaded until lookup time."""
        self.bucket = bucket
        self.key = key

    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Fetch the object via ``read_io_from_s3`` and parse it into a reference -> UPRN dict."""
        file_bytes: BytesIO = read_io_from_s3(
            bucket_name=self.bucket, file_key=self.key
        )
        return self._parse_csv_bytes(file_bytes)

    def _parse_csv_bytes(self, file_stream: BinaryIO) -> Dict[str, int]:
        """Parse a binary CSV stream into a ``{reference: uprn}`` mapping.

        Rows whose ``reference`` or ``out_uprn`` cell is missing, empty or
        whitespace-only are skipped. Both values are stripped before use.

        Raises:
            KeyError: If the header lacks a ``reference`` or ``out_uprn`` column.
            ValueError: If a non-blank ``out_uprn`` is not an integer literal.
        """
        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise be glued onto the first header name.
        # newline="" lets the csv module do its own newline handling.
        text_stream: TextIO = TextIOWrapper(
            file_stream, encoding="utf-8-sig", newline=""
        )

        mapping: Dict[str, int] = {}
        for row in csv.DictReader(text_stream):
            # Short rows yield None for the missing cells, hence the `or ""`.
            reference = (row["reference"] or "").strip()
            uprn = (row["out_uprn"] or "").strip()

            # Strip *before* the emptiness check so that whitespace-only cells
            # are skipped instead of crashing int("") or producing a "" key.
            if not reference or not uprn:
                continue

            mapping[reference] = int(uprn)

        return mapping

View file

@ -1,27 +1,35 @@
from typing import Optional
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper
from backend.condition.domain.mapping.mapper import Mapper
from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper
from backend.condition.file_type import FileType
from backend.condition.lookups.uprn_lookup import UprnLookup
from backend.condition.parsing.parser import Parser
from backend.condition.parsing.lbwf_parser import LbwfParser
from backend.condition.parsing.peabody_parser import PeabodyParser
def select_parser(file_type: FileType) -> Parser:
if file_type is FileType.LBWF:
def select_parser(
file_type: ConditionFileType, uprn_lookup: Optional[UprnLookup] = None
) -> Parser:
if file_type is ConditionFileType.LBWF:
return LbwfParser()
if file_type is FileType.Peabody:
return PeabodyParser()
if file_type is ConditionFileType.Peabody:
if not uprn_lookup:
raise ValueError(
"Cannot instantiate Peabody Parser without UPRN lookup being provided"
)
return PeabodyParser(uprn_lookup=uprn_lookup)
raise ValueError("Unrecognised file type, unable to instantiate Parser")
def select_mapper(file_type: FileType) -> Mapper:
if file_type is FileType.LBWF:
def select_mapper(file_type: ConditionFileType) -> Mapper:
if file_type is ConditionFileType.LBWF:
return LbwfMapper()
if file_type is FileType.Peabody:
if file_type is ConditionFileType.Peabody:
return PeabodyMapper()
raise ValueError("Unrecognised file type, unable to instantiate Mapper")

View file

@ -1,4 +1,4 @@
from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
from typing import BinaryIO, Any, Dict, Iterator, List, Optional, Tuple
from openpyxl import Workbook, load_workbook
from collections import defaultdict
@ -15,7 +15,10 @@ logger = setup_logger()
class LbwfParser(Parser):
def parse(self, file_stream: BinaryIO) -> Any:
def parse(
self,
file_stream: BinaryIO,
) -> Any:
wb: Workbook = load_workbook(file_stream)
address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(
wb

View file

@ -1,8 +1,12 @@
from abc import ABC, abstractmethod
from typing import BinaryIO, Any
from typing import BinaryIO, Any, Dict, Optional
class Parser(ABC):
@abstractmethod
def parse(self, file_stream: BinaryIO) -> Any:
pass
def parse(
self,
file_stream: BinaryIO,
) -> Any:
pass

View file

@ -1,26 +1,43 @@
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict
import csv
from pathlib import Path
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
from openpyxl import Workbook, load_workbook
from collections import defaultdict
from backend.condition.lookups.uprn_lookup import UprnLookup
from backend.condition.parsing.parser import Parser
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition,
)
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
from utils.logger import setup_logger
logger = setup_logger()
class PeabodyParser(Parser):
def parse(self, file_stream: BinaryIO) -> Any:
wb: Workbook = load_workbook(file_stream)
address_to_uprn_map: Dict[str, int] = PeabodyParser._generate_address_to_uprn_dict(wb)
assets = self._parse_assets(wb)
return self._group_assets_into_properties(
assets=assets,
address_to_uprn_map=address_to_uprn_map,
class PeabodyParser(Parser):
def __init__(self, uprn_lookup: UprnLookup):
self.uprn_lookup: UprnLookup = uprn_lookup # TODO: move this to the ABC?
def parse(
self,
file_stream: BinaryIO,
) -> Any:
file_stream.seek(0)
logger.debug("[PeabodyParser] Loading workbook...")
wb: Workbook = load_workbook(file_stream, read_only=True, data_only=True)
logger.debug("[PeabodyParser] Successfully loaded workbook. Parsing assets...")
assets = PeabodyParser._parse_assets(wb)
logger.debug(
"[PeabodyParser] Successfully parsed assets. Parsing UPRN lookup..."
)
location_ref_to_uprn_map = self.uprn_lookup.get_property_ref_to_uprn_lookup()
logger.debug("[PeabodyParser] Successfully parsed UPRN lookup")
return PeabodyParser._group_assets_into_properties(
assets=assets,
location_ref_to_uprn_map=location_ref_to_uprn_map,
)
@staticmethod
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
@ -33,39 +50,44 @@ class PeabodyParser(Parser):
assets: List[PeabodyAssetCondition] = []
for row in asset_rows:
try:
asset = PeabodyParser._map_row_to_asset_record(row, asset_header_indexes)
asset = PeabodyParser._map_row_to_asset_record(
row, asset_header_indexes
)
if not asset.is_block_level:
# Block-level condition surveys are out of scope for now
# until we have a wider think on how to handle block
assets.append(asset) # TODO: handle block-level assets
# until we have a wider think on how to handle blocks
assets.append(asset) # TODO: handle block-level assets
except Exception as e:
logger.error(f"Error mapping Peabody row to asset record: {e}")
continue
return assets
@staticmethod
def _group_assets_into_properties(
assets: List[PeabodyAssetCondition],
address_to_uprn_map: Dict[str, int],
location_ref_to_uprn_map: Dict[str, int],
) -> List[PeabodyProperty]:
assets_by_address: DefaultDict[str, List[PeabodyAssetCondition]] = defaultdict(list)
assets_by_location_reference: DefaultDict[str, List[PeabodyAssetCondition]] = (
defaultdict(list)
)
for asset in assets:
if asset.full_address is None:
if asset.lo_reference is None:
continue
address = asset.full_address.strip()
assets_by_address[address].append(asset)
assets_by_location_reference[asset.lo_reference].append(asset)
properties: List[PeabodyProperty] = []
failed_mappings_count = 0
for address, grouped_assets in assets_by_address.items():
uprn = address_to_uprn_map.get(address)
for location_ref, grouped_assets in assets_by_location_reference.items():
uprn = location_ref_to_uprn_map.get(location_ref)
if uprn is None:
logger.warning(f"No UPRN found for address: {address}")
failed_mappings_count += 1
continue
properties.append(
@ -75,9 +97,9 @@ class PeabodyParser(Parser):
)
)
logger.warning(f"No UPRN found for {failed_mappings_count} Location References")
return properties
@staticmethod
def _map_row_to_asset_record(
row: Any | Tuple[object | None, ...],
@ -102,39 +124,9 @@ class PeabodyParser(Parser):
condition_survey_date=row[header_indexes["condition_survey_date"]],
)
@staticmethod
def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]:
sheet = wb["Survey Records - D & Lower"]
rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)
headers = next(rows)
header_indexes: Dict[str, int] = PeabodyParser._get_column_indexes_by_name(headers)
address_idx = header_indexes["full_address"]
address_to_uprn: Dict[str, int] = {}
# Generate random UPRNs for now
next_uprn = 1 # TODO: get real UPRNs
for row in rows:
address = row[address_idx]
if address is None:
continue
address = address.strip()
if address not in address_to_uprn:
address_to_uprn[address] = next_uprn
next_uprn += 1
return address_to_uprn
@staticmethod
def _get_column_indexes_by_name(
headers: Tuple[object | None, ...]
headers: Tuple[object | None, ...],
) -> Dict[str, int]:
index: Dict[str, int] = {}
@ -142,4 +134,4 @@ class PeabodyParser(Parser):
if isinstance(header, str):
index[header] = i
return index
return index

View file

@ -0,0 +1,87 @@
import time
from typing import List, Optional
from sqlmodel import Session
from utils.logger import setup_logger
from backend.app.db.models.condition import (
AspectConditionModel,
ElementModel,
PropertyConditionSurveyModel,
)
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
from backend.app.db.connection import db_session
logger = setup_logger()
class ConditionPostgres:
    """Writes property condition surveys to the database through SQLModel."""

    def bulk_insert_surveys(
        self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100
    ) -> None:
        """Map ``surveys`` to SQLModel objects and insert them in batches.

        Each batch is added and committed on its own, so batches committed
        before a failure are not undone by this method.

        Args:
            surveys: Domain surveys to persist. An empty list is a no-op.
            batch_size: Number of surveys per commit. ``None`` writes all
                surveys in a single batch.

        Raises:
            ValueError: If ``batch_size`` is zero or negative.
        """
        # Validate up front: range() rejects a zero step with an opaque error,
        # and a negative step would silently insert nothing at all.
        if batch_size is not None and batch_size <= 0:
            raise ValueError(
                f"batch_size must be a positive integer or None, got {batch_size}"
            )

        logger.debug(
            f"[ConditionPostgres] Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
        )

        survey_models: List[PropertyConditionSurveyModel] = [
            ConditionPostgres.map_survey_to_model(s) for s in surveys
        ]
        total: int = len(survey_models)

        if total == 0:
            # Nothing to write, so do not open a database session at all.
            logger.info("[ConditionPostgres] No surveys to insert; skipping write")
            return

        # None means "no batching": one batch that covers everything.
        effective_batch_size: int = total if batch_size is None else batch_size

        logger.debug(
            f"[ConditionPostgres] Finished mapping {total} surveys. Writing to database in batches of {effective_batch_size}..."
        )

        with db_session() as session:
            logger.info("[ConditionPostgres] Successfully made connection to database")
            for start in range(0, total, effective_batch_size):
                end = min(start + effective_batch_size, total)
                batch = survey_models[start:end]

                t0: float = time.perf_counter()
                ConditionPostgres._insert_surveys_batch(batch, session)
                elapsed: float = time.perf_counter() - t0

                logger.info(
                    f"Inserted batch {start} - {end} ({len(batch)} surveys) in {elapsed} seconds",
                )

    @staticmethod
    def map_survey_to_model(
        survey: PropertyConditionSurvey,
    ) -> PropertyConditionSurveyModel:
        """Convert a domain survey into its SQLModel object graph.

        The survey's elements, and each element's aspect conditions, are
        copied field by field and appended in their original order.
        """
        survey_model = PropertyConditionSurveyModel(
            uprn=survey.uprn,
            date=survey.date,
            source=survey.source,
            elements=[],
        )

        for element in survey.elements:
            element_model = ElementModel(
                element_type=element.element_type,
                element_instance=element.element_instance,
                aspect_conditions=[],
            )

            for aspect in element.aspect_conditions:
                aspect_model = AspectConditionModel(
                    aspect_type=aspect.aspect_type,
                    aspect_instance=aspect.aspect_instance,
                    value=aspect.value,
                    quantity=aspect.quantity,
                    install_date=aspect.install_date,
                    renewal_year=aspect.renewal_year,
                    comments=aspect.comments,
                )
                element_model.aspect_conditions.append(aspect_model)

            survey_model.elements.append(element_model)

        return survey_model

    @staticmethod
    def _insert_surveys_batch(
        surveys: List[PropertyConditionSurveyModel], session: Session
    ) -> None:
        """Add one batch of survey models to ``session`` and commit it."""
        session.add_all(surveys)
        session.commit()

View file

@ -1,25 +1,38 @@
from typing import Any, BinaryIO, List
from typing import Any, BinaryIO, List, Optional
from datetime import datetime
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup import UprnLookup
from utils.logger import setup_logger
from backend.condition.domain.mapping.mapper import Mapper
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
from backend.condition.parsing.parser import Parser
from utils.logger import setup_logger
from backend.condition.file_type import FileType, detect_file_type
from backend.condition.persistence.condition_postgres import ConditionPostgres
from backend.condition.parsing.factory import select_parser, select_mapper
logger = setup_logger()
def process_file(file_stream: BinaryIO, source_key: str) -> None:
print(f"[processor] Received file: {source_key}")
def process_file(
file_stream: BinaryIO,
file_type: ConditionFileType,
uprn_lookup: Optional[UprnLookup],
) -> None:
# Instantiation
file_type: FileType = detect_file_type(source_key)
parser: Parser = select_parser(file_type)
logger.debug(f"[processor] Instantiating classes...")
parser: Parser = select_parser(file_type, uprn_lookup)
mapper: Mapper = select_mapper(file_type)
persistence = ConditionPostgres()
logger.debug(f"[processor] Finished instantiating classes. Calling Parser...")
# Orchestration
raw_properties: List[Any] = parser.parse(file_stream)
logger.info(
f"[processor] Finished loading customer survey data for {len(raw_properties)} properties. Mapping..."
)
survey_year = datetime.now().year # TODO: get this from filepath or elsewhere
property_condition_surveys: List[PropertyConditionSurvey] = []
@ -29,4 +42,10 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None:
mapper.map_asset_conditions_for_property(p, survey_year)
)
print("done") # temp
logger.info(
f"[processor] Finished mapping {len(property_condition_surveys)} properties. Writing to database..."
)
persistence.bulk_insert_surveys(property_condition_surveys)
logger.info(f"[processor] Finished loading surveys to database")

View file

@ -1,3 +1,4 @@
from backend.app.db.models.condition import PropertyConditionSurveyModel
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
@ -72,3 +73,41 @@ class CustomAsserts:
f"{actual_aspect.comments} != {expected_aspect.comments}"
)
return True
def assert_property_condition_survey_model_matches_expected(
    actual_model: PropertyConditionSurveyModel,
    expected: dict,
) -> None:
    """Assert that a survey model matches a nested ``expected`` dict.

    ``expected`` must provide ``uprn``, ``date``, ``source`` and ``elements``.
    Each element dict must provide ``element_type``, ``element_instance`` and
    ``aspects``. For aspects, only the keys present in the expected aspect
    dict are compared, each against the attribute of the same name.

    Raises:
        AssertionError: On the first mismatch, with a message locating it.
    """
    # NOTE(review): no @staticmethod decorator is visible on this definition;
    # it is invoked through the class (CustomAsserts.<name>(model, expected)).

    # Survey-level scalar fields.
    assert actual_model.uprn == expected["uprn"], "UPRN differs"
    assert actual_model.date == expected["date"], "Date differs"
    assert actual_model.source == expected["source"], "Source differs"

    # Element counts must agree before the pairwise comparison, because zip()
    # would otherwise silently ignore any surplus items.
    got_elements = actual_model.elements
    want_elements = expected["elements"]
    assert len(got_elements) == len(want_elements), (
        f"Expected {len(want_elements)} elements, "
        f"got {len(got_elements)}"
    )

    for i, pair in enumerate(zip(got_elements, want_elements)):
        got_element, want_element = pair

        assert (
            got_element.element_type == want_element["element_type"]
        ), f"Element[{i}].element_type differs"
        assert (
            got_element.element_instance == want_element["element_instance"]
        ), f"Element[{i}].element_instance differs"

        got_aspects = got_element.aspect_conditions
        want_aspects = want_element["aspects"]
        assert len(got_aspects) == len(
            want_aspects
        ), f"Element[{i}] aspect count differs"

        for j, (got_aspect, want_aspect) in enumerate(zip(got_aspects, want_aspects)):
            prefix = f"Element[{i}].Aspect[{j}]"
            # Only the fields named in the expected dict are checked.
            for field_name in want_aspect:
                want_value = want_aspect[field_name]
                assert getattr(got_aspect, field_name) == want_value, (
                    f"{prefix}.{field_name} differs: "
                    f"{getattr(got_aspect, field_name)} != {want_value}"
                )

View file

@ -0,0 +1,34 @@
import pytest
from typing import Dict
from tempfile import NamedTemporaryFile
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
@pytest.fixture
def prop_ref_uprn_csv_file(tmp_path) -> str:
    """Write a three-row reference/UPRN CSV and return its path as a string.

    The file lives in pytest's per-test ``tmp_path`` directory, so pytest
    manages its clean-up and no temporary file is left behind by the test.
    """
    csv_content = """reference,out_uprn
ABC123,10000000001
DEF456,10000000002
GHI789,10000000003
"""
    csv_path = tmp_path / "prop_ref_uprn.csv"
    csv_path.write_text(csv_content, encoding="utf-8")
    return str(csv_path)
def test_generate_prop_ref_uprn_from_csv_file(prop_ref_uprn_csv_file: str) -> None:
    """The local CSV lookup maps every reference in the file to its integer UPRN."""
    # arrange
    lookup = UprnLookupLocal(prop_ref_uprn_csv_file)
    expected_pairs = [
        ("ABC123", 10000000001),
        ("DEF456", 10000000002),
        ("GHI789", 10000000003),
    ]
    expected_map = dict(expected_pairs)

    # act
    actual_map = lookup.get_property_ref_to_uprn_lookup()

    # assert
    assert actual_map == expected_map

View file

@ -1,11 +1,13 @@
import pytest
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.parsing.factory import select_parser
from backend.condition.file_type import FileType
def test_selects_lbwf_parser():
# arrange
file_type = FileType.LBWF
file_type = ConditionFileType.LBWF
expected_class_name = "LbwfParser"
# act
@ -14,13 +16,15 @@ def test_selects_lbwf_parser():
# assert
assert expected_class_name == actual_class_name
def test_selects_peabody_parser():
# arrange
file_type = FileType.Peabody
file_type = ConditionFileType.Peabody
expected_class_name = "PeabodyParser"
uprn_lookup = UprnLookupLocal(csv_path="test")
# act
actual_class_name = select_parser(file_type).__class__.__name__
actual_class_name = select_parser(file_type, uprn_lookup).__class__.__name__
# assert
assert expected_class_name == actual_class_name
assert expected_class_name == actual_class_name

View file

@ -1,127 +1,143 @@
from tempfile import NamedTemporaryFile
import pytest
from typing import Any
from typing import Any, Dict
from io import BytesIO
from openpyxl import Workbook
from datetime import datetime
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.parsing.peabody_parser import PeabodyParser
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition,
)
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
@pytest.fixture
def peabody_assets_xlsx_bytes() -> BytesIO:
wb = Workbook()
survey_records_d_and_lower = wb.active
survey_records_d_and_lower.title = "Survey Records - D & Lower"
survey_records_d_and_lower.append([
"Lo_Reference",
"full_address",
"location_type_code",
"Parent_Lo_Reference",
"Element_Code",
"Element",
"Sub_Element_Code",
"Sub_Element",
"Material_Code",
"material_or_answer",
"Renewal_Quantity",
"Renewal_Year",
"Renewal_Cost",
"cloned",
"lo_type_code",
"condition_survey_date",
])
survey_records_d_and_lower.append([
"B000RAND",
"1 RANDOM HOUSE LONDON",
3,
"RAND2EST",
110,
"ROOFS",
1,
"Primary Roof",
9,
"Other",
3,
2054,
330,
"N",
3,
datetime(2025,12,4,9,17,0)
])
survey_records_d_and_lower.append([
"B000BLOCK",
"1100 BLOCK",
3,
"RAND2EST",
110,
"ROOFS",
1,
"Primary Roof",
9,
"Other",
3,
2054,
330,
"N",
3,
datetime(2025,12,4,9,17,0)
])
survey_records_d_and_lower.append([
"B000FAKE",
"3 FAKE CLOSE LONDON",
3,
"FAKEEST",
100,
"GENERAL",
15,
"External Decoration",
2,
"Normal",
1,
2035,
1500.7,
"N",
3,
datetime(2025,7,5,0,0,0)
])
survey_records_d_and_lower.append([
"B000MIS",
"99 MISC ROAD LONDON",
3,
"300828",
54,
"HHSRS",
29,
"HHSRS Structural Collapse & Falling Elements",
4,
"HHSRS Moderate",
2,
2027,
None,
"N",
3,
None
])
survey_records_d_and_lower.append([
"B000MIS",
"99 MISC ROAD LONDON",
3,
"300828",
53,
"External",
2,
"Chimney",
2,
"Present",
33,
2053,
3531,
"N",
3,
None
])
survey_records_d_and_lower.append(
[
"Lo_Reference",
"full_address",
"location_type_code",
"Parent_Lo_Reference",
"Element_Code",
"Element",
"Sub_Element_Code",
"Sub_Element",
"Material_Code",
"material_or_answer",
"Renewal_Quantity",
"Renewal_Year",
"Renewal_Cost",
"cloned",
"lo_type_code",
"condition_survey_date",
]
)
survey_records_d_and_lower.append(
[
"B000RAND",
"1 RANDOM HOUSE LONDON",
3,
"RAND2EST",
110,
"ROOFS",
1,
"Primary Roof",
9,
"Other",
3,
2054,
330,
"N",
3,
datetime(2025, 12, 4, 9, 17, 0),
]
)
survey_records_d_and_lower.append(
[
"B000BLOCK",
"1100 BLOCK",
3,
"RAND2EST",
110,
"ROOFS",
1,
"Primary Roof",
9,
"Other",
3,
2054,
330,
"N",
3,
datetime(2025, 12, 4, 9, 17, 0),
]
)
survey_records_d_and_lower.append(
[
"B000FAKE",
"3 FAKE CLOSE LONDON",
3,
"FAKEEST",
100,
"GENERAL",
15,
"External Decoration",
2,
"Normal",
1,
2035,
1500.7,
"N",
3,
datetime(2025, 7, 5, 0, 0, 0),
]
)
survey_records_d_and_lower.append(
[
"B000MIS",
"99 MISC ROAD LONDON",
3,
"300828",
54,
"HHSRS",
29,
"HHSRS Structural Collapse & Falling Elements",
4,
"HHSRS Moderate",
2,
2027,
None,
"N",
3,
None,
]
)
survey_records_d_and_lower.append(
[
"B000MIS",
"99 MISC ROAD LONDON",
3,
"300828",
53,
"External",
2,
"Chimney",
2,
"Present",
33,
2053,
3531,
"N",
3,
None,
]
)
stream = BytesIO()
wb.save(stream)
@ -129,9 +145,27 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
return stream
def test_peabody_parser_parses_conditions(peabody_assets_xlsx_bytes):
@pytest.fixture
def prop_ref_uprn_csv_file(tmp_path) -> str:
    """Write a reference/UPRN CSV for the workbook fixture's Lo_References.

    The file lives in pytest's per-test ``tmp_path`` directory, so pytest
    manages its clean-up and no temporary file is left behind by the test.
    The path is returned as a string.
    """
    csv_content = """reference,out_uprn
B000RAND,1
B000BLOCK,2
B000FAKE,3
B000MIS,4
"""
    csv_path = tmp_path / "prop_ref_uprn.csv"
    csv_path.write_text(csv_content, encoding="utf-8")
    return str(csv_path)
def test_peabody_parser_parses_conditions(
peabody_assets_xlsx_bytes, prop_ref_uprn_csv_file
):
# arrange
parser = PeabodyParser()
uprn_lookup = UprnLookupLocal(csv_path=prop_ref_uprn_csv_file)
parser = PeabodyParser(uprn_lookup=uprn_lookup)
# act
result: Any = parser.parse(peabody_assets_xlsx_bytes)
@ -141,6 +175,7 @@ def test_peabody_parser_parses_conditions(peabody_assets_xlsx_bytes):
assert all(isinstance(item, PeabodyProperty) for item in result)
@pytest.fixture
def asset_condition_factory():
def _factory(full_address: str) -> PeabodyAssetCondition:
@ -165,6 +200,7 @@ def asset_condition_factory():
return _factory
@pytest.mark.parametrize(
"full_address, expected_block_level",
[
@ -175,7 +211,7 @@ def asset_condition_factory():
("81A-B GORE ROAD LONDON", True),
("73 & 74 HARVEST COURT ST. ALBANS", True),
("25 HAVERSHAM COURT GREENFORD", False),
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False)
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False),
],
)
def test_peabody_asset_is_block_level(
@ -187,4 +223,4 @@ def test_peabody_asset_is_block_level(
asset_condition = asset_condition_factory(full_address)
# act + assert
assert asset_condition.is_block_level == expected_block_level
assert asset_condition.is_block_level == expected_block_level

View file

@ -0,0 +1,164 @@
import pytest
from datetime import date
from backend.condition.persistence.condition_postgres import ConditionPostgres
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
from backend.condition.domain.element import Element
from backend.condition.domain.element_type import ElementType
from backend.condition.domain.aspect_condition import AspectCondition
from backend.condition.domain.aspect_type import AspectType
from backend.app.db.models.condition import PropertyConditionSurveyModel
from backend.condition.tests.custom_asserts import CustomAsserts
def test_map_survey_to_model() -> None:
    """``map_survey_to_model`` copies every survey, element and aspect field.

    Every expected aspect lists all seven aspect fields. The comparison helper
    only checks keys that are present, so an omitted key would leave that
    field unverified.
    """
    # arrange
    survey = PropertyConditionSurvey(
        uprn=1,
        elements=[
            Element(
                element_type=ElementType.EXTERNAL_WINDOWS,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.MATERIAL,
                        aspect_instance=1,
                        value="UPVC Double Glazed",
                        quantity=8,
                        install_date=None,
                        renewal_year=2036,
                        comments=None,
                    ),
                ],
            ),
            Element(
                element_type=ElementType.EXTERNAL_DECORATION,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.CONDITION,
                        aspect_instance=1,
                        value="Normal",
                        quantity=1,
                        install_date=None,
                        renewal_year=2029,
                        comments=None,
                    )
                ],
            ),
            Element(
                element_type=ElementType.EXTERNAL_WALL,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=1,
                        value="Pointed",
                        quantity=65,
                        install_date=None,
                        renewal_year=2045,
                        comments=None,
                    ),
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=1,
                        value="Pointing",
                        quantity=1,
                        install_date=None,
                        renewal_year=2069,
                        comments=None,
                    ),
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=2,
                        value="Tile Hung",
                        quantity=8,
                        install_date=None,
                        renewal_year=2049,
                        comments=None,
                    ),
                ],
            ),
        ],
        date=date(2000, 1, 1),
        source="Peabody",
    )

    expected = {
        "uprn": 1,
        "date": date(2000, 1, 1),
        "source": "Peabody",
        "elements": [
            {
                "element_type": ElementType.EXTERNAL_WINDOWS,
                "element_instance": 1,
                "aspects": [
                    {
                        "aspect_type": AspectType.MATERIAL,
                        "aspect_instance": 1,
                        "value": "UPVC Double Glazed",
                        "quantity": 8,
                        "install_date": None,
                        "renewal_year": 2036,
                        "comments": None,
                    }
                ],
            },
            {
                "element_type": ElementType.EXTERNAL_DECORATION,
                "element_instance": 1,
                "aspects": [
                    {
                        "aspect_type": AspectType.CONDITION,
                        "aspect_instance": 1,
                        "value": "Normal",
                        "quantity": 1,
                        "install_date": None,
                        "renewal_year": 2029,
                        "comments": None,
                    }
                ],
            },
            {
                "element_type": ElementType.EXTERNAL_WALL,
                "element_instance": 1,
                "aspects": [
                    {
                        # Previously missing, which left this aspect's type unchecked.
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 1,
                        "value": "Pointed",
                        "quantity": 65,
                        "install_date": None,
                        "renewal_year": 2045,
                        "comments": None,
                    },
                    {
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 1,
                        "value": "Pointing",
                        "quantity": 1,
                        "install_date": None,
                        "renewal_year": 2069,
                        "comments": None,
                    },
                    {
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 2,
                        "value": "Tile Hung",
                        "quantity": 8,
                        "install_date": None,
                        "renewal_year": 2049,
                        "comments": None,
                    },
                ],
            },
        ],
    }

    # act
    model: PropertyConditionSurveyModel = ConditionPostgres.map_survey_to_model(survey)

    # assert (survey level)
    CustomAsserts.assert_property_condition_survey_model_matches_expected(
        model,
        expected,
    )

View file

@ -1,22 +0,0 @@
import pytest
from backend.condition.file_type import FileType, detect_file_type
def test_detects_lbwf_file_type():
# arrange
file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx"
expected_file_type = FileType.LBWF
# act
actual_file_type: FileType = detect_file_type(file_path_str)
# assert
assert expected_file_type == actual_file_type
def test_unknown_filepath_raises_value_error():
# arrange
file_path_str = "unknown/Example Asset Data.xlsx"
# act + assert
with pytest.raises(ValueError):
detect_file_type(file_path_str)

View file

@ -978,13 +978,15 @@ async def model_engine(body: PlanTriggerRequest):
recommendations_scoring_data.extend(p.recommendations_scoring_data)
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data).drop(
columns=[
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"
]
)
# Temp putting this here
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
if not recommendations_scoring_data.empty:
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=[
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"
]
)
# TODO: Temp putting this here
recommendations_scoring_data["is_post_sap10_ending"] = True
all_predictions = await model_api.async_paginated_predictions(

View file

@ -1,3 +1,4 @@
# Pandas and numpy
numpy==2.1.2
pandas==2.2.3
@ -22,4 +23,4 @@ pyarrow==17.0.0
fastparquet==2024.5.0
aiohttp==3.10.10
# find my epc
beautifulsoup4
beautifulsoup4

View file

@ -313,4 +313,15 @@ class ModelApi:
logger.error(f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}): {e}")
await asyncio.sleep(2 ** attempts) # exponential backoff
await self.close_aiohttp_session()
# Ensure stable output structure for the datagrame to be utilised by other functions downstream
for k in all_predictions.keys():
if all_predictions[k].empty:
col_template = ['id', 'predictions', 'property_id', 'recommendation_id', 'phase'] if (
extract_ids) else ['id', 'predictions']
all_predictions[k] = pd.DataFrame(
columns=col_template
)
return all_predictions

View file

@ -0,0 +1,102 @@
# Retrofit Property Data Onboarding
This repository contains an ETL pipeline for transforming raw retrofit property data from external source systems
(currently Parity) into a standardised internal format, compatible with both address2uprn and engine.
The pipeline is designed to:
- Run as an AWS Lambda triggered by SQS
- Read raw CSV/XLSX files from S3
- Perform rule-based mappings
- Infer as-built property attributes from the property's age band
- Output a processed csv, back to s3 to be consumed by address2uprn
### Structure
SQS → Lambda handler → OnboarderFactory → System-specific Onboarder → Mapping → CSV to S3
Each source system implements its own **Onboarder**, while sharing a common base and mapping process.
---
### Repository Structure
onboarders/
├── `handler.py` # Lambda entrypoint \
├── `factory.py` # Onboarder factory \
├── `base.py` # Shared onboarding base class \
├── `parity.py` # Parity-specific transformation logic \
├── `mappings/` \
│ └── `parity/` # Parity domain mappings & classifiers \
│ ├── `age_band.py` \
│ ├── `property_type.py` \
│ ├── `built_form.py` \
│ ├── `walls.py` \
│ ├── `roof.py` \
│ ├── `floor.py` \
│ ├── `glazing.py` \
│ ├── `heating.py` \
│ ├── `as_built_wall_classifiers.py` \
│ ├── `as_built_roof_classifiers.py` \
│ └── `as_built_floor_classifiers.py` \
├── `tests/` \
├── `requirements.txt` \
└── `README.md`
---
### Lambda Entry Point (`handler.py`)
The Lambda handler:
1. Consumes messages from the SQS queue
2. Validates the payload
3. Instantiates the correct onboarder via `OnboarderFactory`
4. Runs the transformation
5. Writes the transformed CSV back to S3
### Expected Event Payload
```json
{
"s3_uri": "s3://bucket/path/to/input.xlsx",
"system": "parity",
"format": "xlsx",
"sheet_name": "Sustainability"
}
```
### Onboarder Base `(base.py)`
OnboarderBase provides shared functionality across all systems.
*Responsibilities*
- Reading CSV/XLSX files from S3
- Writing transformed CSVs to S3
- Defining canonical output column names
- Providing validation helpers
- Common output - for the moment, onboarders are expected to return a CSV
### Parity Onboarder `(parity.py)`
`ParityOnboarder` contains all Parity-specific transformation logic.
*Responsibilities*
- Map raw Parity fields to internal EPC-aligned enums
- Infer “as-built” constructions using age bands when insulation data is missing
- Resolve energy efficiency ratings deterministically
- Normalise output into a fixed schema
The `transform()` method orchestrates the transformation process.
### TODOs
- In `backend/onboarders/mappings/parity/glazing.py` we currently map the Parity descriptions
to tuples of descriptions and efficiency ratings. This is okay for the moment, but we may consider
using a data class, given how error-prone this is.
- This is also true for heating mappings in `backend/onboarders/mappings/parity/heating.py`
- Implement an AI-enabled version, to replace the standardised asset list

View file

View file

@ -0,0 +1,84 @@
import pandas as pd
from utils.s3 import read_from_s3, read_excel_from_s3, save_csv_to_s3
class OnboarderBase:
# Input dataset to be transformed
data: pd.DataFrame | None = None
bucket_name = None
input_file_name = None
output_file_name = None
# Description columns
landlord_wall_construction: str = "landlord_wall_construction"
landlord_roof_construction: str = "landlord_roof_construction"
landlord_floor_construction: str = "landlord_floor_construction"
landlord_windows_type: str = "landlord_windows_type"
landlord_heating_construction: str = "landlord_heating_construction"
landlord_fuel_type: str = "landlord_fuel_type"
landlord_heating_controls: str = "landlord_heating_controls"
landlord_hot_water_system: str = "landlord_hot_water_system"
# Efficiency columns
landlord_roof_efficiency: str = "landlord_roof_efficiency"
landlord_windows_efficiency: str = "landlord_windows_efficiency"
landlord_heating_controls_efficiency: str = "landlord_heating_controls_efficiency"
landlord_heating_efficiency: str = "landlord_heating_efficiency"
landlord_hot_water_efficiency: str = "landlord_hot_water_efficiency"
landlord_wall_efficiency: str = "landlord_wall_efficiency"
# Additional windows features
landlord_multi_glaze_proportion: str = "landlord_multi_glaze_proportion"
landlord_glazed_type: str = "landlord_glazed_type"
landlord_glazed_area: str = "landlord_glazed_area"
# Additional roof features
landlord_has_sloping_ceiling: str = "landlord_has_sloping_ceiling"
# Shape, dimensions, age
landlord_total_floor_area_m2: str = "landlord_total_floor_area_m2"
landlord_construction_age_band: str = "landlord_construction_age_band"
landlord_property_type: str = "landlord_property_type"
landlord_built_form: str = "landlord_built_form"
def read_s3(self, file_format, **kwargs):
if self.input_file_name is None or self.bucket_name is None:
raise ValueError("Bucket name and input file name must be set before reading from S3.")
if file_format == "xlsx":
self.data = read_excel_from_s3(
bucket_name=self.bucket_name,
file_key=self.input_file_name,
sheet_name=kwargs.get("sheet_name"),
header_row=kwargs.get("header_row", 0)
)
else:
self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name)
def write(self):
if self.data is None:
raise ValueError("No data to write. Please run transform() before writing.")
if self.bucket_name is None or self.output_file_name is None:
raise ValueError("Bucket name and output file name must be set before writing to S3.")
# Store file as csv - will store in the same route location as the input file
save_csv_to_s3(dataframe=self.data, bucket_name=self.bucket_name, file_name=self.output_file_name)
@staticmethod
def assert_nulls_only_from_source_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool:
# We only allow nulls if the original value was null
null_vals = data[pd.isnull(data[mapped_column])]
if null_vals.empty:
return True
# We make sure all original values were null
assert pd.isnull(null_vals[original_column]).all(), (
f"Some values in {mapped_column} were not mapped, but original values were not null"
)
@staticmethod
def assert_no_nulls(data: pd.DataFrame, column: str):
assert pd.isnull(data[column]).sum() == 0, f"column {column} contains null values, but should not"
def map_construction_age_band(self):
raise NotImplementedError(
"This method should be implemented by subclasses to map construction age bands to descriptions"
)

View file

@ -0,0 +1,10 @@
from onboarders.parity import ParityOnboarder
class OnboarderFactory:
    """Builds the onboarder implementation that matches a system identifier."""

    @staticmethod
    def create_onboarder(onboarder_type, **kwargs):
        """
        Create an onboarder for ``onboarder_type``.

        :param onboarder_type: System identifier; only ``"parity"`` is recognised.
        :param kwargs: Passed through unchanged to the onboarder's constructor.
        :return: A ``ParityOnboarder`` instance.
        :raises ValueError: If ``onboarder_type`` is not a recognised identifier.
        """
        is_parity = onboarder_type == "parity"
        if not is_parity:
            raise ValueError(f"Unknown onboarder type: {onboarder_type}")
        return ParityOnboarder(**kwargs)

View file

@ -0,0 +1,50 @@
import json
from pydantic import BaseModel, Field
from typing import Optional, Literal
from onboarders.factory import OnboarderFactory
from utils.logger import setup_logger
logger = setup_logger()
class OnboardingEvent(BaseModel):
    # Schema for one onboarding request, built from a decoded SQS record body in `handler`.

    # Location of the input file; passed to the onboarder as `fileuri`.
    s3_uri: str = Field(..., description="S3 URI of the raw ARA input file")
    # NOTE(review): "generic" is accepted here, but OnboarderFactory.create_onboarder only
    # recognises "parity" and raises ValueError for any other value.
    system: Literal["parity", "generic"] = Field(..., description="Onboarding system identifier")
    # Input file format; `handler` forwards it as both `format` and `file_format`.
    format: Literal["csv", "xlsx"]
    # Optional worksheet name, forwarded to the onboarder unchanged.
    sheet_name: Optional[str] = None
def handler(event, context):
    """
    Lambda handler that onboards one raw input file per SQS record.

    For every entry in ``event["Records"]`` the JSON body is validated against
    ``OnboardingEvent``, an onboarder is built through ``OnboarderFactory``, and its
    ``transform()`` and ``write()`` steps are run.

    Failures are isolated per record: the error is logged with its traceback and the loop
    moves on to the next record. Nothing is re-raised, so the handler returns normally even
    when some records failed.

    :param event: Lambda event; a missing ``"Records"`` key is treated as no records.
    :param context: Lambda context object (unused).
    """
    for record in event.get("Records", []):
        try:
            event_body = json.loads(record["body"])

            # Sample input data
            # event_body = {
            #     "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for "
            #               "Domna.xlsx",
            #     "system": "parity",
            #     "format": "xlsx",
            #     "sheet_name": "Sustainability"
            # }

            logger.info("Processing record with body: %s", event_body)
            validated_event = OnboardingEvent(**event_body)

            # NOTE(review): `format` and `file_format` carry the same value - confirm the
            # onboarder needs both keyword arguments.
            onboarder = OnboarderFactory.create_onboarder(
                validated_event.system,
                fileuri=validated_event.s3_uri,
                format=validated_event.format,
                sheet_name=validated_event.sheet_name,
                file_format=validated_event.format
            )

            logger.info("Transforming data")
            onboarder.transform()

            logger.info(f"Writing data to {onboarder.output_file_name}, bucket: {onboarder.bucket_name}")
            onboarder.write()

        except Exception as e:
            # Keep going with the remaining records, but record the traceback so the
            # failure can be diagnosed from the logs.
            logger.error(f"Failed to process record: {e}", exc_info=True)

View file

@ -1,14 +0,0 @@
# Maps construction-age labels to EPC-style "England and Wales: ..." age band strings.
# NOTE(review): `party_map` looks like a typo for `parity_map` - confirm before reusing.
# NOTE(review): "2012 onwards" is mapped to the closed band "2012-2021" - confirm intended.
party_map = {
    "Before 1900": 'England and Wales: before 1900',
    "1900-1929": 'England and Wales: 1900-1929',
    "1930-1949": 'England and Wales: 1930-1949',
    "1950-1966": 'England and Wales: 1950-1966',
    "1967-1975": 'England and Wales: 1967-1975',
    "1976-1982": 'England and Wales: 1976-1982',
    "1983-1990": 'England and Wales: 1983-1990',
    "1991-1995": 'England and Wales: 1991-1995',
    "1996-2002": 'England and Wales: 1996-2002',
    "2003-2006": 'England and Wales: 2003-2006',
    "2007-2011": 'England and Wales: 2007-2011',
    "2012 onwards": 'England and Wales: 2012-2021',
}

View file

@ -1,15 +0,0 @@
# Maps built-form labels written without separators to their hyphenated / spaced display form.
parity_map = {
    "MidTerrace": "Mid-Terrace",
    "EndTerrace": "End-Terrace",
    "Detached": "Detached",
    "SemiDetached": "Semi-Detached",
    "EnclosedMidTerrace": "Enclosed Mid-Terrace",
    "EnclosedEndTerrace": "Enclosed End-Terrace",
}

# Label followed by a number - presumably the row count observed per label in a sample
# dataset (TODO confirm the source):
# MidTerrace 41462
# EndTerrace 20910
# Detached 16875
# SemiDetached 14725
# EnclosedMidTerrace 3176
# EnclosedEndTerrace 2393

View file

@ -0,0 +1,19 @@
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
# Maps construction-age labels from the source data to EpcConstructionAgeBand members.
parity_map = {
    "Before 1900": EpcConstructionAgeBand.before_1900,
    "1900-1929": EpcConstructionAgeBand.from_1900_to_1929,
    "1930-1949": EpcConstructionAgeBand.from_1930_to_1949,
    "1950-1966": EpcConstructionAgeBand.from_1950_to_1966,
    "1967-1975": EpcConstructionAgeBand.from_1967_to_1975,
    "1976-1982": EpcConstructionAgeBand.from_1976_to_1982,
    "1983-1990": EpcConstructionAgeBand.from_1983_to_1990,
    "1991-1995": EpcConstructionAgeBand.from_1991_to_1995,
    "1996-2002": EpcConstructionAgeBand.from_1996_to_2002,
    "2003-2006": EpcConstructionAgeBand.from_2003_to_2006,
    "2007-2011": EpcConstructionAgeBand.from_2007_to_2011,
    "2012 onwards": EpcConstructionAgeBand.from_2012_onwards,
    # Newer age bands, under SAP10
    "2012-2022": EpcConstructionAgeBand.from_2012_to_2022,
    "2023 onwards": EpcConstructionAgeBand.from_2023_onwards,
}

View file

@ -0,0 +1,60 @@
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.floor import EpcFloorDescriptions
def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Choose an assumed as-built floor description from the age band alone.

    Bands starting in 1930 or later are treated as solid floors, with the assumed insulation
    improving for bands starting from 1996 and again from 2003. Earlier bands are treated as
    uninsulated suspended floors.

    :param age_band: Input age band
    :return: EpcFloorDescriptions
    """
    start = age_band.start_year()
    if start >= 2003:
        description = EpcFloorDescriptions.solid_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.solid_limited_insulation_assumed
    elif start >= 1930:
        description = EpcFloorDescriptions.solid_no_insulation_assumed
    else:
        description = EpcFloorDescriptions.suspended_no_insulation_assumed
    return description
def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Choose an insulated floor description from the age band alone.

    Bands starting in 1930 or later map to an insulated solid floor, earlier bands to an
    insulated suspended floor.

    :param age_band: Input age band
    :return: EpcFloorDescriptions
    """
    starts_1930_or_later = age_band.start_year() >= 1930
    return (
        EpcFloorDescriptions.solid_insulated
        if starts_1930_or_later
        else EpcFloorDescriptions.suspended_insulated
    )
def map_solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Assumed insulation level of an as-built solid floor, by age band.

    Bands starting from 2003 are assumed insulated, bands starting from 1996 are assumed to
    have limited insulation, and anything earlier is assumed uninsulated.

    :param age_band: Input age band
    :return: EpcFloorDescriptions
    """
    start = age_band.start_year()
    if start >= 2003:
        description = EpcFloorDescriptions.solid_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.solid_limited_insulation_assumed
    else:
        description = EpcFloorDescriptions.solid_no_insulation_assumed
    return description
def map_suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Assumed insulation level of an as-built suspended floor, by age band.

    Bands starting from 2003 are assumed insulated, bands starting from 1996 are assumed to
    have limited insulation, and anything earlier is assumed uninsulated.

    :param age_band: Input age band
    :return: EpcFloorDescriptions
    """
    start = age_band.start_year()
    if start >= 2003:
        description = EpcFloorDescriptions.suspended_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.suspended_limited_insulation_assumed
    else:
        description = EpcFloorDescriptions.suspended_no_insulation_assumed
    return description
# Floor type label -> function that turns an age band into an as-built floor description.
# Both suspended variants share the same classifier.
as_built_floor_classifiers = {
    "Solid": map_solid_floor_as_built,
    "SuspendedTimber": map_suspended_floor_as_built,
    "SuspendedNotTimber": map_suspended_floor_as_built,
}

# Insulation status label -> age-band classifier; presumably applied when the floor type
# itself is unknown (TODO confirm against the caller).
unknown_as_built_floor_classifiers = {
    "RetroFitted": unknown_floor_retrofitted,
    "AsBuilt": unknown_floor_as_built,
    "Unknown": unknown_floor_as_built,
}

View file

@ -0,0 +1,56 @@
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
def map_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Assumed insulation of a flat, as-built roof, by age band:

    2023 onwards             Flat, insulated
    2003-2022                Flat, insulated
    1983-2002                Flat, insulated
    1976-1982                Flat, limited insulation
    1967-1975                Flat, limited insulation
    1950-1966 and earlier    Flat, no insulation

    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()
    if start >= 1983:
        description = EpcRoofDescriptions.flat_insulated
    elif start >= 1967:
        description = EpcRoofDescriptions.flat_limited_insulation
    else:
        description = EpcRoofDescriptions.flat_no_insulation
    return description
def map_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Assumed insulation of an as-built pitched roof with a sloping ceiling, by age band:

    2023 onwards             Sloping pitched, insulated
    2003-2022                Sloping pitched, insulated
    1983-2002                Sloping pitched, insulated
    1976-1982                Sloping pitched, limited insulation
    1967-1975 and earlier    Sloping pitched, no insulation

    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()
    if start >= 1983:
        description = EpcRoofDescriptions.sloping_pitched_insulated
    elif start >= 1976:
        description = EpcRoofDescriptions.sloping_pitched_limited_insulation
    else:
        description = EpcRoofDescriptions.sloping_pitched_no_insulation
    return description
# Roof type label -> function that turns an age band into an as-built roof description.
as_built_roof_classifiers = {
    # Only need to apply this to flat and sloping ceiling roofs
    "Flat": map_flat_roof,
    "PitchedWithSlopingCeiling": map_sloping_ceiling_roof,
}

View file

@ -0,0 +1,113 @@
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.walls import EpcWallDescriptions
def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed insulation of an as-built cavity wall, by age band.

    Bands starting before 1976 are assumed uninsulated, the 1976-1982 band is assumed
    partially insulated, and bands from 1983 onwards are assumed insulated.

    :param age_band: Input age band
    :return: EpcWallDescriptions
    :raises NotImplementedError: If the age band falls into none of the cases above.
    """
    if age_band.start_year() < 1976:
        return EpcWallDescriptions.cavity_no_insulation_assumed

    if age_band == EpcConstructionAgeBand.from_1976_to_1982:
        return EpcWallDescriptions.cavity_partial_insulated_assumed

    if age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        return EpcWallDescriptions.cavity_insulated_assumed

    # Report the band's value, matching the messages raised by the other wall mappers
    raise NotImplementedError(
        f"Age band {age_band.value} not handled for cavity wall as built insulation mapping"
    )
def map_solid_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed insulation of an as-built solid brick wall, by age band.

    Bands starting before 1976 are assumed uninsulated, the 1976-1982 band is assumed
    partially insulated, and bands from 1983 onwards are assumed insulated.

    :param age_band: Input age band
    :return: EpcWallDescriptions
    :raises NotImplementedError: If the age band falls into none of the cases above.
    """
    start = age_band.start_year()
    if start < 1976:
        description = EpcWallDescriptions.solid_brick_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.solid_brick_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.solid_brick_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for solid wall insulation mapping"
        )
    return description
def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed insulation of an as-built timber frame wall, by age band.

    Bands starting before 1950 are assumed uninsulated, bands starting before 1976 are
    assumed partially insulated, and bands from 1976 onwards are assumed insulated.

    :param age_band: Input age band
    :return: EpcWallDescriptions
    :raises NotImplementedError: If the age band falls into none of the cases above.
    """
    start = age_band.start_year()
    if start < 1950:
        description = EpcWallDescriptions.timber_frame_no_insulation_assumed
    elif start < 1976:
        description = EpcWallDescriptions.timber_frame_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1976):
        description = EpcWallDescriptions.timber_frame_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for timber frame wall insulation mapping"
        )
    return description
def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed insulation of an as-built system build wall, by age band.

    Bands starting before 1976 are assumed uninsulated, the 1976-1982 band is assumed
    partially insulated, and bands from 1983 onwards are assumed insulated.

    :param age_band: Input age band
    :return: EpcWallDescriptions
    :raises NotImplementedError: If the age band falls into none of the cases above.
    """
    start = age_band.start_year()
    if start < 1976:
        description = EpcWallDescriptions.system_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.system_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.system_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for system build wall insulation mapping"
        )
    return description
def map_granite_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed insulation of an as-built granite / whinstone wall, by age band.

    Bands starting before 1976 are assumed uninsulated, the 1976-1982 band is assumed
    partially insulated, and bands from 1983 onwards are assumed insulated.

    :param age_band: Input age band
    :return: EpcWallDescriptions
    :raises NotImplementedError: If the age band falls into none of the cases above.
    """
    start = age_band.start_year()
    if start < 1976:
        description = EpcWallDescriptions.granite_whinstone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.granite_whinstone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        # NOTE(review): this member is spelled "whinestone" while the two above use
        # "whinstone" - confirm the name exists on EpcWallDescriptions.
        description = EpcWallDescriptions.granite_whinestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for granite wall insulation mapping"
        )
    return description
def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed insulation of an as-built sandstone / limestone wall, by age band.

    Bands starting before 1976 are assumed uninsulated, the 1976-1982 band is assumed
    partially insulated, and bands from 1983 onwards are assumed insulated.

    :param age_band: Input age band
    :return: EpcWallDescriptions
    :raises NotImplementedError: If the age band falls into none of the cases above.
    """
    start = age_band.start_year()
    if start < 1976:
        description = EpcWallDescriptions.sandstone_limestone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.sandstone_limestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for sandstone wall insulation mapping"
        )
    return description
def map_cob_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    As-built description of a cob wall, by age band.

    Bands starting before 1983 map to the "average" description, bands from 1983 onwards to
    the "good" description.

    :param age_band: Input age band
    :return: EpcWallDescriptions
    :raises NotImplementedError: If the age band falls into neither case.
    """
    start = age_band.start_year()
    if start < 1983:
        description = EpcWallDescriptions.cob_as_built_average
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.cob_as_built_good
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for cob wall insulation mapping"
        )
    return description
# Wall type label -> function that turns an age band into an as-built wall description.
as_built_wall_classifiers = {
    "Cavity": map_cavity_wall_insulation,
    "Solid Brick": map_solid_wall_insulation,
    "Timber Frame": map_timber_frame_wall_insulation,
    "System": map_system_build_wall_insulation,
    "Granite": map_granite_wall_insulation,
    "Sandstone": map_sandstone_wall_insulation,
    "Cob": map_cob_wall_insulation,
}

View file

@ -0,0 +1,10 @@
from datatypes.epc.property_type_built_form import BuiltForm
# Maps built-form labels from the source data to BuiltForm members.
parity_map = {
    "MidTerrace": BuiltForm.mid_terrace,
    "EndTerrace": BuiltForm.end_terrace,
    "Detached": BuiltForm.detached,
    "SemiDetached": BuiltForm.semi_detached,
    "EnclosedMidTerrace": BuiltForm.enclosed_mid_terrace,
    "EnclosedEndTerrace": BuiltForm.enclosed_end_terrace,
}

View file

@ -0,0 +1,26 @@
from numpy import nan
from datatypes.epc.floor import EpcFloorDescriptions
# Maps (floor type, insulation status) pairs to an EPC floor description.
# A value of None marks pairs with no fixed description; per the inline notes these are
# presumably resolved elsewhere from the construction age band (TODO confirm against caller).
# NOTE(review): keys containing `nan` only match a lookup that uses the very same `nan`
# object (nan != nan, so equality falls back to identity) - confirm lookups pass numpy's `nan`.
floor_map = {
    # Solid floor
    ('Solid', 'AsBuilt'): None,  # Mapped
    ('Solid', 'Unknown'): None,  # Mapped
    ('Solid', nan): None,  # Mapped
    ('Solid', 'RetroFitted'): EpcFloorDescriptions.solid_insulated,
    # Suspended floor
    ('SuspendedTimber', nan): None,  # Mapped suspended_floor_as_built
    ('SuspendedTimber', 'AsBuilt'): None,  # Mapped suspended_floor_as_built
    ('SuspendedTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated,
    ('SuspendedTimber', 'Unknown'): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated,
    ('SuspendedNotTimber', nan): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'Unknown'): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'AsBuilt'): None,  # Mapped suspended_floor_as_built
    # Unknown type - mapped on age
    ('Unknown', 'Unknown'): None,  # Mapped unknown_floor_as_built
    ('Unknown', 'RetroFitted'): None,  # Mapped unknown_floor_retrofitted
    (nan, nan): None,  # No actual information!
    ('Unknown', 'AsBuilt'): None,  # Mapped unknown_floor_as_built
}

View file

@ -0,0 +1,20 @@
from datatypes.epc.efficiency import EpcEfficiency
# Maps glazing labels from the source data to EPC-style window attributes.
glazing_map = {
    # Value layout: (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area)
    # For SAP 10 assessments, the glazed type and glazed area are no longer populated in the EPC API data
    "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None),
    "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None),
    # For triple glazing with age unknown, the performance is only average, whereas a post-2022
    # installation is classed as high performance glazing with good efficiency. We'll need to be
    # considerate as to how we make updates to the windows data.
    # Triple known data (see "TripleKnownData" below) is high performance glazing with Good
    # efficiency (at least)
    "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None),
    # This is also classed as high performance glazing
    "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
    # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good)
    "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None),
    "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
}

View file

@ -0,0 +1,330 @@
from datatypes.epc.main_heating import EpcHeatingSystems
from datatypes.epc.efficiency import EpcEfficiency
from datatypes.epc.fuel import EpcFuel
from datatypes.epc.heating_controls import EpcHeatingControls
from datatypes.epc.hotwater import EpcHotWaterSystems
# Lookup table from raw heating attributes to EPC-style heating, controls and hot water values.
#
# Key:   (heating system type, letter A-G, fuel type, controls quality)
#        NOTE(review): the meaning of the A-G letter is not stated here - presumably a
#        rating band from the source data; confirm.
# Value: (EpcHeatingSystems, EpcEfficiency, EpcFuel,
#         EpcHeatingControls, EpcEfficiency,
#         EpcHotWaterSystems, EpcEfficiency)
#        Each EpcEfficiency presumably rates the item immediately before it (heating system,
#        heating controls, hot water system) - confirm against the code that unpacks it.
#
# The numbered comments are a running index of the entries.
heating_map = {
    # 0
    ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 1
    ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 2
    ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 3
    ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 4
    ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 5
    ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 6
    ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 7
    ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 8
    ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 9
    ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 10
    ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 11
    ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 12
    ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 13
    ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 14
    ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 15
    ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 16
    ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 17
    ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 18
    ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 19
    ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 20
    ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 21
    ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 22
    ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 23
    ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 24
    ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 25
    ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 26
    ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 27
    ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 28
    ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 29
    ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 30
    ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 31
    ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 32
    ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 33
    ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 34
    ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 35
    # NOTE(review): the second element is AVERAGE here but GOOD for entries 33 and 34, which
    # share the same system, letter and fuel - confirm this difference is intended.
    ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 36
    ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 37
    ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 38
    ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 39
    ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 40
    ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 41
    ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 42
    ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 43
    ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
        EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
    ),
    # 44
    ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 45
    ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 46
    ('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 47 - water done from here
    ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 48
    ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): (
        EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 49
    ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 50
    ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): (
        EpcHeatingSystems.room_heaters_smokeless_fuel, EpcEfficiency.VERY_POOR, EpcFuel.smokeless_coal,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 51
    ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): (
        EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.automatic_charge_control, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 52
    ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.manual_charge_control, EpcEfficiency.POOR,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 53
    ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.warm_air_electricaire, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    ),
    # 54
    ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
        EpcHeatingSystems.warm_air_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
    )
}

View file

@ -0,0 +1,8 @@
from datatypes.epc.property_type_built_form import PropertyType
# Lookup from the landlord's property "Type" label to the PropertyType member
# used by the rest of the pipeline. Labels not listed here have no mapping.
parity_map = {
    "Flat": PropertyType.flat,
    "Maisonette": PropertyType.maisonette,
    "Bungalow": PropertyType.bungalow,
    "House": PropertyType.house,
}

View file

@ -0,0 +1,461 @@
import pandas as pd
from numpy import nan
from typing import Union, Callable
from collections.abc import Mapping
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.efficiency import EpcEfficiency
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
# Lookup keyed by (roof construction, roof insulation) tuples, giving the
# EpcRoofDescriptions member for that combination. A value of None marks a
# combination that is listed but not described here ("To be classified").
# NOTE(review): several keys contain `nan`. A tuple lookup only matches a NaN key
# when the looked-up tuple holds the very same NaN object (NaN != NaN) - confirm
# that the caller's missing values are that object.
roof_map = {
    # Dwelling above
    ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    # NOTE(review): "SameDwellingAbove" shares the "another dwelling above" description - confirm intended.
    ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    # Pitched, normal loft access, with a loft thickness
    ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # Pitched, no loft access, with a loft thickness
    ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # All pitched options with as-built or unknown insulation go to EpcRoofDescriptions.pitched_insulated_assumed
    # With access
    ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,
    # No access
    ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,
    # Flat
    ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation,
    # Flat - limited insulation
    # NOTE(review): every other thickness key is spelled 'mm<NN>' (see 'mm12' for
    # PitchedWithSlopingCeiling below); '12mm' may be a typo for 'mm12' - confirm against the source data.
    ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm50'): EpcRoofDescriptions.flat_limited_insulation,
    # Flat insulated
    ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm150'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated,
    # Flat - as built or unknown
    ('Flat', 'AsBuilt'): None,  # To be classified
    ('Flat', nan): None,  # To be classified
    ('Flat', 'Unknown'): None,  # To be classified
    # 12mm = very poor & has limited insulation description
    # 25, 50 = poor & has limited insulation description
    # 75, 100, 125mm = average (Flat, insulated)
    # 150, 175, 200, 225, 250mm = good (Flat, insulated)
    # 270mm+ = very good (Flat, insulated)
    # Thatched
    # NOTE(review): only mm50 / mm150 / mm300 / Unknown are listed for thatched roofs;
    # other thicknesses, 'AsBuilt' and nan have no entry - confirm this is complete.
    ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched,  # efficiency classified based on age
    # Sloping:
    # Limited (12 very poor, 25-50 poor)
    ('PitchedWithSlopingCeiling', 'mm12'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good)
    ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm100'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm150'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated,
    # As built/unknown
    ('PitchedWithSlopingCeiling', 'AsBuilt'): None,  # To be classified
    ('PitchedWithSlopingCeiling', nan): None,  # To be classified
    ('PitchedWithSlopingCeiling', 'Unknown'): None,  # To be classified
}
# Per roof construction type, the "as built / unknown" holding description.
# Presumably used when the construction age band is missing (as the name
# suggests) - the consumer is not visible in this module; verify against caller.
roof_unknown_age_fallback = {
    "Flat": EpcRoofDescriptions.flat_as_built_unknown,
    "PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    "PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown,
    # Both pitched loft variants (with and without loft access) share one description
    "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
    "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
}
# A roof efficiency rule is one of:
#   - a fixed EpcEfficiency value,
#   - a callable taking only the age band, e.g. loft_insulated_efficiency(age_band),
#   - a callable taking (insulation_thickness, age_band), e.g. flat_efficiency.
# The callable shapes mirror how resolve_roof_efficiency invokes the rules.
RoofEfficiencyRule = Union[
    EpcEfficiency,
    Callable[[EpcConstructionAgeBand], EpcEfficiency],
    Callable[[int | None, EpcConstructionAgeBand], EpcEfficiency],
]
def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a flat roof from the start year of its construction age band.

    start year >= 2023 -> Very Good
    start year >= 2003 -> Good
    start year >= 1983 -> Average
    start year >= 1976 -> Poor
    anything earlier   -> Very Poor

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()
    # Thresholds are checked newest first; the first one reached wins.
    year_bands = (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
        (1983, EpcEfficiency.AVERAGE),
        (1976, EpcEfficiency.POOR),
    )
    for earliest_year, rating in year_bands:
        if built_from >= earliest_year:
            return rating
    return EpcEfficiency.VERY_POOR
def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a flat roof from its insulation thickness.

    Bands (lower bound inclusive):
        below 25mm (e.g. 12mm) -> Very Poor
        25mm  up to 75mm       -> Poor
        75mm  up to 150mm      -> Average
        150mm up to 270mm      -> Good
        270mm and above        -> Very Good

    The bands are contiguous so that a thickness between the nominal values
    (for example 60mm, 130mm or 260mm) is rated by the band it falls into
    rather than dropping through to Very Poor.

    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for flat insulated efficiency calculation")
    if insulation_thickness >= 270:
        return EpcEfficiency.VERY_GOOD
    if insulation_thickness >= 150:
        return EpcEfficiency.GOOD
    if insulation_thickness >= 75:
        return EpcEfficiency.AVERAGE
    if insulation_thickness >= 25:
        return EpcEfficiency.POOR
    return EpcEfficiency.VERY_POOR
def flat_efficiency(insulation_thickness: int | None, age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Flat roof efficiency: rated by insulation thickness when one is given,
    otherwise by construction age band.

    :param insulation_thickness: Insulation thickness in mm, or None when not known
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No measured thickness: fall back to the age-band rating
        return flat_insulated_efficiency_age_band(age_band)
    return flat_insulated_efficiency_thickness(insulation_thickness)
def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate an (assumed) insulated loft from the start year of its age band.

    start year >= 2003 (2003-2006 and later bands) -> Very Good
    start year >= 1991 (1991-1995, 1996-2002)      -> Good
    anything earlier                               -> Average

    :param age_band: Input age band, EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()
    for earliest_year, rating in (
        (2003, EpcEfficiency.VERY_GOOD),
        (1991, EpcEfficiency.GOOD),
    ):
        if built_from >= earliest_year:
            return rating
    return EpcEfficiency.AVERAGE
def thatched_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a thatched roof from the start year of its construction age band:
    2023 or later is Very Good, 2003 or later is Good, anything earlier is Average.

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()
    if built_from >= 2023:
        rating = EpcEfficiency.VERY_GOOD
    elif built_from >= 2003:
        rating = EpcEfficiency.GOOD
    else:
        rating = EpcEfficiency.AVERAGE
    return rating
def thatched_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a thatched roof from its additional insulation thickness:
    175mm or more is Very Good, 25mm or more is Good, anything thinner is Average.

    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for thatched efficiency calculation")
    if insulation_thickness >= 175:
        return EpcEfficiency.VERY_GOOD
    return EpcEfficiency.GOOD if insulation_thickness >= 25 else EpcEfficiency.AVERAGE
def thatched_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Thatched roof efficiency: rated by insulation thickness when one is given,
    otherwise by construction age band.

    :param insulation_thickness: Insulation thickness in mm, or None when not known
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No measured thickness: fall back to the age-band rating
        return thatched_efficiency_age_band(age_band)
    return thatched_efficiency_thickness(insulation_thickness)
def sloping_ceiling_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a pitched roof with sloping ceiling from the start year of its age band.

    start year >= 2023 -> Very Good
    start year >= 2003 -> Good
    start year >= 1983 -> Average
    start year >= 1976 -> Poor
    anything earlier   -> Very Poor

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()
    if built_from >= 2023:
        rating = EpcEfficiency.VERY_GOOD
    elif built_from >= 2003:
        rating = EpcEfficiency.GOOD
    elif built_from >= 1983:
        rating = EpcEfficiency.AVERAGE
    elif built_from >= 1976:
        rating = EpcEfficiency.POOR
    else:
        rating = EpcEfficiency.VERY_POOR
    return rating
def sloping_ceiling_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a pitched roof with sloping ceiling from its insulation thickness.

    270mm or more -> Very Good
    150mm or more -> Good
    75mm or more  -> Average
    25mm or more  -> Poor
    anything less -> Very Poor

    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for sloping ceiling efficiency calculation")
    # Thickest band first; the first minimum that is reached decides the rating.
    thickness_bands = (
        (270, EpcEfficiency.VERY_GOOD),
        (150, EpcEfficiency.GOOD),
        (75, EpcEfficiency.AVERAGE),
        (25, EpcEfficiency.POOR),
    )
    for minimum_mm, rating in thickness_bands:
        if insulation_thickness >= minimum_mm:
            return rating
    return EpcEfficiency.VERY_POOR
def sloping_ceiling_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Sloping ceiling roof efficiency: rated by insulation thickness when one is
    given, otherwise by construction age band.

    :param insulation_thickness: Insulation thickness in mm, or None when not known
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No measured thickness: fall back to the age-band rating
        return sloping_ceiling_efficiency_age_band(age_band)
    return sloping_ceiling_efficiency_thickness(insulation_thickness)
def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters from its insulation thickness.

    350mm or more -> Very Good
    200mm or more -> Good
    125mm or more -> Average
    50mm or more  -> Poor
    anything less -> Very Poor

    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for loft insulated at rafters efficiency calculation")
    if insulation_thickness >= 350:
        rating = EpcEfficiency.VERY_GOOD
    elif insulation_thickness >= 200:
        rating = EpcEfficiency.GOOD
    elif insulation_thickness >= 125:
        rating = EpcEfficiency.AVERAGE
    elif insulation_thickness >= 50:
        rating = EpcEfficiency.POOR
    else:
        rating = EpcEfficiency.VERY_POOR
    return rating
def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters from the start year of its age band.

    start year >= 2023 -> Very Good
    start year >= 2003 -> Good
    start year >= 1983 -> Average
    start year >= 1976 -> Poor
    anything earlier   -> Very Poor

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()
    year_bands = (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
        (1983, EpcEfficiency.AVERAGE),
        (1976, EpcEfficiency.POOR),
    )
    for earliest_year, rating in year_bands:
        if built_from >= earliest_year:
            return rating
    return EpcEfficiency.VERY_POOR
def loft_insulated_at_rafters_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Efficiency of a loft insulated at the rafters: rated by insulation thickness
    when one is given, otherwise by construction age band.

    :param insulation_thickness: Insulation thickness in mm, or None when not known
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No measured thickness: fall back to the age-band rating
        return loft_insulated_at_rafters_efficiency_age_band(age_band)
    return loft_insulated_at_rafters_efficiency_thickness(insulation_thickness)
# Efficiency rule per roof description. A value is either a fixed EpcEfficiency
# or one of the functions defined above, which resolve_roof_efficiency calls
# with the age band (and, where the function accepts it, the insulation thickness).
# Descriptions without an entry resolve to EpcEfficiency.NA in resolve_roof_efficiency.
ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = {
    # Flat roof
    EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR,
    EpcRoofDescriptions.flat_limited_insulation: flat_efficiency,
    EpcRoofDescriptions.flat_insulated: flat_efficiency,
    # Loft:
    # value mappings
    EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR,
    EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR,
    EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR,
    EpcRoofDescriptions.loft_75mm_insulation: EpcEfficiency.AVERAGE,
    EpcRoofDescriptions.loft_100mm_insulation: EpcEfficiency.AVERAGE,
    EpcRoofDescriptions.loft_125mm_insulation: EpcEfficiency.AVERAGE,
    EpcRoofDescriptions.loft_150mm_insulation: EpcEfficiency.GOOD,
    EpcRoofDescriptions.loft_175mm_insulation: EpcEfficiency.GOOD,
    EpcRoofDescriptions.loft_200mm_insulation: EpcEfficiency.GOOD,
    EpcRoofDescriptions.loft_250mm_insulation: EpcEfficiency.GOOD,
    EpcRoofDescriptions.loft_270mm_insulation: EpcEfficiency.VERY_GOOD,
    EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD,
    EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD,
    EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD,
    EpcRoofDescriptions.pitched_no_insulation: EpcEfficiency.VERY_POOR,
    # function mappings
    # (age band only - loft_insulated_efficiency takes no thickness argument)
    EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency,
    # Loft at rafters
    EpcRoofDescriptions.loft_insulated_at_rafters: loft_insulated_at_rafters_efficiency,
    # Another dwelling above
    EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA,
    # Thatched
    EpcRoofDescriptions.thatched: thatched_efficiency,
    EpcRoofDescriptions.thatched_with_additional_insulation: thatched_efficiency,
    # Sloping ceiling
    EpcRoofDescriptions.sloping_pitched_insulated: sloping_ceiling_efficiency,
    EpcRoofDescriptions.sloping_pitched_limited_insulation: sloping_ceiling_efficiency,
    EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR,
}
def resolve_roof_efficiency(
    description: EpcRoofDescriptions,
    age_band: EpcConstructionAgeBand | None,
    insulation_thickness: int | None,
) -> EpcEfficiency:
    """
    Resolve roof efficiency from description + age band + insulation thickness.

    :param description: roof description to rate
    :param age_band: construction age band, or None / null when not known
    :param insulation_thickness: insulation thickness in mm, or None when not known
    :return: EpcEfficiency.NA when the description is one of the unknown / holding
        descriptions, has no rule in ROOF_DESCRIPTION_EFFICIENCIES, or needs an
        age band that is missing; otherwise the rule's fixed or computed efficiency
    """
    # Function-scope import: only this function needs to inspect a rule's signature.
    import inspect

    # Unknown / holding descriptions → efficiency unknown
    if description in description.unknown_descriptions:
        return EpcEfficiency.NA
    rule = ROOF_DESCRIPTION_EFFICIENCIES.get(description)
    if rule is None:
        return EpcEfficiency.NA
    # Fixed efficiency
    if isinstance(rule, EpcEfficiency):
        return rule
    # Callable rule: every rule function needs an age band
    if age_band is None or pd.isnull(age_band):
        return EpcEfficiency.NA
    # Pick the call shape from the rule's declared parameters instead of probing
    # with try/except TypeError, which would also swallow a TypeError raised
    # inside the rule itself and then call the rule again with the wrong arguments.
    if len(inspect.signature(rule).parameters) >= 2:
        # (thickness, age_band) rules
        return rule(insulation_thickness, age_band)
    # (age_band) rules
    return rule(age_band)

View file

@ -0,0 +1,211 @@
from typing import Callable, Union
from collections.abc import Mapping
from datatypes.epc.walls import EpcWallDescriptions
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.efficiency import EpcEfficiency
# Unique combinations
# Lookup keyed by (wall construction, wall insulation) tuples, giving the
# EpcWallDescriptions member for that combination. A value of None marks a
# combination that is listed but not described here ("To be classified").
# NOTE(review): Cob only lists 'AsBuilt'; other Cob insulation values have no
# entry - confirm this covers the source data.
wall_map = {
    # Cavity walls
    ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity,
    ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation,
    ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation,
    ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal,
    ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external,
    ('Cavity', 'AsBuilt'): None,  # To be classified
    ('Cavity', 'Unknown'): None,  # To be classified
    # System built walls
    ('System', 'External'): EpcWallDescriptions.system_external_insulation,
    ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation,
    ('System', 'AsBuilt'): None,  # To be classified
    ('System', 'Unknown'): None,
    # Timber Frame walls
    ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation,
    ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation,
    ('Timber Frame', 'AsBuilt'): None,  # To be classified
    ('Timber Frame', 'Unknown'): None,
    # Solid Brick walls
    ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation,
    ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation,
    ('Solid Brick', 'AsBuilt'): None,  # To be classified
    ('Solid Brick', 'Unknown'): None,
    # Granite walls (described with the granite/whinstone members)
    ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation,
    ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation,
    ('Granite', 'AsBuilt'): None,
    ('Granite', 'Unknown'): None,
    # Sandstone walls (described with the sandstone/limestone members)
    ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation,
    ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation,
    ('Sandstone', 'Unknown'): None,
    ('Sandstone', 'AsBuilt'): None,
    # Cob walls
    ('Cob', 'AsBuilt'): None,
}
# Per wall construction type, the "as built / unknown" holding description.
# Presumably used when the construction age band is missing (as the name
# suggests) - the consumer is not visible in this module; verify against caller.
wall_unknown_age_fallback = {
    "Cavity": EpcWallDescriptions.cavity_as_built_unknown,
    "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown,
    "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown,
    "System": EpcWallDescriptions.system_as_built_unknown,
    "Granite": EpcWallDescriptions.granite_as_built_unknown,
    "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown,
    "Cob": EpcWallDescriptions.cob_as_built_unknown,
}
def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a filled cavity wall by construction age band: the 2023-onwards band
    is Very Good, every other band is Good.

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    very_good_bands = {EpcConstructionAgeBand.from_2023_onwards}
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
def internal_external_insulation_efficiency(
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Rate walls with internal or external insulation by construction age band.
    Used for:
    - unfilled cavity walls with internal/external insulation
    - solid brick walls with internal/external insulation
    - system built walls with internal/external insulation
    In each case 100mm of insulation is assumed. All of these wall types have
    the same behaviour in elmhurst.

    Bands from 1983-1990 onwards are Very Good; every other band is Good.

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    very_good_bands = {
        EpcConstructionAgeBand.from_1983_to_1990,
        EpcConstructionAgeBand.from_1991_to_1995,
        EpcConstructionAgeBand.from_1996_to_2002,
        EpcConstructionAgeBand.from_2003_to_2006,
        EpcConstructionAgeBand.from_2007_to_2011,
        EpcConstructionAgeBand.from_2012_to_2022,
        EpcConstructionAgeBand.from_2023_onwards,
    }
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
def timber_granite_sandstone_internal_external_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate walls with internal or external insulation by construction age band.
    Used for:
    - timber frame walls
    - sandstone/limestone walls
    - granite/whinstone walls

    The 2023-onwards band is Very Good; every other band is Good.

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    very_good_bands = {EpcConstructionAgeBand.from_2023_onwards}
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
# A wall efficiency rule is either a fixed EpcEfficiency value or a callable
# taking only the construction age band - every wall rule function in this
# module has that shape, and resolve_wall_efficiency calls rule(age_band).
WallEfficiencyRule = Union[
    EpcEfficiency,
    Callable[[EpcConstructionAgeBand], EpcEfficiency],
]
# Efficiency rule per wall description. A value is either a fixed EpcEfficiency
# or one of the age-band functions defined above, which is called with the
# construction age band when the efficiency is resolved.
WALL_DESCRIPTION_EFFICIENCIES: Mapping[EpcWallDescriptions, WallEfficiencyRule] = {
    # Note: all function mappings have been defined based on Elmhurst
    # Cavity
    # value mappings
    EpcWallDescriptions.cavity_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.cavity_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.cavity_insulated_assumed: EpcEfficiency.GOOD,
    EpcWallDescriptions.cavity_filled_plus_internal: EpcEfficiency.VERY_GOOD,
    EpcWallDescriptions.cavity_filled_plus_external: EpcEfficiency.VERY_GOOD,
    # function mappings
    EpcWallDescriptions.cavity_filled_cavity: cavity_filled_efficiency,
    EpcWallDescriptions.cavity_internal_insulation: internal_external_insulation_efficiency,
    EpcWallDescriptions.cavity_external_insulation: internal_external_insulation_efficiency,
    # Solid brick
    # value mappings
    EpcWallDescriptions.solid_brick_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.solid_brick_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.solid_brick_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.solid_brick_internal_insulation: internal_external_insulation_efficiency,
    EpcWallDescriptions.solid_brick_external_insulation: internal_external_insulation_efficiency,
    # System
    # value mappings
    EpcWallDescriptions.system_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.system_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.system_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.system_internal_insulation: internal_external_insulation_efficiency,
    EpcWallDescriptions.system_external_insulation: internal_external_insulation_efficiency,
    # Timber frame
    # value mappings
    EpcWallDescriptions.timber_frame_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.timber_frame_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.timber_frame_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.timber_frame_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
    EpcWallDescriptions.timber_frame_external_insulation: timber_granite_sandstone_internal_external_efficiency,
    # Granite / whinstone
    EpcWallDescriptions.granite_whinstone_no_insulation_assumed: EpcEfficiency.VERY_POOR,
    EpcWallDescriptions.granite_whinstone_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    # NOTE(review): this member is spelled "whinestone" while the neighbouring
    # members use "whinstone" - confirm the name matches the EpcWallDescriptions enum.
    EpcWallDescriptions.granite_whinestone_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.granite_whinstone_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
    EpcWallDescriptions.granite_whinstone_external_insulation: timber_granite_sandstone_internal_external_efficiency,
    # Sandstone / limestone
    EpcWallDescriptions.sandstone_limestone_no_insulation_assumed: EpcEfficiency.VERY_POOR,
    EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.sandstone_limestone_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.sandstone_limestone_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
    EpcWallDescriptions.sandstone_limestone_external_insulation: timber_granite_sandstone_internal_external_efficiency,
    # Cob (special case)
    EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD,
    # Unknown mappings which are unhandled
    EpcWallDescriptions.cavity_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.solid_brick_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.system_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.timber_frame_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.granite_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.sandstone_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.cob_as_built_unknown: EpcEfficiency.NA,
}
def resolve_wall_efficiency(
    description: EpcWallDescriptions,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Resolve wall efficiency from description + age band.

    :param description: wall description; must be a key of WALL_DESCRIPTION_EFFICIENCIES
    :param age_band: EpcConstructionAgeBand, passed to the rule when the rule is a function
    :return: EpcEfficiency
    :raises KeyError: if the description has no entry in WALL_DESCRIPTION_EFFICIENCIES
    """
    outcome = WALL_DESCRIPTION_EFFICIENCIES[description]
    # Fixed ratings are returned as they are; anything else is an age-band function
    return outcome if isinstance(outcome, EpcEfficiency) else outcome(age_band)

View file

@ -1,6 +0,0 @@
# Identity lookup for property type labels: every key maps to the same string.
parity_map = {
    "Flat": "Flat",
    "Maisonette": "Maisonette",
    "Bungalow": "Bungalow",
    "House": "House",
}

View file

@ -1,3 +0,0 @@
# Empty lookup: no entries are defined here.
parity_map = {
}

View file

@ -1,93 +1,371 @@
import re
from tqdm import tqdm
import pandas as pd
from etl.epc.DataProcessor import construction_age_bounds_map
from backend.onboarders.mappings.property_type import parity_map as property_map
from backend.onboarders.mappings.age_band import party_map as age_band_map
from backend.onboarders.mappings.built_form import parity_map as built_form_map
def check_nulls(data, original_column, mapped_column):
    """
    Check that a mapped column is only null where its source column was null.

    :param data: DataFrame holding both columns
    :param original_column: name of the source column that was mapped
    :param mapped_column: name of the column produced by the mapping
    :return: True when the check passes (on every passing path, not only when
        the mapped column has no nulls at all)
    :raises AssertionError: if any row has a null mapped value but a non-null
        original value, i.e. a source value the mapping did not cover
    """
    # We only allow nulls if the original value was null
    unmapped_rows = data[pd.isnull(data[mapped_column])]
    if unmapped_rows.empty:
        return True
    # We make sure all original values were null
    assert pd.isnull(unmapped_rows[original_column]).all(), (
        f"Some values in {mapped_column} were not mapped, but original values were not null"
    )
    return True
# Sample input data
data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
from backend.onboarders.base import OnboarderBase
# Parity mappings
from backend.onboarders.mappings.parity.property_type import parity_map as property_map
from backend.onboarders.mappings.parity.age_band import parity_map as age_band_map
from backend.onboarders.mappings.parity.built_form import parity_map as built_form_map
from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback, WALL_DESCRIPTION_EFFICIENCIES
from onboarders.mappings.parity.roof import roof_map, roof_unknown_age_fallback, resolve_roof_efficiency
from onboarders.mappings.parity.floor import floor_map
from onboarders.mappings.parity.heating import heating_map
from onboarders.mappings.parity.glazing import glazing_map
from backend.onboarders.mappings.parity.as_built_wall_classifiers import as_built_wall_classifiers
from backend.onboarders.mappings.parity.as_built_roof_classifiers import as_built_roof_classifiers
from backend.onboarders.mappings.parity.as_built_floor_classifiers import (
as_built_floor_classifiers, unknown_as_built_floor_classifiers
)
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.floor import EpcFloorDescriptions
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.walls import EpcWallDescriptions
from datatypes.epc.efficiency import EpcEfficiency
# We want to map the parity fields to standard EPC references. This will allow us to
# 1) Estimate EPCs, more accurately
# 2) Patch incorrect EPCs with ease
# 3) Indicate already installed measures
# ------------ construction_age_band ------------
# Map to EPC age bands
# def construction_date_to_band(year):
# if pd.isnull(year):
# return None
# # Get the year from the date which is numpy datetime format
# for label, ranges in construction_age_bounds_map.items():
# if ranges["l"] <= year <= ranges["u"]:
# return label
# raise NotImplementedError("year out of bounds")
#
#
# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band)
# Map the raw "Construction Years" values through the age band lookup; a null
# result is only accepted where the source value was itself null (check_nulls).
data["construction_age_band"] = data["Construction Years"].map(age_band_map)
check_nulls(data, "Construction Years", "construction_age_band")
# ------------ property_type ------------
# Every "Type" value must have a mapping: any null after mapping fails the assert.
data["property_type"] = data["Type"].map(property_map)
assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped"
# ------------ built_form ------------
# Every "Attachment" value must have a mapping: any null after mapping fails the assert.
data["built_form"] = data["Attachment"].map(built_form_map)
assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped"
# ------------ Wall Construction ------------
# Combined "<construction>+<insulation>" label; a missing insulation value is
# replaced by the literal "Unknown Insulation" before concatenating.
data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation")
# The two value_counts() results below are not assigned or printed.
data["Wall Insulation"].value_counts()
data["Wall Construction"].value_counts()
# Per wall construction type, the age bands treated as insulated or partially
# insulated when the insulation is recorded as "AsBuilt". Every list is empty here.
as_built_map = {
    "Cavity": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
}
# Registers tqdm's progress_apply on pandas objects (used by the DataFrame apply calls).
tqdm.pandas()
def map_wall_construction(wall_constuction, wall_insulation, construction_age_band):
if wall_insulation == "AsBuilt":
# Deduce based on wall construction and age band
bands = as_built_map.get(wall_constuction, None)
if bands is None:
raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map")
class ParityOnboarder(OnboarderBase):
# We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated
def __init__(
    self,
    fileuri: str,
    file_format: str,
    **kwargs
):
    """
    Derive the S3 bucket, input key and output key from the file URI, then
    load the input file via read_s3.

    :param fileuri: location of the input file, in the format s3://bucket/key
    :param file_format: extension of the input file without the leading dot;
        also forwarded to read_s3
    :param kwargs: extra keyword arguments forwarded to read_s3
    """
    # Extract bucket, and filekey; Will be in the format s3://bucket/key
    # Splitting on "/" gives ["s3:", "", bucket, *key parts]
    uri_parts = fileuri.split("/")
    self.bucket_name = uri_parts[2]
    self.input_file_name = "/".join(uri_parts[3:])
    # Also prepare output file name. Only a trailing ".<file_format>" is removed,
    # so the same text appearing earlier in the key (e.g. in a folder name) is kept.
    self.output_file_name = self.input_file_name.removesuffix("." + file_format) + "_transformed.csv"
    # Variables we want to map
    # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type',
    # 'Attachment', 'Construction Years', 'Wall Construction',
    # 'Wall Insulation', 'Roof Construction', 'Roof Insulation',
    # 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating',
    # 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
    # 'Total Floor Area (m2)'
    self.read_s3(file_format=file_format, **kwargs)
def map_construction_age_band(self):
    """
    Map the "Construction Years" column through age_band_map into the landlord
    construction age band column, then assert that nulls in the result only
    come from nulls in the source column.
    """
    source_column = "Construction Years"
    mapped_bands = self.data[source_column].map(age_band_map)
    self.data[self.landlord_construction_age_band] = mapped_bands
    self.assert_nulls_only_from_source_nulls(
        self.data,
        source_column,
        self.landlord_construction_age_band,
    )
def map_property_type(self):
    """
    Map the "Type" column through property_map into the landlord property type
    column, then assert that the result contains no nulls.
    """
    mapped_types = self.data["Type"].map(property_map)
    self.data[self.landlord_property_type] = mapped_types
    self.assert_no_nulls(self.data, self.landlord_property_type)
def map_built_form(self):
    """
    Map the "Attachment" column through built_form_map into the landlord built
    form column, then assert that the result contains no nulls.
    """
    mapped_forms = self.data["Attachment"].map(built_form_map)
    self.data[self.landlord_built_form] = mapped_forms
    self.assert_no_nulls(self.data, self.landlord_built_form)
@staticmethod
def _fill_wall_as_built(row: pd.Series) -> EpcWallDescriptions | None:
    """
    Utility function, used by map_wall_construction, to fill in the wall
    description for rows the direct (construction, insulation) lookup left as None.

    :param row: row of input sustainability data, being transformed
    :return: the existing description when it is not None; otherwise the
        fallback description for the wall type when the construction age band
        is null, or the result of the wall type's as-built classifier for the
        age band. None when no fallback / classifier exists for the wall type.
    """
    # NOTE(review): only None counts as unresolved here (a NaN value is returned
    # unchanged), whereas the roof equivalent tests with pd.isnull - confirm intended.
    current_description = row.landlord_wall_construction
    if current_description is not None:
        return current_description
    construction = row["Wall Construction"]
    # Missing construction age → conservative fallback
    if pd.isnull(row.landlord_construction_age_band):
        return wall_unknown_age_fallback.get(construction)
    classify = as_built_wall_classifiers.get(construction)
    return None if classify is None else classify(row.landlord_construction_age_band)
@staticmethod
def _resolve_wall_efficiency(
        description: EpcWallDescriptions,
        age_band: EpcConstructionAgeBand | None,
) -> EpcEfficiency:
    """
    Work out the efficiency rating for a wall description.

    :param description: wall description whose efficiency is wanted
    :param age_band: construction age band, only used when the rule for the description is callable
    :return: ``EpcEfficiency.NA`` when the description text contains "unknown insulation", when
        ``WALL_DESCRIPTION_EFFICIENCIES`` has no rule for it, or when a callable rule needs an age
        band that is missing; otherwise the fixed rating or the rule's result for the age band
    """
    is_holding_description = "unknown insulation" in description.value.lower()
    if is_holding_description:
        return EpcEfficiency.NA

    efficiency_rule = WALL_DESCRIPTION_EFFICIENCIES.get(description)
    if efficiency_rule is None:
        return EpcEfficiency.NA

    # A rule is either a ready-made rating ...
    if isinstance(efficiency_rule, EpcEfficiency):
        return efficiency_rule

    # ... or a callable that needs the age band
    age_band_missing = age_band is None or pd.isnull(age_band)
    return EpcEfficiency.NA if age_band_missing else efficiency_rule(age_band)
def map_wall_construction(self):
    """
    Fill the landlord wall construction and wall efficiency columns.

    The (construction, insulation) pair is looked up in ``wall_map`` first, each row is then
    passed through ``_fill_wall_as_built``, and finally ``_resolve_wall_efficiency`` derives
    the efficiency. ``assert_no_nulls`` is run on both resulting columns.
    """
    construction_column = self.landlord_wall_construction
    efficiency_column = self.landlord_wall_efficiency

    # Direct lookup on the (construction, insulation) pair
    wall_keys = self.data[["Wall Construction", "Wall Insulation"]].apply(tuple, axis=1)
    self.data[construction_column] = wall_keys.map(wall_map)

    # Row-wise as-built fill
    self.data[construction_column] = self.data.progress_apply(self._fill_wall_as_built, axis=1)
    # Sanity check
    self.assert_no_nulls(self.data, construction_column)

    def efficiency_for(row):
        return self._resolve_wall_efficiency(
            row.landlord_wall_construction,
            row.landlord_construction_age_band,
        )

    self.data[efficiency_column] = self.data.progress_apply(efficiency_for, axis=1)
    # Additional sanity check
    self.assert_no_nulls(self.data, efficiency_column)
@staticmethod
def _fill_roof_as_built(row: pd.Series) -> EpcRoofDescriptions | None:
    """
    Row-wise helper for map_roof_construction: supply an as-built roof description
    when the row does not have one yet.

    :param row: row of input data being transformed
    :return: the existing description when it is not null; otherwise the
        ``roof_unknown_age_fallback`` entry for the roof type when the age band is null,
        or the classifier result for the age band
    :raises NotImplementedError: when ``as_built_roof_classifiers`` has no entry for the roof
        type, or the classifier returns None
    """
    existing = row.landlord_roof_construction
    if pd.notnull(existing):
        return existing

    roof_type = row["Roof Construction"]
    classify = as_built_roof_classifiers.get(roof_type)
    # The classifier lookup is validated before the age band is inspected
    if classify is None:
        raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'")

    age_band = row.landlord_construction_age_band
    if pd.isnull(age_band):
        return roof_unknown_age_fallback.get(roof_type)

    description = classify(age_band)
    if description is not None:
        return description
    raise NotImplementedError(
        f"Roof classification returned None for roof type '{roof_type}'"
    )
@staticmethod
def _extract_insulation_thickness(value: str | None) -> int | None:
"""
Extract insulation thickness in mm from a string like 'mm150'.
Returns None if not present or not parseable.
"""
if value is None or pd.isnull(value):
return None
match = re.search(r"(\d+)", str(value))
if not match:
return None
return int(match.group(1))
def map_roof_construction(self):
    """
    Fill the landlord roof construction, roof efficiency and sloping-ceiling columns.

    The (construction, insulation) pair is looked up in ``roof_map``, each row is passed through
    ``_fill_roof_as_built``, the insulation thickness is parsed into
    "roof_insulation_thickness_mm", and ``resolve_roof_efficiency`` derives the efficiency.
    ``assert_no_nulls`` is run on the construction and efficiency columns.
    """
    construction_column = self.landlord_roof_construction
    efficiency_column = self.landlord_roof_efficiency

    # Direct lookup on the (construction, insulation) pair, then row-wise as-built fill
    roof_keys = self.data[["Roof Construction", "Roof Insulation"]].progress_apply(tuple, axis=1)
    self.data[construction_column] = roof_keys.map(roof_map)
    self.data[construction_column] = self.data.progress_apply(self._fill_roof_as_built, axis=1)
    # sanity check
    self.assert_no_nulls(self.data, construction_column)

    raw_insulation = self.data["Roof Insulation"]
    self.data["roof_insulation_thickness_mm"] = raw_insulation.apply(self._extract_insulation_thickness)

    def efficiency_for(row):
        return resolve_roof_efficiency(
            description=row.landlord_roof_construction,
            age_band=row.landlord_construction_age_band,
            insulation_thickness=row.roof_insulation_thickness_mm,
        )

    self.data[efficiency_column] = self.data.progress_apply(efficiency_for, axis=1)
    # sanity check
    self.assert_no_nulls(self.data, efficiency_column)

    # Flag sloping ceiling
    def is_sloping_ceiling(roof_type):
        return roof_type == "PitchedWithSlopingCeiling"

    self.data[self.landlord_has_sloping_ceiling] = self.data["Roof Construction"].apply(is_sloping_ceiling)
@staticmethod
def _fill_floor_as_built(row: pd.Series):
# 1. Already resolved
if row.landlord_floor_construction is not None:
return row.landlord_floor_construction
age_band = row.landlord_construction_age_band
floor_type = row["Floor Construction"]
insulation = row["Floor Insulation"]
# 2. Missing age band → conservative fallback
if pd.isnull(age_band):
return EpcFloorDescriptions.unknown
# 3. Known floor types
if floor_type in ["Solid", "SuspendedTimber", "SuspendedNotTimber"]:
classifier = as_built_floor_classifiers[floor_type]
return classifier(age_band)
# 4. Unknown floor type
if floor_type == "Unknown":
classifier = unknown_as_built_floor_classifiers[insulation]
return classifier(age_band)
# 5. Truly missing / garbage input
return EpcFloorDescriptions.unknown
def map_floor_construction(self):
    """
    Fill the landlord floor construction column.

    The (construction, insulation) pair is looked up in ``floor_map``, each row is then passed
    through ``_fill_floor_as_built``, and ``assert_no_nulls`` is run on the result.
    """
    target_column = self.landlord_floor_construction

    floor_keys = self.data[["Floor Construction", "Floor Insulation"]].progress_apply(tuple, axis=1)
    self.data[target_column] = floor_keys.map(floor_map)

    self.data[target_column] = self.data.progress_apply(self._fill_floor_as_built, axis=1)
    self.assert_no_nulls(self.data, target_column)
def map_glazing(self):
    """
    Fill the five landlord glazing columns from "Glazing".

    Each value is looked up in ``glazing_map`` and the mapped entry is expanded with
    ``pd.Series`` into the windows type, windows efficiency, multi-glaze proportion,
    glazed type and glazed area columns (in that order).
    """
    # TODO: it probably doesn't make sense to store multi glazed proportion, glazed type or
    #  glazed area. There may be an argument for keeping landlord_multi_glaze_proportion,
    #  as this could be variable.
    expanded = self.data["Glazing"].map(glazing_map).progress_apply(pd.Series)
    glazing_columns = [
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_multi_glaze_proportion,
        self.landlord_glazed_type,
        self.landlord_glazed_area,
    ]
    self.data[glazing_columns] = expanded
def map_heating(self):
    """
    Fill the landlord heating, fuel, heating controls and hot water columns.

    The ("Heating", "Boiler Efficiency", "Main Fuel", "Controls Adequacy") tuple of each row is
    looked up in ``heating_map`` and the mapped entry is expanded with ``pd.Series`` into the
    seven target columns listed below (in that order).
    """
    # TODO - when mapping heating controls, we should check the existing heating controls and
    #  the efficiency rating. For sub optimal heating controls we make an assumption as to what
    #  the controls are, so the efficiency rating prescribed here may not be accurate. It is
    #  therefore an upper limit rather than a guaranteed rating. To stress, this is only
    #  relevant for sub optimal heating controls, e.g. it may be programmer and room thermostat.
    source_columns = [
        "Heating",
        "Boiler Efficiency",
        "Main Fuel",
        "Controls Adequacy",
    ]
    heating_keys = self.data[source_columns].progress_apply(tuple, axis=1)
    expanded = heating_keys.map(heating_map).progress_apply(pd.Series)

    target_columns = [
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    ]
    self.data[target_columns] = expanded
def map_floor_area(self):
# This is just a rename
self.data = self.data.rename(
columns={"Total Floor Area (m2)": self.landlord_total_floor_area_m2}
)
def select_columns(self):
self.data = self.data[
[
"Org Ref",
"UPRN",
"Address 1",
"Address 2",
"Address 3",
"Postcode",
self.landlord_total_floor_area_m2,
self.landlord_construction_age_band,
self.landlord_property_type,
self.landlord_built_form,
self.landlord_wall_construction,
self.landlord_wall_efficiency,
self.landlord_roof_construction,
self.landlord_roof_efficiency,
self.landlord_has_sloping_ceiling,
self.landlord_floor_construction,
self.landlord_windows_type,
self.landlord_windows_efficiency,
self.landlord_multi_glaze_proportion,
self.landlord_glazed_type,
self.landlord_glazed_area,
self.landlord_heating_construction,
self.landlord_heating_efficiency,
self.landlord_fuel_type,
self.landlord_heating_controls,
self.landlord_heating_controls_efficiency,
self.landlord_hot_water_system,
self.landlord_hot_water_efficiency
]
].rename(
columns={
"Org Ref": "landlord_property_id",
"Address1": "address1",
"Address2": "address2",
"Address3": "address3",
"Postcode": "postcode",
}
)
def extract_values(self):
for columns in [
self.landlord_construction_age_band, self.landlord_property_type, self.landlord_built_form,
self.landlord_wall_construction, self.landlord_wall_efficiency, self.landlord_roof_construction,
self.landlord_roof_efficiency, self.landlord_floor_construction, self.landlord_windows_type,
self.landlord_windows_efficiency, self.landlord_heating_construction, self.landlord_heating_efficiency,
self.landlord_fuel_type, self.landlord_heating_controls, self.landlord_heating_controls_efficiency,
self.landlord_hot_water_system, self.landlord_hot_water_efficiency
]:
self.data[columns] = self.data[columns].progress_apply(lambda x: x.value if hasattr(x, "value") else x)
def transform(self):
# ------------ construction_age_band ------------
self.map_construction_age_band()
# ------------ property_type ------------
self.map_property_type()
# ------------ built_form ------------
self.map_built_form()
# ------------ Wall Construction ------------
self.map_wall_construction()
# ------------ Roof Construction ------------
self.map_roof_construction()
# ------------ Floor Construction ------------
self.map_floor_construction()
# ------------ Glazing ------------
self.map_glazing()
# ------------ Heating, fuel, controls & hot water ------------
self.map_heating()
# ------------ Floor Area ------------
self.map_floor_area()
# ------------ Formating ------------
self.select_columns()
self.extract_values()

View file

@ -0,0 +1,6 @@
# NOTE(review): boto3 is the only dependency listed without a pinned version - confirm this is intentional.
boto3
numpy==2.1.2
pandas==2.2.3
tqdm==4.66.5
pydantic==2.9.2
openpyxl==3.1.2

View file

@ -0,0 +1,97 @@
import pytest
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.floor import EpcFloorDescriptions
from backend.onboarders.mappings.parity.as_built_floor_classifiers import (
unknown_floor_as_built,
unknown_floor_retrofitted,
map_solid_floor_as_built,
map_suspended_floor_as_built,
)
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        # Before 1900 / 1900-1929 -> suspended, no insulation
        (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_no_insulation_assumed),
        # 1930-1995 -> solid, no insulation
        (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1967_to_1975, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed),
        # 1996-2002 -> solid, limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed),
        # 2003+ -> solid, insulated
        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed),
    ],
)
def test_unknown_floor_as_built(age_band, expected):
    """unknown_floor_as_built returns the expected as-built description for each age band."""
    actual = unknown_floor_as_built(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        # Pre-1930 -> suspended, insulated
        (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_insulated),
        (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_insulated),
        # 1930+ -> solid, insulated
        (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_insulated),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_insulated),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated),
    ],
)
def test_unknown_floor_retrofitted(age_band, expected):
    """unknown_floor_retrofitted returns the expected insulated description for each age band."""
    actual = unknown_floor_retrofitted(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        # 1983-1995 -> no insulation
        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed),
        # 1996-2002 -> limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed),
        # 2003+ -> insulated
        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed),
    ],
)
def test_solid_floor_as_built(age_band, expected):
    """map_solid_floor_as_built returns the expected solid-floor description for each age band."""
    actual = map_solid_floor_as_built(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        # 1983-1995 -> no insulation
        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.suspended_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.suspended_no_insulation_assumed),
        # 1996-2002 -> limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.suspended_limited_insulation_assumed),
        # 2003+ -> insulated
        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.suspended_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.suspended_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.suspended_insulated_assumed),
    ],
)
def test_suspended_floor_as_built(age_band, expected):
    """map_suspended_floor_as_built returns the expected suspended-floor description for each age band."""
    actual = map_suspended_floor_as_built(age_band)
    assert actual == expected

View file

@ -0,0 +1,173 @@
import pytest
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.efficiency import EpcEfficiency
from backend.onboarders.mappings.parity.as_built_roof_classifiers import (
map_flat_roof,
map_sloping_ceiling_roof,
)
from backend.onboarders.mappings.parity.roof import resolve_roof_efficiency
# ---------------------------------------------------------------------
# As-built roof description classification
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.flat_no_insulation),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcRoofDescriptions.flat_no_insulation),
        (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.flat_limited_insulation),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.flat_limited_insulation),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.flat_insulated),
        (EpcConstructionAgeBand.from_2007_to_2011, EpcRoofDescriptions.flat_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.flat_insulated),
    ],
)
def test_classify_flat_roof(age_band, expected):
    """map_flat_roof returns the expected flat-roof description for each age band."""
    actual = map_flat_roof(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.sloping_pitched_no_insulation),
        (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.sloping_pitched_no_insulation),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.sloping_pitched_limited_insulation),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.sloping_pitched_insulated),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcRoofDescriptions.sloping_pitched_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.sloping_pitched_insulated),
    ],
)
def test_classify_sloping_ceiling_roof(age_band, expected):
    """map_sloping_ceiling_roof returns the expected sloping-ceiling description for each age band."""
    actual = map_sloping_ceiling_roof(age_band)
    assert actual == expected
# ---------------------------------------------------------------------
# Roof efficiency — fixed & age-band driven
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("description", "age_band", "expected"),
    [
        # Flat roof, no insulation
        (
            EpcRoofDescriptions.flat_no_insulation,
            EpcConstructionAgeBand.before_1900,
            EpcEfficiency.VERY_POOR,
        ),
        # Flat roof, limited insulation (age-band driven)
        (
            EpcRoofDescriptions.flat_limited_insulation,
            EpcConstructionAgeBand.from_1976_to_1982,
            EpcEfficiency.POOR,
        ),
        (
            EpcRoofDescriptions.flat_limited_insulation,
            EpcConstructionAgeBand.from_1967_to_1975,
            EpcEfficiency.VERY_POOR,
        ),
        # Flat roof, insulated (age-band driven)
        (
            EpcRoofDescriptions.flat_insulated,
            EpcConstructionAgeBand.from_1983_to_1990,
            EpcEfficiency.AVERAGE,
        ),
        (
            EpcRoofDescriptions.flat_insulated,
            EpcConstructionAgeBand.from_2003_to_2006,
            EpcEfficiency.GOOD,
        ),
        (
            EpcRoofDescriptions.flat_insulated,
            EpcConstructionAgeBand.from_2023_onwards,
            EpcEfficiency.VERY_GOOD,
        ),
        # Pitched, insulated assumed (loft)
        (
            EpcRoofDescriptions.pitched_insulated_assumed,
            EpcConstructionAgeBand.from_1996_to_2002,
            EpcEfficiency.GOOD,
        ),
        (
            EpcRoofDescriptions.pitched_insulated_assumed,
            EpcConstructionAgeBand.from_2007_to_2011,
            EpcEfficiency.VERY_GOOD,
        ),
    ],
)
def test_roof_efficiency_age_band_only(description, age_band, expected):
    """With no insulation thickness supplied, the efficiency follows the description and age band."""
    efficiency = resolve_roof_efficiency(
        description=description,
        age_band=age_band,
        insulation_thickness=None,
    )
    assert efficiency == expected
# ---------------------------------------------------------------------
# Roof efficiency — insulation thickness driven
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("description", "thickness", "expected"),
    [
        # Loft insulation
        (EpcRoofDescriptions.loft_12mm_insulation, 12, EpcEfficiency.VERY_POOR),
        (EpcRoofDescriptions.loft_25mm_insulation, 25, EpcEfficiency.POOR),
        (EpcRoofDescriptions.loft_75mm_insulation, 75, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.loft_150mm_insulation, 150, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.loft_300mm_insulation, 300, EpcEfficiency.VERY_GOOD),
        # Flat insulated - thickness overrides age band
        (EpcRoofDescriptions.flat_insulated, 50, EpcEfficiency.POOR),
        (EpcRoofDescriptions.flat_insulated, 100, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.flat_insulated, 200, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.flat_insulated, 300, EpcEfficiency.VERY_GOOD),
        # Sloping ceiling
        (EpcRoofDescriptions.sloping_pitched_insulated, 75, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.sloping_pitched_insulated, 150, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.sloping_pitched_insulated, 350, EpcEfficiency.VERY_GOOD),
    ],
)
def test_roof_efficiency_thickness_based(description, thickness, expected):
    """When an insulation thickness is supplied, it is expected to decide the efficiency."""
    efficiency = resolve_roof_efficiency(
        description=description,
        # The age band is fixed for every case; it should be ignored
        age_band=EpcConstructionAgeBand.before_1900,
        insulation_thickness=thickness,
    )
    assert efficiency == expected
# ---------------------------------------------------------------------
# Thatched roofs
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("description", "age_band", "expected"),
    [
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.before_1900, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD),
    ],
)
def test_thatched_efficiency_age_band(description, age_band, expected):
    """Thatched roofs without an insulation thickness are rated from the age band."""
    efficiency = resolve_roof_efficiency(
        description=description,
        age_band=age_band,
        insulation_thickness=None,
    )
    assert efficiency == expected
@pytest.mark.parametrize(
    ("thickness", "expected"),
    [
        (12, EpcEfficiency.AVERAGE),
        (50, EpcEfficiency.GOOD),
        (150, EpcEfficiency.GOOD),
        (200, EpcEfficiency.VERY_GOOD),
    ],
)
def test_thatched_efficiency_thickness(thickness, expected):
    """Thatched roofs with additional insulation are rated from the supplied thickness."""
    efficiency = resolve_roof_efficiency(
        description=EpcRoofDescriptions.thatched_with_additional_insulation,
        age_band=EpcConstructionAgeBand.before_1900,
        insulation_thickness=thickness,
    )
    assert efficiency == expected
# ---------------------------------------------------------------------
# Unknown / holding descriptions
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "description",
    [
        EpcRoofDescriptions.flat_as_built_unknown,
        EpcRoofDescriptions.loft_as_built_unknown,
        EpcRoofDescriptions.thatched_as_built_unknown,
        EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    ],
)
def test_unknown_roof_descriptions_return_na(description):
    """The "as built unknown" roof descriptions resolve to EpcEfficiency.NA."""
    efficiency = resolve_roof_efficiency(
        description=description,
        age_band=None,
        insulation_thickness=None,
    )
    assert efficiency == EpcEfficiency.NA

View file

@ -0,0 +1,161 @@
import pytest
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.walls import EpcWallDescriptions
from datatypes.epc.efficiency import EpcEfficiency
from backend.onboarders.mappings.parity.walls import resolve_wall_efficiency
from backend.onboarders.mappings.parity.as_built_wall_classifiers import (
map_cavity_wall_insulation,
map_solid_wall_insulation,
map_timber_frame_wall_insulation,
map_system_build_wall_insulation,
map_granite_wall_insulation,
map_sandstone_wall_insulation,
map_cob_wall_insulation,
)
# ---------------------------------------------------------------------
# As-built wall description classification
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cavity_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.cavity_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cavity_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cavity_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcWallDescriptions.cavity_insulated_assumed),
    ],
)
def test_map_cavity_wall_insulation(age_band, expected):
    """map_cavity_wall_insulation returns the expected cavity wall description for each age band."""
    actual = map_cavity_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.solid_brick_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.solid_brick_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1996_to_2002, EpcWallDescriptions.solid_brick_insulated_assumed),
    ],
)
def test_map_solid_wall_insulation(age_band, expected):
    """map_solid_wall_insulation returns the expected solid brick description for each age band."""
    actual = map_solid_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.timber_frame_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.timber_frame_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.timber_frame_insulated_assumed),
    ],
)
def test_map_timber_frame_wall_insulation(age_band, expected):
    """map_timber_frame_wall_insulation returns the expected timber frame description for each age band."""
    actual = map_timber_frame_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.system_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.system_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_2003_to_2006, EpcWallDescriptions.system_insulated_assumed),
    ],
)
def test_map_system_wall_insulation(age_band, expected):
    """map_system_build_wall_insulation returns the expected system build description for each age band."""
    actual = map_system_build_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.granite_whinstone_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.granite_whinstone_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcWallDescriptions.granite_whinestone_insulated_assumed),
    ],
)
def test_map_granite_wall_insulation(age_band, expected):
    """map_granite_wall_insulation returns the expected granite/whinstone description for each age band."""
    actual = map_granite_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.sandstone_limestone_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_2007_to_2011, EpcWallDescriptions.sandstone_limestone_insulated_assumed),
    ],
)
def test_map_sandstone_wall_insulation(age_band, expected):
    """map_sandstone_wall_insulation returns the expected sandstone/limestone description for each age band."""
    actual = map_sandstone_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cob_as_built_average),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cob_as_built_average),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cob_as_built_good),
    ],
)
def test_map_cob_wall_insulation(age_band, expected):
    """map_cob_wall_insulation returns the expected cob wall description for each age band."""
    actual = map_cob_wall_insulation(age_band)
    assert actual == expected
# ---------------------------------------------------------------------
# Wall efficiency resolution
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("description", "age_band", "expected"),
    [
        # Fixed efficiencies
        (EpcWallDescriptions.cavity_no_insulation_assumed, None, EpcEfficiency.POOR),
        (EpcWallDescriptions.cavity_partial_insulated_assumed, None, EpcEfficiency.AVERAGE),
        (EpcWallDescriptions.cavity_insulated_assumed, None, EpcEfficiency.GOOD),
        # Function-based efficiencies
        (
            EpcWallDescriptions.cavity_filled_cavity,
            EpcConstructionAgeBand.from_2023_onwards,
            EpcEfficiency.VERY_GOOD,
        ),
        (
            EpcWallDescriptions.cavity_filled_cavity,
            EpcConstructionAgeBand.from_1991_to_1995,
            EpcEfficiency.GOOD,
        ),
        (
            EpcWallDescriptions.solid_brick_internal_insulation,
            EpcConstructionAgeBand.from_2003_to_2006,
            EpcEfficiency.VERY_GOOD,
        ),
        (
            EpcWallDescriptions.solid_brick_internal_insulation,
            EpcConstructionAgeBand.from_1950_to_1966,
            EpcEfficiency.GOOD,
        ),
    ],
)
def test_resolve_wall_efficiency(description, age_band, expected):
    """resolve_wall_efficiency returns the expected rating for fixed and age-band-driven descriptions."""
    efficiency = resolve_wall_efficiency(description, age_band)
    assert efficiency == expected
@pytest.mark.parametrize(
    "description",
    [
        EpcWallDescriptions.cavity_as_built_unknown,
        EpcWallDescriptions.solid_brick_as_built_unknown,
        EpcWallDescriptions.system_as_built_unknown,
        EpcWallDescriptions.timber_frame_as_built_unknown,
        EpcWallDescriptions.granite_as_built_unknown,
        EpcWallDescriptions.sandstone_as_built_unknown,
        EpcWallDescriptions.cob_as_built_unknown,
    ],
)
def test_unknown_wall_descriptions_return_na(description):
    """The "as built unknown" wall descriptions resolve to EpcEfficiency.NA without an age band."""
    efficiency = resolve_wall_efficiency(description, None)
    assert efficiency == EpcEfficiency.NA

View file

@ -0,0 +1,9 @@
# Container image built on the AWS Lambda Python 3.10 base image.
FROM public.ecr.aws/lambda/python:3.10
# Set working directory (Lambda task root)
WORKDIR /var/task
# NOTE(review): no COPY of the handler module and no dependency installation appear in this
# file - confirm the code and its requirements are added to the image by another build step.
# -----------------------------
# Lambda handler
# -----------------------------
# Handler reference; presumably "<module>.<function>", i.e. handler() in main.py - TODO confirm
# that main.py ends up in /var/task.
CMD ["main.handler"]

View file

@ -0,0 +1,127 @@
import pandas as pd
import requests
from backend.address2UPRN.main import (
resolve_uprns_for_postcode_group,
get_epc_data_with_postcode,
)
from tqdm import tqdm
def sanitise_postcode(postcode: str) -> str | None:
"""
Normalise postcode for grouping.
- Uppercase
- Remove all whitespace
"""
if pd.isna(postcode):
return None
return postcode.upper().replace(" ", "")
def is_valid_postcode(postcode_clean: str) -> bool:
    """
    Ask postcodes.io whether a postcode is valid.

    :param postcode_clean: sanitised postcode (e.g. E84SQ)
    :return: the "result" field of the validation response; False when the postcode is empty
        or the request fails with a ``requests.RequestException``
    """
    url_template = "https://api.postcodes.io/postcodes/{postcode}/validate"

    if not postcode_clean:
        return False

    validate_url = url_template.format(postcode=postcode_clean)
    try:
        response = requests.get(validate_url, timeout=5)
        response.raise_for_status()
        payload = response.json()
    except requests.RequestException:
        # Network issues, rate limits, etc.
        return False
    return payload.get("result", False)
def main():
    """
    Exploratory run: resolve UPRNs for the first 500 rows of the "Sustainability" sheet
    of hackney.xlsx, one postcode group at a time.

    :return: the concatenated per-postcode results, or None when no row had a valid postcode
    """
    df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability")
    df = df.head(500)

    # Sanitise postcodes
    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)

    # Validate each unique, non-null postcode once (saves API calls)
    unique_postcodes = df["postcode_clean"].dropna().unique()
    postcode_validity = {
        pc: is_valid_postcode(pc)
        for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
    }

    # Map validity back onto the dataframe. Rows without a postcode have no entry in the
    # lookup; they must become False rather than NaN, otherwise the boolean mask below fails.
    df["postcode_valid"] = df["postcode_clean"].map(
        lambda pc: bool(postcode_validity.get(pc, False))
    )

    results = []
    for postcode, group_df in tqdm(
            df[df["postcode_valid"]].groupby("postcode_clean"),
            desc="Resolving UPRNs by postcode",
    ):
        try:
            epc_df = get_epc_data_with_postcode(postcode)
            if epc_df.empty:
                tmp = group_df.copy()
                tmp["found_uprn"] = None
                tmp["status"] = "no_epc_results"
                results.append(tmp)
                continue
            resolved = resolve_uprns_for_postcode_group(
                group_df=group_df,
                epc_df=epc_df,
            )
            results.append(resolved)
        except Exception as e:
            # Best effort: keep the rows and record why the group could not be resolved
            tmp = group_df.copy()
            tmp["found_uprn"] = None
            tmp["status"] = "exception"
            tmp["error"] = str(e)
            results.append(tmp)

    # pd.concat raises on an empty list, which happens when no postcode validated
    if not results:
        print("No valid postcodes to resolve")
        return None

    final_df = pd.concat(results, ignore_index=True)

    # Views kept for interactive inspection. reindex (rather than plain selection) tolerates
    # the match columns being absent, e.g. when every group ended in "no_epc_results".
    view_columns = [
        "best_match_lexiscore",
        "Address 1",
        "best_match_address",
        "Postcode",
        "UPRN",
        "best_match_uprn",
    ]
    overview = final_df.reindex(columns=view_columns)
    matched = overview[overview["best_match_lexiscore"] > 0]  # noqa: F841 - inspection only

    return final_df
def handler(event, context):
    """
    Placeholder Lambda entry point: prints a greeting and returns a fixed 200 response.

    :param event: invocation event (unused)
    :param context: invocation context (unused)
    :return: dict with "statusCode" 200 and body "hello world"
    """
    print("hello Postcode splitter world")
    response = {"statusCode": 200, "body": "hello world"}
    return response
if __name__ == "__main__":
main()

View file

@ -1,5 +1,11 @@
import os
from backend.app.config import get_settings
import os
from dotenv import load_dotenv
import os
# Load .env in conftest.py directory for local development
load_dotenv()
DEFAULT_ENV = {
"API_KEY": "test",
@ -8,7 +14,10 @@ DEFAULT_ENV = {
"DATA_BUCKET": "test",
"PLAN_TRIGGER_BUCKET": "test",
"ENGINE_SQS_URL": "test",
"EPC_AUTH_TOKEN": "test", # overridden in GitHub Actions
"EPC_AUTH_TOKEN": os.getenv(
"EPC_AUTH_TOKEN",
"test",
), # overridden in GitHub Actions
"GOOGLE_SOLAR_API_KEY": "test",
"DB_HOST": "localhost",
"DB_USERNAME": "test",

View file

View file

@ -0,0 +1,45 @@
import re
from enum import Enum
from typing import List
class EpcConstructionAgeBand(Enum):
    """Construction age bands, keyed by the 'England and Wales: ...' label each member carries."""

    before_1900: str = 'England and Wales: before 1900'
    from_1900_to_1929: str = 'England and Wales: 1900-1929'
    from_1930_to_1949: str = 'England and Wales: 1930-1949'
    from_1950_to_1966: str = 'England and Wales: 1950-1966'
    from_1967_to_1975: str = 'England and Wales: 1967-1975'
    from_1976_to_1982: str = 'England and Wales: 1976-1982'
    from_1983_to_1990: str = 'England and Wales: 1983-1990'
    from_1991_to_1995: str = 'England and Wales: 1991-1995'
    from_1996_to_2002: str = 'England and Wales: 1996-2002'
    from_2003_to_2006: str = 'England and Wales: 2003-2006'
    from_2007_to_2011: str = 'England and Wales: 2007-2011'
    from_2012_onwards: str = 'England and Wales: 2012-onwards'
    from_2012_to_2022: str = 'England and Wales: 2012-2022'
    from_2023_onwards: str = 'England and Wales: 2023 onwards'

    def start_year(self) -> int:
        """
        Return the first year covered by this age band.

        Bands whose label contains "before" have no lower bound and report 0.

        :raises ValueError: when the label contains no four-digit year
        """
        label = self.value.lower()
        if 'before' in label:
            return 0

        first_year = re.search(r'(\d{4})', label)
        if first_year is None:
            raise ValueError(f"Cannot determine start year from '{self.value}'")
        return int(first_year.group(1))

    @classmethod
    def from_year_onwards(cls, year: int) -> List["EpcConstructionAgeBand"]:
        """
        Return, in definition order, every age band that starts in or after the given year.
        """
        return list(filter(lambda band: band.start_year() >= year, cls))

Some files were not shown because too many files have changed in this diff Show more