mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #700 from Hestia-Homes/main
Hot fixes for ara bugs + rolling out new terraform & github workflows structure
This commit is contained in:
commit
44f4185a7a
156 changed files with 6806 additions and 1275 deletions
40
.devcontainer/asset_list/Dockerfile
Normal file
40
.devcontainer/asset_list/Dockerfile
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# Dev-container image for the asset_list tooling: Python 3.11 on Debian
# bullseye, libpostal built from source, and a passwordless-sudo dev user.
FROM python:3.11.10-bullseye

# Name of the unprivileged development user created in step 3.
ARG USER=vscode
# Build-time only so apt never prompts; kept as ARG (not ENV) so it does not
# leak into the running container.
ARG DEBIAN_FRONTEND=noninteractive

# 1) Toolchain + utilities for building libpostal
# update + install + list clean-up share one layer to avoid a stale apt cache.
RUN apt-get update && apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        build-essential \
        ca-certificates \
        curl \
        git \
        jq \
        libtool \
        pkg-config \
        sudo \
        vim \
    && rm -rf /var/lib/apt/lists/*

# 2) Build and install libpostal from source
# Branch or tag to build; defaults to the upstream default branch. Override
# with --build-arg LIBPOSTAL_REF=<tag> for a reproducible build.
ARG LIBPOSTAL_REF=master
# Clone, build and clean-up run in a single layer so the source tree never
# persists in the image.
RUN git clone --depth 1 --branch "${LIBPOSTAL_REF}" https://github.com/openvenues/libpostal /tmp/libpostal \
    && cd /tmp/libpostal \
    && ./bootstrap.sh \
    && ./configure --datadir=/usr/local/share/libpostal \
    && make -j"$(nproc)" \
    && make install \
    && ldconfig \
    && rm -rf /tmp/libpostal

# 3) Create the user and grant sudo privileges
# /bin/bash resolves on both merged-/usr and split-/usr Debian images, whereas
# /usr/bin/bash only exists on the former.
# NOTE(review): confirm which layout this base image uses.
RUN useradd -m -s /bin/bash "${USER}" \
    && echo "${USER} ALL=(ALL) NOPASSWD: ALL" > "/etc/sudoers.d/${USER}" \
    && chmod 0440 "/etc/sudoers.d/${USER}"

# 4) Python deps - needed if you want to run the asset list tooling
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# COPY (not ADD): these are plain local files, no archive extraction or URL fetch.
COPY asset_list/requirements.txt /requirements1.txt
COPY .devcontainer/asset_list/requirements.txt /requirements2.txt
# Pass both files to pip instead of concatenating them: `cat a b` fuses the
# last line of `a` with the first line of `b` when `a` lacks a trailing newline.
RUN pip install -r /requirements1.txt -r /requirements2.txt

# 5) Workdir
WORKDIR /workspaces/model

# 6) Make Python find your package
# Add project root to PYTHONPATH for all processes.
# PYTHONPATH is not defined earlier in this file, so appending ${PYTHONPATH}
# would expand to nothing and leave a dangling ':' (unless the base image
# defines it - NOTE(review): confirm).
ENV PYTHONPATH=/workspaces/model
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"name": "Basic Python",
|
||||
"name": "SAL ENV",
|
||||
"dockerComposeFile": "docker-compose.yml",
|
||||
"service": "model",
|
||||
"service": "model-sal",
|
||||
"remoteUser": "vscode",
|
||||
"workspaceFolder": "/workspaces/model",
|
||||
"postStartCommand": "bash .devcontainer/post-install.sh",
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
model:
|
||||
model-sal:
|
||||
user: "${UID}:${GID}"
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: .devcontainer/Dockerfile
|
||||
context: ../..
|
||||
dockerfile: .devcontainer/asset_list/Dockerfile
|
||||
command: sleep infinity
|
||||
volumes:
|
||||
- ..:/workspaces/model
|
||||
- ../../:/workspaces/model
|
||||
networks:
|
||||
- model-net
|
||||
|
||||
|
|
@ -11,4 +11,4 @@ if os.path.exists(env_path):
|
|||
print("✔ Loaded .env into Jupyter kernel")
|
||||
else:
|
||||
print("⚠ No .env file found to load")
|
||||
EOF
|
||||
EOF
|
||||
23
.devcontainer/asset_list/requirements.txt
Normal file
23
.devcontainer/asset_list/requirements.txt
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
fastapi==0.115.2
|
||||
sqlalchemy==2.0.36
|
||||
psycopg2-binary==2.9.10
|
||||
python-jose==3.3.0
|
||||
cryptography==43.0.3
|
||||
mangum==0.19.0
|
||||
# AWS
|
||||
boto3==1.35.44
|
||||
# Data
|
||||
openpyxl==3.1.2
|
||||
# Basic
|
||||
pytz
|
||||
uvicorn[standard]
|
||||
# Testing
|
||||
pytest==9.0.2
|
||||
pytest-cov==7.0.0
|
||||
ipykernel>=6.25,<7
|
||||
pyyaml>=6.0.1
|
||||
sqlmodel
|
||||
# Formatting
|
||||
black==26.1.0
|
||||
# Provides `from dotenv import load_dotenv`; the PyPI project named `dotenv` is a different distribution.
python-dotenv
|
||||
pydantic-settings
|
||||
|
|
@ -34,7 +34,7 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
|
|||
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
ADD backend/engine/requirements.txt requirements1.txt
|
||||
ADD backend/app/requirements/requirements.txt requirements2.txt
|
||||
ADD .devcontainer/requirements.txt requirements3.txt
|
||||
ADD .devcontainer/backend/requirements.txt requirements3.txt
|
||||
RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
40
.devcontainer/backend/devcontainer.json
Normal file
40
.devcontainer/backend/devcontainer.json
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
{
|
||||
"name": "Backend Model Env",
|
||||
"dockerComposeFile": "docker-compose.yml",
|
||||
"service": "model-backend",
|
||||
"remoteUser": "vscode",
|
||||
"workspaceFolder": "/workspaces/model",
|
||||
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
|
||||
"mounts": [
|
||||
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"ms-python.python",
|
||||
"ms-toolsai.jupyter",
|
||||
"mechatroner.rainbow-csv",
|
||||
"ms-toolsai.datawrangler",
|
||||
"lindacong.vscode-book-reader",
|
||||
"4ops.terraform",
|
||||
"fabiospampinato.vscode-todo-plus",
|
||||
"jgclark.vscode-todo-highlight",
|
||||
"corentinartaud.pdfpreview",
|
||||
"ms-python.vscode-python-envs",
|
||||
"ms-python.black-formatter",
|
||||
"waderyan.gitblame"
|
||||
],
|
||||
"settings": {
|
||||
"files.defaultWorkspace": "/workspaces/model",
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||
"editor.formatOnSave": true
|
||||
},
|
||||
"python.formatting.provider": "none"
|
||||
}
|
||||
}
|
||||
},
|
||||
"containerEnv": {
|
||||
"PYTHONFLAGS": "-Xfrozen_modules=off"
|
||||
}
|
||||
}
|
||||
28
.devcontainer/backend/docker-compose.yml
Normal file
28
.devcontainer/backend/docker-compose.yml
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
model-backend:
|
||||
user: "${UID}:${GID}"
|
||||
build:
|
||||
context: ../..
|
||||
dockerfile: .devcontainer/backend/Dockerfile
|
||||
command: sleep infinity
|
||||
volumes:
|
||||
- ../../:/workspaces/model
|
||||
|
||||
|
||||
db:
|
||||
image: postgres:17.4
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 5432:5432
|
||||
environment:
|
||||
- PGDATABASE=tech_team_local_db
|
||||
- POSTGRES_USER=postgres
|
||||
- POSTGRES_PASSWORD=makingwarmerhomes
|
||||
volumes:
|
||||
- postgres-data-two:/var/lib/postgresql/data
|
||||
|
||||
|
||||
volumes:
|
||||
postgres-data-two:
|
||||
14
.devcontainer/backend/post-install.sh
Normal file
14
.devcontainer/backend/post-install.sh
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
mkdir -p ~/.ipython/profile_default/startup
|
||||
|
||||
cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
# Adjust path as needed
|
||||
env_path = "/workspaces/model/backend/.env"
|
||||
if os.path.exists(env_path):
|
||||
load_dotenv(env_path)
|
||||
print("✔ Loaded .env into Jupyter kernel")
|
||||
else:
|
||||
print("⚠ No .env file found to load")
|
||||
EOF
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
# fastapi
|
||||
|
||||
fastapi==0.115.2
|
||||
sqlalchemy==2.0.36
|
||||
pydantic-settings==2.6.0
|
||||
107
.github/workflows/_build_image.yml
vendored
Normal file
107
.github/workflows/_build_image.yml
vendored
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
name: Build Docker image
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
ecr_repo:
|
||||
required: true
|
||||
type: string
|
||||
dockerfile_path:
|
||||
required: true
|
||||
type: string
|
||||
build_context:
|
||||
required: false
|
||||
default: "."
|
||||
type: string
|
||||
build_args:
|
||||
required: false
|
||||
type: string
|
||||
|
||||
outputs:
|
||||
image_digest:
|
||||
description: "Pushed image digest"
|
||||
value: ${{ jobs.build.outputs.image_digest }}
|
||||
ecr_repo_url:
|
||||
description: "ECR repository URL"
|
||||
value: ${{ jobs.build.outputs.ecr_repo_url }}
|
||||
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID:
|
||||
required: true
|
||||
AWS_SECRET_ACCESS_KEY:
|
||||
required: true
|
||||
AWS_REGION:
|
||||
required: true
|
||||
DEV_DB_HOST:
|
||||
required: false
|
||||
DEV_DB_PORT:
|
||||
required: false
|
||||
DEV_DB_NAME:
|
||||
required: false
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
env:
|
||||
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
||||
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
||||
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||
|
||||
outputs:
|
||||
image_digest: ${{ steps.digest.outputs.image_digest }}
|
||||
ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ secrets.AWS_REGION }}
|
||||
|
||||
- uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Resolve ECR repo URL
|
||||
id: repo
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
|
||||
|
||||
ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${{ inputs.ecr_repo }}"
|
||||
|
||||
echo "Resolved ECR repo URL (local var):"
|
||||
echo "$ECR_REPO_URL"
|
||||
|
||||
echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Build & push image
|
||||
run: |
|
||||
IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}"
|
||||
|
||||
# Writes build args and removes line breaks
|
||||
BUILD_ARGS=""
|
||||
while IFS= read -r line; do
|
||||
# skip empty lines
|
||||
[ -n "$line" ] || continue
|
||||
temp=$(eval echo "$line")
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg $temp"
|
||||
done <<< "${{ inputs.build_args }}"
|
||||
|
||||
docker build \
|
||||
-f ${{ inputs.dockerfile_path }} \
|
||||
$BUILD_ARGS \
|
||||
-t $IMAGE_URI \
|
||||
${{ inputs.build_context }}
|
||||
|
||||
docker push $IMAGE_URI
|
||||
|
||||
- name: Resolve image digest
|
||||
id: digest
|
||||
run: |
|
||||
DIGEST=$(aws ecr describe-images \
|
||||
--repository-name ${{ inputs.ecr_repo }} \
|
||||
--image-ids imageTag=${GITHUB_SHA} \
|
||||
--query 'imageDetails[0].imageDigest' \
|
||||
--output text)
|
||||
echo "image_digest=$DIGEST" >> "$GITHUB_OUTPUT"
|
||||
91
.github/workflows/_deploy_lambda.yml
vendored
Normal file
91
.github/workflows/_deploy_lambda.yml
vendored
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
name: Deploy Lambda (Terraform)
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
lambda_name:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
lambda_path:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
stage:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
ecr_repo:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
image_digest:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID:
|
||||
required: true
|
||||
AWS_SECRET_ACCESS_KEY:
|
||||
required: true
|
||||
AWS_REGION:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Debug inputs
|
||||
run: |
|
||||
echo "lambda_name=${{ inputs.lambda_name }}"
|
||||
echo "lambda_path=${{ inputs.lambda_path }}"
|
||||
echo "stage=${{ inputs.stage }}"
|
||||
# This workflow declares `ecr_repo` (repository name), not `ecr_repo_url`; the URL is resolved in a later step.
echo "ecr_repo=${{ inputs.ecr_repo }}"
|
||||
echo "image_digest=${{ inputs.image_digest }}"
|
||||
|
||||
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ secrets.AWS_REGION }}
|
||||
|
||||
- uses: hashicorp/setup-terraform@v3
|
||||
|
||||
- uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Resolve ECR repo URL
|
||||
id: repo
|
||||
env:
|
||||
AWS_REGION: ${{ secrets.AWS_REGION }}
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
|
||||
ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${{ inputs.ecr_repo }}"
|
||||
echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Terraform Init
|
||||
working-directory: ${{ inputs.lambda_path }}
|
||||
run: terraform init -reconfigure
|
||||
|
||||
- name: Terraform Workspace
|
||||
working-directory: ${{ inputs.lambda_path }}
|
||||
run: |
|
||||
terraform workspace select ${{ inputs.stage }} \
|
||||
|| terraform workspace new ${{ inputs.stage }}
|
||||
|
||||
- name: Terraform Plan
|
||||
working-directory: ${{ inputs.lambda_path }}
|
||||
run: |
|
||||
terraform plan \
|
||||
-var="stage=${{ inputs.stage }}" \
|
||||
-var="lambda_name=${{ inputs.lambda_name }}" \
|
||||
-var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
|
||||
-var="image_digest=${{ inputs.image_digest }}" \
|
||||
-out=lambdaplan
|
||||
|
||||
- name: Terraform Apply
|
||||
working-directory: ${{ inputs.lambda_path }}
|
||||
run: terraform apply -auto-approve lambdaplan
|
||||
204
.github/workflows/deploy_terraform.yml
vendored
204
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -1,80 +1,172 @@
|
|||
name: Deploy terraform stack
|
||||
name: Deploy infrastructure
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- dev
|
||||
- prod
|
||||
- "**"
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
determine_stage:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
stage: ${{ steps.set-stage.outputs.stage }}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Setup AWS credentials file
|
||||
- name: Determine stage from branch
|
||||
id: set-stage
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p ~/.aws
|
||||
echo "[DevAdmin]" > ~/.aws/credentials
|
||||
echo "aws_access_key_id = ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
|
||||
echo "aws_secret_access_key = ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
|
||||
echo "[ProdAdmin]" >> ~/.aws/credentials
|
||||
echo "aws_access_key_id = ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
|
||||
echo "aws_secret_access_key = ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
|
||||
env
|
||||
BRANCH="${GITHUB_REF_NAME}"
|
||||
|
||||
- name: Setup AWS config file
|
||||
run: |
|
||||
echo "[profile DevAdmin]" > ~/.aws/config
|
||||
echo "region = eu-west-2" >> ~/.aws/config
|
||||
echo "[profile ProdAdmin]" >> ~/.aws/config
|
||||
echo "region = eu-west-2" >> ~/.aws/config
|
||||
if [[ "$BRANCH" == "prod" ]]; then
|
||||
echo "stage=prod" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Setup Terraform
|
||||
uses: hashicorp/setup-terraform@v1
|
||||
with:
|
||||
terraform_version: 1.5.2
|
||||
elif [[ "$BRANCH" == "dev" ]]; then
|
||||
echo "stage=dev" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Configure AWS credentials (DevAdmin)
|
||||
uses: aws-actions/configure-aws-credentials@v1
|
||||
else
|
||||
echo "stage=dev" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# ============================================================
|
||||
# 1️⃣ Shared Terraform (infra)
|
||||
# ============================================================
|
||||
shared_terraform:
|
||||
needs: determine_stage
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
STAGE: ${{ needs.determine_stage.outputs.stage }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: eu-west-2
|
||||
env:
|
||||
AWS_PROFILE: "DevAdmin"
|
||||
aws-region: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
- uses: hashicorp/setup-terraform@v3
|
||||
|
||||
- name: Terraform Init
|
||||
run: cd infrastructure/terraform && terraform init
|
||||
working-directory: infrastructure/terraform/shared
|
||||
run: terraform init -reconfigure
|
||||
|
||||
- name: Terraform Workspace
|
||||
run: |
|
||||
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
|
||||
cd infrastructure/terraform
|
||||
terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
|
||||
working-directory: infrastructure/terraform/shared
|
||||
run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}
|
||||
|
||||
- name: Terraform Plan
|
||||
run: |
|
||||
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
|
||||
cd infrastructure/terraform && terraform plan -var-file=${BRANCH_NAME}.tfvars
|
||||
working-directory: infrastructure/terraform/shared
|
||||
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
|
||||
|
||||
- name: Deploy to Dev
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
|
||||
env:
|
||||
name: dev
|
||||
- name: Terraform Apply
|
||||
if: env.STAGE == 'prod'
|
||||
working-directory: infrastructure/terraform/shared
|
||||
run: terraform apply -auto-approve tfplan
|
||||
|
||||
- name: Configure AWS credentials (ProdAdmin)
|
||||
uses: aws-actions/configure-aws-credentials@v1
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: eu-west-2
|
||||
env:
|
||||
AWS_PROFILE: "ProdAdmin"
|
||||
# ============================================================
|
||||
# 2️⃣ Build Address 2 UPRN image and Push
|
||||
# ============================================================
|
||||
address2uprn_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/address2UPRN/handler/Dockerfile
|
||||
build_context: .
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
# ============================================================
|
||||
# 3️⃣ Deploy Address 2 UPRN Lambda
|
||||
# ============================================================
|
||||
address2uprn_lambda:
|
||||
needs: [address2uprn_image, determine_stage]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: address2uprn
|
||||
lambda_path: infrastructure/terraform/lambda/address2UPRN
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2️⃣ Build Postcode Splitter image and Push
|
||||
# ============================================================
|
||||
postcodeSplitter_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/postcode_splitter/handler/Dockerfile
|
||||
build_context: .
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
# ============================================================
|
||||
# 3️⃣ Deploy Postcode Splitter Lambda
|
||||
# ============================================================
|
||||
postcodeSplitter_lambda:
|
||||
needs: [postcodeSplitter_image, determine_stage]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: postcodeSplitter
|
||||
lambda_path: infrastructure/terraform/lambda/postcodeSplitter
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
# ============================================================
|
||||
# Condition ETL image and Push
|
||||
# ============================================================
|
||||
condition_etl_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/condition/handler/Dockerfile
|
||||
build_context: .
|
||||
build_args: |
|
||||
DEV_DB_HOST=$DEV_DB_HOST
|
||||
DEV_DB_PORT=$DEV_DB_PORT
|
||||
DEV_DB_NAME=$DEV_DB_NAME
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
||||
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
||||
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||
|
||||
# ============================================================
|
||||
# Deploy Condition ETL Lambda
|
||||
# ============================================================
|
||||
condition_etl_lambda:
|
||||
needs: [condition_etl_image, determine_stage]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: condition-etl
|
||||
lambda_path: infrastructure/terraform/lambda/condition-etl
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
- name: Deploy to Prod
|
||||
if: github.ref == 'refs/heads/prod'
|
||||
run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
|
||||
env:
|
||||
name: prod
|
||||
|
|
|
|||
6
.github/workflows/unit_tests.yml
vendored
6
.github/workflows/unit_tests.yml
vendored
|
|
@ -2,6 +2,12 @@ name: Run unit tests
|
|||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
push:
|
||||
branches:
|
||||
- "**"
|
||||
|
||||
|
||||
jobs:
|
||||
test:
|
||||
|
|
|
|||
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
|
|
@ -9,9 +9,12 @@
|
|||
"path": "/bin/bash"
|
||||
}
|
||||
},
// Test discovery: pytest enabled, unittest disabled; pytest is passed -s -q --no-cov.
// (Unresolved Git conflict markers that were committed around these keys have been removed.)
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
|
||||
|
||||
// Hot reload setting that needs to be in user settings
|
||||
// "jupyter.runStartupCommands": [
|
||||
|
|
|
|||
|
|
@ -34,7 +34,8 @@ from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
|
|||
logger = setup_logger()
|
||||
|
||||
# OpenAI API Key (set this in your environment variables for security)
|
||||
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
# Read the key from the environment only; never commit a fallback secret.
# NOTE(review): the key previously hard-coded here is exposed in history and should be revoked/rotated.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
|
||||
class DataRemapper:
|
||||
|
|
@ -1159,13 +1160,17 @@ class AssetList:
|
|||
),
|
||||
axis=1
|
||||
)
|
||||
|
||||
col = self.EPC_API_DATA_NAMES["roof-description"]
|
||||
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply(
|
||||
lambda x: RoofAttributes(description=x[self.EPC_API_DATA_NAMES["roof-description"]]).process()[
|
||||
lambda x: RoofAttributes(description=x[col]).process()[
|
||||
"insulation_thickness"] if not pd.isnull(
|
||||
x[self.EPC_API_DATA_NAMES["roof-description"]]) else None,
|
||||
x[col]) else None,
|
||||
axis=1
|
||||
)
|
||||
|
||||
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = (
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace("+", "")
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# OpenAI API Key (set this in your environment variables for security)
|
||||
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
# Read the key from the environment only; never commit a fallback secret.
# NOTE(review): the key previously hard-coded here is exposed in history and should be revoked/rotated.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
class DataRemapper:
|
||||
|
|
|
|||
0
asset_list/__init__.py
Normal file
0
asset_list/__init__.py
Normal file
|
|
@ -14,22 +14,32 @@ from dotenv import load_dotenv
|
|||
from backend.SearchEpc import SearchEpc
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
EPC_AUTH_TOKEN = os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
)
|
||||
|
||||
|
||||
def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
|
||||
def extract_address1(
|
||||
asset_list, full_address_col, postcode_col, method="first_two_words"
|
||||
):
|
||||
if method == "first_two_words":
|
||||
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
|
||||
asset_list["address1_extracted"] = (
|
||||
asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
|
||||
)
|
||||
return asset_list
|
||||
|
||||
if method == "first_word":
|
||||
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
|
||||
asset_list["address1_extracted"] = (
|
||||
asset_list[full_address_col].str.split(" ").str[0]
|
||||
)
|
||||
return asset_list
|
||||
|
||||
if method == "house_number_extraction":
|
||||
asset_list["address1_extracted"] = asset_list.apply(
|
||||
lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]),
|
||||
axis=1
|
||||
lambda x: SearchEpc.get_house_number(
|
||||
address=x[full_address_col], postcode=x[postcode_col]
|
||||
),
|
||||
axis=1,
|
||||
)
|
||||
return asset_list
|
||||
|
||||
|
|
@ -59,24 +69,24 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
|
||||
data_filename = "Domna SHF Wave 3 (3).xlsx"
|
||||
sheet_name = "Domna Wave 3"
|
||||
postcode_column = 'Postcode'
|
||||
address1_column = "Address 1"
|
||||
address1_method = None
|
||||
fulladdress_column = None
|
||||
address_cols_to_concat = ["Address 1"]
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Aspire"
|
||||
data_filename = "ASPIRE ASSET LIST.xlsx"
|
||||
sheet_name = "Asset List"
|
||||
postcode_column = "Postcode"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = "Address"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Construction Years"
|
||||
landlord_os_uprn = "UPRN"
|
||||
landlord_property_type = "Type"
|
||||
landlord_built_form = "Attachment"
|
||||
landlord_wall_construction = "Wall type"
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Row ID"
|
||||
landlord_property_id = "LLUPRN"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
@ -93,25 +103,27 @@ def app():
|
|||
landlord_block_reference = None
|
||||
|
||||
# Peabody data for cleaning
|
||||
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||
"Project/data_validation")
|
||||
data_folder = (
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||
"Project/data_validation"
|
||||
)
|
||||
data_filename = "to_standardise_uprns.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Postcode'
|
||||
address1_column = "Address 1"
|
||||
address1_method = None
|
||||
fulladdress_column = None
|
||||
address_cols_to_concat = ["Address 1", "Address 2", "Address 3"]
|
||||
postcode_column = "Postcode"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = "Address"
|
||||
address_cols_to_concat = None
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Type"
|
||||
landlord_built_form = "Attachment"
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Org Ref"
|
||||
landlord_property_id = "LLUPRN"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
@ -127,40 +139,6 @@ def app():
|
|||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
# Lambeth:
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
|
||||
# data_filename = "lambeth_sw2_leigham court estate.xlsx"
|
||||
# sheet_name = "Sheet1"
|
||||
# postcode_column = 'Postcode'
|
||||
# address1_column = "Address"
|
||||
# address1_method = None
|
||||
# fulladdress_column = None
|
||||
# address_cols_to_concat = ["Address"]
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = None
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = None
|
||||
# landlord_built_form = None
|
||||
# landlord_wall_construction = None
|
||||
# landlord_roof_construction = None
|
||||
# landlord_heating_system = None
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "row_id"
|
||||
# landlord_sap = None
|
||||
# outcomes_filename = None
|
||||
# outcomes_sheetname = None
|
||||
# outcomes_postcode = None
|
||||
# outcomes_houseno = None
|
||||
# outcomes_id = None
|
||||
# outcomes_address = None
|
||||
# master_filepaths = []
|
||||
# master_id_colnames = []
|
||||
# master_to_asset_list_filepath = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = None
|
||||
# asset_list_header = 0
|
||||
# landlord_block_reference = None
|
||||
|
||||
# Maps addresses to uprn in problematic cases
|
||||
manual_uprn_map = {}
|
||||
|
||||
|
|
@ -185,49 +163,62 @@ def app():
|
|||
landlord_existing_pv=landlord_existing_pv,
|
||||
landlord_sap=landlord_sap,
|
||||
landlord_block_reference=landlord_block_reference,
|
||||
phase=phase
|
||||
phase=phase,
|
||||
)
|
||||
asset_list.init_standardise()
|
||||
|
||||
# We produce the new maps, which can be saved for future useage
|
||||
new_property_type_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_property_type] if
|
||||
asset_list.landlord_property_type else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_property_type]
|
||||
if asset_list.landlord_property_type
|
||||
else {}
|
||||
).items()
|
||||
if k not in PROPERTY_MAPPING
|
||||
}
|
||||
new_built_form_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_built_form] if
|
||||
asset_list.landlord_built_form else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_built_form]
|
||||
if asset_list.landlord_built_form
|
||||
else {}
|
||||
).items()
|
||||
if k not in BUILT_FORM_MAPPINGS
|
||||
}
|
||||
new_wall_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
|
||||
asset_list.landlord_wall_construction else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_wall_construction]
|
||||
if asset_list.landlord_wall_construction
|
||||
else {}
|
||||
).items()
|
||||
if k not in WALL_CONSTRUCTION_MAPPINGS
|
||||
}
|
||||
new_heating_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_heating_system] if
|
||||
asset_list.landlord_heating_system else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_heating_system]
|
||||
if asset_list.landlord_heating_system
|
||||
else {}
|
||||
).items()
|
||||
if k not in HEATING_MAPPINGS
|
||||
}
|
||||
new_existing_pv_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_existing_pv]
|
||||
if asset_list.landlord_existing_pv
|
||||
else {}
|
||||
).items()
|
||||
if k not in EXISTING_PV_MAPPINGS
|
||||
}
|
||||
new_roof_construction_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_roof_construction] if
|
||||
asset_list.landlord_roof_construction else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_roof_construction]
|
||||
if asset_list.landlord_roof_construction
|
||||
else {}
|
||||
).items()
|
||||
if k not in ROOF_CONSTRUCTION_MAPPINGS
|
||||
}
|
||||
|
|
@ -241,7 +232,7 @@ def app():
|
|||
outcomes_address=outcomes_address,
|
||||
outcomes_postcode=outcomes_postcode,
|
||||
outcomes_houseno=outcomes_houseno,
|
||||
outcomes_id=outcomes_id
|
||||
outcomes_id=outcomes_id,
|
||||
)
|
||||
|
||||
asset_list.flag_survey_master(
|
||||
|
|
@ -275,14 +266,16 @@ def app():
|
|||
skip = max(chunk_indexes)
|
||||
|
||||
if any(x in folder_contents for x in downloaded_files):
|
||||
skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
|
||||
skip = max(
|
||||
[i for i in chunk_indexes if filename.format(i=i) in folder_contents]
|
||||
)
|
||||
|
||||
for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
|
||||
print(f"Processing chunk {i} to {i + chunk_size}")
|
||||
if skip is not None and not force_retrieve_data:
|
||||
if i <= skip:
|
||||
continue
|
||||
chunk = asset_list.standardised_asset_list[i:i + chunk_size]
|
||||
chunk = asset_list.standardised_asset_list[i: i + chunk_size]
|
||||
epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
|
||||
df=chunk,
|
||||
row_id_name=asset_list.DOMNA_PROPERTY_ID,
|
||||
|
|
@ -294,7 +287,7 @@ def app():
|
|||
built_form_column=AssetList.STANDARD_BUILT_FORM,
|
||||
manual_uprn_map=manual_uprn_map,
|
||||
epc_api_only=epc_api_only,
|
||||
epc_auth_token=EPC_AUTH_TOKEN
|
||||
epc_auth_token=EPC_AUTH_TOKEN,
|
||||
)
|
||||
|
||||
# We now retrieve any failed properties
|
||||
|
|
@ -317,7 +310,9 @@ def app():
|
|||
|
||||
# Append the failed data to the main data
|
||||
# Store the chunk locally as a csv
|
||||
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
|
||||
pd.DataFrame(epc_data_chunk).to_csv(
|
||||
os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False
|
||||
)
|
||||
# Store the errors and no-data locally
|
||||
with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
|
||||
json.dump(errors_chunk, f)
|
||||
|
|
@ -348,7 +343,9 @@ def app():
|
|||
|
||||
unique_recommendations = set()
|
||||
for _, row in recommendations_df.iterrows():
|
||||
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
|
||||
unique_recommendations.update(
|
||||
[rec["improvement-summary-text"] for rec in row["recommendations"]]
|
||||
)
|
||||
|
||||
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
|
||||
transformed_data = []
|
||||
|
|
@ -368,20 +365,24 @@ def app():
|
|||
transformed_df = pd.DataFrame(transformed_data)
|
||||
for col in [
|
||||
"Floor insulation (solid floor)",
|
||||
"Floor insulation", "Floor insulation (suspended floor)"
|
||||
"Floor insulation",
|
||||
"Floor insulation (suspended floor)",
|
||||
]:
|
||||
if col not in transformed_df.columns:
|
||||
transformed_df[col] = False
|
||||
transformed_df = transformed_df[
|
||||
[
|
||||
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
|
||||
"Floor insulation", "Floor insulation (suspended floor)"
|
||||
asset_list.DOMNA_PROPERTY_ID,
|
||||
"Floor insulation (solid floor)",
|
||||
"Floor insulation",
|
||||
"Floor insulation (suspended floor)",
|
||||
]
|
||||
]
|
||||
|
||||
transformed_df["epc_has_floor_recommendation"] = (
|
||||
transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
|
||||
transformed_df["Floor insulation (suspended floor)"]
|
||||
transformed_df["Floor insulation (solid floor)"]
|
||||
| transformed_df["Floor insulation"]
|
||||
| transformed_df["Floor insulation (suspended floor)"]
|
||||
)
|
||||
|
||||
# Get the find my epc data
|
||||
|
|
@ -394,21 +395,20 @@ def app():
|
|||
find_my_epc_data.append(
|
||||
{
|
||||
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
|
||||
**x["find_my_epc_data"]
|
||||
**x["find_my_epc_data"],
|
||||
}
|
||||
)
|
||||
else:
|
||||
find_my_epc_data.append(
|
||||
{
|
||||
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
|
||||
}
|
||||
{asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]}
|
||||
)
|
||||
|
||||
find_my_epc_data = pd.DataFrame(find_my_epc_data)
|
||||
|
||||
find_my_epc_data = find_my_epc_data.merge(
|
||||
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
|
||||
how="left", on=asset_list.DOMNA_PROPERTY_ID
|
||||
how="left",
|
||||
on=asset_list.DOMNA_PROPERTY_ID,
|
||||
)
|
||||
|
||||
# We check if we get the solar pv column:
|
||||
|
|
@ -418,27 +418,33 @@ def app():
|
|||
# Retrieve just the data we need
|
||||
epc_df = epc_df[
|
||||
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
|
||||
].rename(
|
||||
columns=asset_list.EPC_API_DATA_NAMES
|
||||
)
|
||||
].rename(columns=asset_list.EPC_API_DATA_NAMES)
|
||||
|
||||
# Look for columns not in the find my EPC data, which will have happened if we didn't
|
||||
# retrieve it in the first place
|
||||
missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
|
||||
missed_find_epc_cols = [
|
||||
c
|
||||
for c in list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
||||
if c not in find_my_epc_data.columns
|
||||
]
|
||||
if missed_find_epc_cols:
|
||||
for c in missed_find_epc_cols:
|
||||
find_my_epc_data[c] = None
|
||||
|
||||
epc_df = epc_df.merge(
|
||||
find_my_epc_data[
|
||||
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
||||
]
|
||||
.rename(columns=asset_list.FIND_EPC_DATA_NAMES),
|
||||
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
|
||||
+ list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
||||
].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
|
||||
how="left",
|
||||
on=asset_list.DOMNA_PROPERTY_ID
|
||||
on=asset_list.DOMNA_PROPERTY_ID,
|
||||
)
|
||||
|
||||
asset_list.merge_data(epc_df)
|
||||
# asset_list.standardised_asset_list = asset_list.standardised_asset_list[
|
||||
# asset_list.standardised_asset_list["domna_full_address"]
|
||||
# != "120 Airdrie Crescent, Burnley, Lancashire"
|
||||
# ]
|
||||
asset_list.extract_attributes()
|
||||
asset_list.identify_worktypes()
|
||||
|
||||
|
|
@ -448,7 +454,10 @@ def app():
|
|||
asset_list.get_work_figures()
|
||||
|
||||
# Store as an excel
|
||||
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
|
||||
filename = (
|
||||
os.path.join(data_folder, ".".join(data_filename.split(".")[:-1]))
|
||||
+ " - Standardised.xlsx"
|
||||
)
|
||||
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
|
||||
|
||||
# Determine inspections priority
|
||||
|
|
@ -472,26 +481,42 @@ def app():
|
|||
# )
|
||||
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
||||
asset_list.standardised_asset_list.to_excel(
|
||||
writer, sheet_name="Standardised Asset List", index=False
|
||||
)
|
||||
if asset_list.block_analysis_df is not None:
|
||||
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
|
||||
asset_list.block_analysis_df.to_excel(
|
||||
writer, sheet_name="Block Analysis", index=False
|
||||
)
|
||||
# If we have outcomes, we add a tab with the outcomes
|
||||
if not asset_list.outcomes_for_output.empty:
|
||||
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
|
||||
asset_list.outcomes_for_output.to_excel(
|
||||
writer, sheet_name="Outcomes", index=False
|
||||
)
|
||||
|
||||
if not asset_list.unmatched_submissions.empty:
|
||||
asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
|
||||
asset_list.unmatched_submissions.to_excel(
|
||||
writer, sheet_name="Unmatched Submissions", index=False
|
||||
)
|
||||
|
||||
if not asset_list.outcomes_no_match.empty:
|
||||
asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
|
||||
asset_list.outcomes_no_match.to_excel(
|
||||
writer, sheet_name="Unmatched Outcomes", index=False
|
||||
)
|
||||
|
||||
if not asset_list.ecosurv_no_match.empty:
|
||||
asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
|
||||
asset_list.ecosurv_no_match.to_excel(
|
||||
writer, sheet_name="Unmatched Ecosurv", index=False
|
||||
)
|
||||
|
||||
if not asset_list.geographical_areas.empty:
|
||||
asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
|
||||
asset_list.geographical_areas.to_excel(
|
||||
writer, sheet_name="Geographical Areas", index=False
|
||||
)
|
||||
|
||||
# Store dupes
|
||||
if asset_list.duplicated_addresses is not None:
|
||||
if not asset_list.duplicated_addresses.empty:
|
||||
asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False)
|
||||
asset_list.duplicated_addresses.to_excel(
|
||||
writer, sheet_name="Duplicate Properties", index=False
|
||||
)
|
||||
|
|
|
|||
|
|
@ -520,4 +520,14 @@ BUILT_FORM_MAPPINGS = {
|
|||
'2.EXT.WALL FLAT': 'mid-terrace',
|
||||
'2 EXT. WALL FLAT': 'mid-terrace',
|
||||
|
||||
'Maisonette: Detached: Ground Floor': 'detached',
|
||||
'Maisonette: Enclosed End Terrace: Top Floor': 'enclosed end-terrace',
|
||||
'Flat: End Terrace: Basement': 'end-terrace',
|
||||
'Flat: Mid Terrace: Basement': 'mid-terrace',
|
||||
'Flat: Enclosed Mid Terrace: Basement': 'enclosed mid-terrace',
|
||||
'House: Semi Detached: Top Floor': 'semi-detached',
|
||||
'House: End Terrace: Ground Floor': 'end-terrace',
|
||||
'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace',
|
||||
'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,5 +17,10 @@ EXISTING_PV_MAPPINGS = {
|
|||
'PV: 10% roof area, PV: 2kWp array': 'already has PV',
|
||||
'PV: 50% roof area': 'already has PV',
|
||||
'Solar PV': 'already has PV',
|
||||
'SOLAR PV': 'already has PV'
|
||||
'SOLAR PV': 'already has PV',
|
||||
|
||||
'PV: 40% roof area, PV: 2kWp array': 'already has PV',
|
||||
'PV: 33% roof area, PV: 2kWp array': 'already has PV',
|
||||
'PV: 30% roof area': 'already has PV'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -494,6 +494,10 @@ HEATING_MAPPINGS = {
|
|||
'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, '
|
||||
'and sealed to, fireplace opening': 'room heaters',
|
||||
'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel',
|
||||
'Boiler: G rated Combi': 'gas condensing combi'
|
||||
'Boiler: G rated Combi': 'gas condensing combi',
|
||||
|
||||
'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler',
|
||||
'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators',
|
||||
'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -427,6 +427,23 @@ PROPERTY_MAPPING = {
|
|||
'End Terrace': 'unknown',
|
||||
'Detached': 'unknown',
|
||||
'Mid-terrace': 'unknown',
|
||||
'MID - TERRACE': 'unknown'
|
||||
'MID - TERRACE': 'unknown',
|
||||
'COMOFF': 'unknown',
|
||||
'LOTS': 'unknown',
|
||||
|
||||
'Maisonette: Detached: Ground Floor': 'maisonette',
|
||||
'Maisonette: Enclosed End Terrace: Top Floor': 'maisonette',
|
||||
'Flat: End Terrace: Basement': 'flat',
|
||||
'Bungalow: EnclosedEndTerrace': 'bungalow',
|
||||
'Flat: Mid Terrace: Basement': 'flat',
|
||||
'House: Semi Detached: Top Floor': 'house',
|
||||
'House: End Terrace: Ground Floor': 'house',
|
||||
'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette',
|
||||
'Flat: Enclosed Mid Terrace: Basement': 'flat',
|
||||
|
||||
'Warden Bungalow': 'bungalow',
|
||||
'Warden Flat': 'flat',
|
||||
'Upper Floor Flat': 'flat',
|
||||
'Extracare Scheme': 'other'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -301,4 +301,13 @@ ROOF_CONSTRUCTION_MAPPINGS = {
|
|||
'PitchedWithSlopingCeiling: As Built': 'pitched insulated',
|
||||
'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation',
|
||||
|
||||
'Flat: 150mm, Flat: Unknown': 'flat insulated',
|
||||
'AnotherDwellingAbove: Unknown, Flat: Unknown': 'another dwelling above',
|
||||
'AnotherDwellingAbove, AnotherDwellingAbove: Unknown': 'another dwelling above',
|
||||
'PitchedNormalNoLoftAccess: Unknown, PitchedWithSlopingCeiling: As Built': 'pitched unknown access to loft',
|
||||
'Flat: No Insulation': 'flat uninsulated',
|
||||
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 250mm': 'another dwelling above',
|
||||
'PitchedNormalLoftAccess: 175mm': 'pitched insulated',
|
||||
'AnotherDwellingAbove: 300mm': 'another dwelling above'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -354,6 +354,15 @@ WALL_CONSTRUCTION_MAPPINGS = {
|
|||
'System built Internal': 'insulated system built',
|
||||
|
||||
'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation',
|
||||
'Cavity: FilledCavityPlusExternal': 'filled cavity'
|
||||
'Cavity: FilledCavityPlusExternal': 'filled cavity',
|
||||
|
||||
'Cavity, Filled Cavity': 'filled cavity',
|
||||
'Solid Brick, As Built': 'solid brick unknown insulation',
|
||||
'Cavity, As Built': 'cavity unknown insulation',
|
||||
'Sandstone, As Built': 'sandstone or limestone unknown insulation',
|
||||
'Timber Frame, As Built': 'timber frame unknown insulation',
|
||||
'Solid Brick, Internal Insulation': 'insulated solid brick',
|
||||
'Granite or Whinstone, As Built': 'granite or whinstone unknown insulation',
|
||||
'Solid Brick, External': 'insulated solid brick'
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
postal
|
||||
pandas
|
||||
usaddress
|
||||
pydantic-settings==2.6.0
|
||||
epc-api-python==1.0.2
|
||||
thefuzz
|
||||
boto3
|
||||
|
|
@ -10,6 +9,5 @@ openai>=1.3.5
|
|||
tiktoken
|
||||
msgpack
|
||||
beautifulsoup4
|
||||
pydantic>=1.10.7
|
||||
typing-extensions>=4.5.0
|
||||
requests>=2.28.2
|
||||
requests>=2.28.2
|
||||
22
backend/.env.test
Normal file
22
backend/.env.test
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
DB_HOST=db
|
||||
DB_PORT=5432
|
||||
DB_NAME=tech_team_local_db
|
||||
DB_USERNAME=postgres
|
||||
DB_PASSWORD=makingwarmerhomes
|
||||
|
||||
|
||||
#not used
|
||||
GOOGLE_SOLAR_API_KEY=test
|
||||
SAP_PREDICTIONS_BUCKET=test
|
||||
CARBON_PREDICTIONS_BUCKET=test
|
||||
HEAT_PREDICTIONS_BUCKET=test
|
||||
HEATING_KWH_PREDICTIONS_BUCKET=test
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET=test
|
||||
API_KEY=test
|
||||
ENVIRONMENT=test
|
||||
SECRET_KEY=test
|
||||
PLAN_TRIGGER_BUCKET=test
|
||||
DATA_BUCKET=test
|
||||
EPC_AUTH_TOKEN=test
|
||||
ENGINE_SQS_URL=test
|
||||
ENERGY_ASSESSMENTS_BUCKET=test
|
||||
|
|
@ -1256,7 +1256,8 @@ class Property:
|
|||
"biodiesel": "Smokeless Fuel",
|
||||
"b30d": "B30K Biofuel",
|
||||
"coal": "Coal",
|
||||
"oil": "Oil"
|
||||
"oil": "Oil",
|
||||
"unknown": None # Handle - anything post 2020 is electricity else gas
|
||||
}
|
||||
|
||||
self.heating_energy_source = list({
|
||||
|
|
@ -1326,7 +1327,16 @@ class Property:
|
|||
if self.heating_energy_source == "Varied (Community Scheme)":
|
||||
|
||||
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
|
||||
self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]]
|
||||
mapped_to = fuel_map[self.main_fuel["fuel_type"]]
|
||||
if mapped_to is None and self.main_fuel["fuel_type"] == "unknown":
|
||||
# Handle logic based on age band
|
||||
if self.year_built >= 2020:
|
||||
self.heating_energy_source = "Electricity"
|
||||
else:
|
||||
self.heating_energy_source = "Natural Gas (Community Scheme)"
|
||||
|
||||
else:
|
||||
self.heating_energy_source = mapped_to
|
||||
else:
|
||||
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")
|
||||
|
||||
|
|
|
|||
20
backend/address2UPRN/README.md
Normal file
20
backend/address2UPRN/README.md
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
We have a list of addresses as input.
|
||||
|
||||
The addresses arrive in batches that share the same postcode, and we want to convert each address into its UPRN.
|
||||
|
||||
If this Lambda function can do that, we'll be speeding ahead.
|
||||
|
||||
|
||||
Energy Performance Information: https://epc.opendatacommunities.org/
|
||||
|
||||
guidance page: https://epc.opendatacommunities.org/docs/guidance#field_domestic_LMK_KEY
|
||||
|
||||
Example of Khalim's earlier code, which he wrote some tests for: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/tests/test_search_epc.py#L11
|
||||
|
||||
|
||||
Example of EPC search: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/SearchEpc.py#L118
|
||||
|
||||
|
||||
|
||||
Khalim has made a Python package to help scrape data: https://github.com/KhalimCK/epc-api-python
|
||||
|
||||
0
backend/address2UPRN/__init__.py
Normal file
0
backend/address2UPRN/__init__.py
Normal file
23
backend/address2UPRN/handler/Dockerfile
Normal file
23
backend/address2UPRN/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10

# All following paths are relative to the Lambda task root.
WORKDIR /var/task

# Dependencies are installed before the source is copied so that this layer
# stays cached until requirements.txt itself changes.
# NOTE(review): the COPY sources are repository-relative paths, so the build
# context is presumably the repository root -- confirm against the workflow.
COPY backend/address2UPRN/handler/requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Application code: the shared utils package plus the handler module.
COPY utils/ utils/
COPY backend/address2UPRN/main.py ./main.py

# Lambda entry point, given as "<module>.<function>".
CMD ["main.handler"]
|
||||
3
backend/address2UPRN/handler/requirements.txt
Normal file
3
backend/address2UPRN/handler/requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
epc-api-python==1.0.2
|
||||
tqdm
|
||||
pandas
|
||||
571
backend/address2UPRN/main.py
Normal file
571
backend/address2UPRN/main.py
Normal file
|
|
@ -0,0 +1,571 @@
|
|||
from epc_api.client import EpcClient
|
||||
import os
|
||||
from urllib.parse import urlencode
|
||||
import pandas as pd
|
||||
from difflib import SequenceMatcher
|
||||
from tqdm import tqdm
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
import re
|
||||
|
||||
# Token used to authenticate against the EPC API. It is read once at import
# time so that a misconfigured environment fails immediately rather than on
# the first request.
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")

# An empty value (e.g. `EPC_AUTH_TOKEN=` in an env file) is rejected together
# with a missing one: it cannot be a usable credential.
if not EPC_AUTH_TOKEN:
    raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
|
||||
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Set
|
||||
|
||||
|
||||
def levenshtein(a: str, b: str) -> float:
    """
    Address similarity score in [0, 1].

    Despite the name, no Levenshtein edit distance is computed: the score is a
    weighted blend of token overlap and ``difflib.SequenceMatcher`` similarity,
    preceded by number-based guards that return 0.0 outright.

    The guards are asymmetric: ``a`` is treated as the user-supplied address
    and ``b`` as the candidate (EPC) address.

    Strategy:
    - Normalise both sides with ``normalise_address``
    - Strongly penalise mismatched house/flat numbers
    - Combine token overlap + character similarity

    Args:
        a: User-supplied address.
        b: Candidate address to compare against.

    Returns:
        A score rounded to 4 decimal places; 0.0 when a guard rules the pair
        out.
    """

    def extract_number_sequence(s: str) -> list[str]:
        # Numbers in order of appearance, with an optional letter suffix.
        return re.findall(r"\d+[a-z]?", s)

    def extract_numbers(s: str) -> Set[str]:
        return set(extract_number_sequence(s))

    def tokenise(s: str) -> Set[str]:
        return set(s.split())

    def extract_building_number(s: str) -> str | None:
        """
        Extract the main building number (NOT flat/unit).
        Assumes formats like:
        - '42 moreton road'
        - 'flat 3 42 moreton road'
        """
        # Drop each flat/unit keyword together with the token that follows it
        # (the flat identifier), so it is not mistaken for the building number.
        cleaned = []
        skip_next = False
        for t in s.split():
            if t in ("flat", "apt", "apartment", "unit"):
                skip_next = True
                continue
            if skip_next:
                skip_next = False
                continue
            cleaned.append(t)

        # The first remaining number is the building number.
        for t in cleaned:
            if re.fullmatch(r"\d+[a-z]?", t):
                return t

        return None

    a_norm = normalise_address(a)
    b_norm = normalise_address(b)

    # --- hard signal: numbers ---
    nums_a = extract_numbers(a_norm)
    nums_b = extract_numbers(b_norm)

    # The user gave a number but the candidate has none at all.
    if nums_a and not nums_b:
        return 0.0

    # No shared numbers at all -> impossible match.
    if nums_a and nums_b and nums_a.isdisjoint(nums_b):
        return 0.0

    # HARD GUARD: building number must match when both sides have one.
    bld_a = extract_building_number(a_norm)
    bld_b = extract_building_number(b_norm)

    if bld_a and bld_b and bld_a != bld_b:
        return 0.0

    # --- order-sensitive flat/building guard ---
    seq_a = extract_number_sequence(a_norm)
    seq_b = extract_number_sequence(b_norm)

    # Match the markers as whole words only. A plain substring test also fires
    # on ordinary words such as "chapter" (contains "apt") or "community"
    # (contains "unit"), which would wrongly flip the guard below.
    has_flat_token_user = (
        re.search(r"\b(?:flat|apt|apartment|unit)\b", a_norm) is not None
    )
    has_flat_token_epc = re.search(r"\bflat\b", b_norm) is not None

    # The user wrote two bare numbers (no flat keyword) and the candidate is a
    # flat: the numbers must then appear in the same order as the candidate's
    # first two numbers.
    if (
        len(seq_a) == 2
        and len(seq_b) >= 2
        and has_flat_token_epc
        and not has_flat_token_user
        and seq_a != seq_b[:2]
    ):
        return 0.0

    # --- token similarity (order-independent, Jaccard index) ---
    toks_a = tokenise(a_norm)
    toks_b = tokenise(b_norm)

    if not toks_a or not toks_b:
        token_score = 0.0
    else:
        token_score = len(toks_a & toks_b) / len(toks_a | toks_b)

    # --- character similarity (soft signal) ---
    char_score = SequenceMatcher(None, a_norm, b_norm).ratio()

    # --- weighted blend ---
    return round(
        0.65 * token_score + 0.35 * char_score,
        4,
    )
|
||||
|
||||
|
||||
def normalise_address(s: str) -> str:
    """
    Canonical UK-focused address normalisation.

    - Lowercases
    - Splits a single-letter suffix off a number ("28a" -> "28 a")
    - Removes punctuation (keeps / for flats)
    - Normalises whitespace
    - Applies synonym compression at token level

    Args:
        s: Raw address. Falsy values and NaN (how pandas represents a missing
            cell) are treated as "no address"; any other non-string value is
            converted with ``str``.

    Returns:
        The normalised address, or "" when there is nothing to normalise.
    """

    if not s:
        return ""

    if not isinstance(s, str):
        # NaN is truthy, so it passes the check above and would otherwise
        # crash on .lower(); it is the only float not equal to itself.
        if isinstance(s, float) and s != s:
            return ""
        s = str(s)

    # Keys never contain "." because punctuation is stripped before the
    # lookup, so "rd." reaches this table as "rd".
    ADDRESS_SYNONYMS = {
        # street types
        "rd": "road",
        "st": "street",
        "ave": "avenue",
        "ln": "lane",
        "cres": "crescent",
        "ct": "court",
        "dr": "drive",
        # flats / units
        "apt": "flat",
        "apartment": "flat",
        "unit": "flat",
        "ste": "suite",
        # numbering noise: mapped to "" so the token is dropped
        "no": "",
    }

    # 1. lowercase
    s = s.lower()

    # 1.5 split digit-letter suffixes
    s = re.sub(r"(\d+)([a-z])\b", r"\1 \2", s)

    # 2. remove punctuation except /
    s = re.sub(r"[^\w\s/]", " ", s)

    # 3. normalise whitespace
    s = re.sub(r"\s+", " ", s).strip()

    # 4. tokenise + synonym normalisation (empty replacements are dropped)
    tokens = []
    for tok in s.split():
        replacement = ADDRESS_SYNONYMS.get(tok, tok)
        if replacement:
            tokens.append(replacement)

    return " ".join(tokens)
|
||||
|
||||
|
||||
def score_addresses(
    df: pd.DataFrame,
    user_address: str,
    column: str = "address",
) -> pd.Series:
    """
    Score every address in ``df[column]`` against ``user_address``.

    Args:
        df: Frame holding the candidate addresses.
        user_address: Address to compare each candidate with.
        column: Name of the column that holds the candidate addresses.

    Returns:
        A Series of ``levenshtein`` scores aligned with ``df``'s index.

    Raises:
        ValueError: If ``column`` is not present in ``df``.
    """
    if column in df.columns:
        candidates = df[column]
        return candidates.apply(
            lambda candidate: levenshtein(user_address, candidate)
        )

    raise ValueError(f"Missing column: {column}")
|
||||
|
||||
|
||||
def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
    """
    Fetch the domestic EPC search results for a postcode.

    If the number of rows returned equals the requested ``size`` the result
    may have been truncated, so the search is repeated with double the size,
    up to ``max_attempts`` attempts in total.

    Args:
        postcode: Postcode to search for.
        size: Page size requested from the API. When falsy, no ``size`` query
            parameter is sent and no retry can be triggered.
        attempt: Number of the current attempt (1-based).
        max_attempts: Maximum number of attempts before giving up on growing
            the page size.

    Returns:
        A DataFrame with one row per search result, or an empty DataFrame
        (no columns) when the response is empty or has no "rows" key.
    """
    client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # URLs always use "/", so the path is joined explicitly rather than with
    # os.path.join, whose separator depends on the host platform.
    base_url = client.domestic.host.rstrip("/") + "/search"

    while True:
        url = base_url
        if size:
            url += "?" + urlencode({"size": size})

        search_resp = client.domestic.call(
            url=url,
            method="get",
            params={"postcode": postcode},
        )
        if not search_resp or "rows" not in search_resp:
            return pd.DataFrame()

        results_df = pd.DataFrame(
            search_resp["rows"], columns=search_resp["column-names"]
        )

        # Fewer rows than requested: the result set is complete.
        if len(results_df) != size:
            return results_df

        # We hit the size limit, so there *may* be more results.
        logger.warning(
            f"Hit size limit ({size}) for postcode '{postcode}'. "
            f"Attempt {attempt}/{max_attempts}."
        )

        if attempt >= max_attempts:
            logger.warning(
                "Max attempts reached. Results may be truncated. "
                "(Please do a manual review by the tech team.)"
            )
            return results_df

        size *= 2
        attempt += 1
        logger.warning(f"Retrying with size={size}")
|
||||
|
||||
|
||||
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """
    Check whether every non-null value in ``df[column]`` is the given UPRN.

    Values are compared as stripped strings. The result is False when the
    column is missing, when it holds no non-null values, or when more than
    one distinct value is present.
    """
    if column in df.columns:
        # Nulls are dropped first; everything else is compared as text.
        distinct = df[column].dropna().astype(str).str.strip().unique()

        if len(distinct) > 0:
            # Exactly one distinct value, and it is the one asked about.
            return len(distinct) == 1 and distinct[0] == str(uprn)

    return False
|
||||
|
||||
|
||||
def get_uprn_candidates(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Score and rank EPC rows against a user-supplied address.

    A copy of ``df`` is returned with two extra columns: ``lexiscore`` (the
    ``levenshtein`` similarity of each row's address to ``user_address``) and
    ``lexirank`` (dense rank of that score, 1 = best). The UPRN column is
    converted to strings with any trailing ".0" removed. Rows are sorted
    best-first.

    This function only annotates and orders; it does NOT pick a UPRN.

    Raises:
        ValueError: If ``address_column`` or ``uprn_column`` is missing.
    """
    for required in (address_column, uprn_column):
        if required not in df.columns:
            raise ValueError(f"Missing column: {required}")

    query = normalise_address(user_address)
    scored = df.copy()

    scored["lexiscore"] = scored[address_column].apply(
        lambda candidate: levenshtein(query, candidate)
    )

    # UPRNs as plain strings; a trailing ".0" (float rendering) is dropped.
    uprn_text = scored[uprn_column].astype(str)
    scored[uprn_column] = uprn_text.str.replace(r"\.0$", "", regex=True)

    # Dense ranking: tied scores share a rank, and rank 1 is the best match.
    ranks = scored["lexiscore"].rank(method="dense", ascending=False)
    scored["lexirank"] = ranks.astype(int)

    return scored.sort_values(
        ["lexirank", "lexiscore"],
        ascending=[True, False],
    )
|
||||
|
||||
|
||||
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
    """
    Resolve a free-text address to a UPRN via the EPC records of its postcode.

    Args:
        user_inputed_address: Address as typed by the user.
        postcode: Postcode used to fetch the candidate EPC records.
        return_address: When true, also return the matched EPC address.

    Returns:
        The UPRN as a string, or ``(uprn, address)`` when ``return_address``
        is true.

        ``None`` (a bare ``None``, even when ``return_address`` is true) when:
        - no EPC records are found for the postcode,
        - no candidate scores above zero,
        - the best-scoring candidates disagree on the UPRN, or
        - the matched EPC record carries no UPRN.
    """
    df = get_epc_data_with_postcode(postcode=postcode)

    if df.empty:
        return None

    scored_df = get_uprn_candidates(
        df,
        user_address=user_inputed_address,
    )

    # Candidates are sorted best-first, so row 0 holds the best score.
    best_score = scored_df.iloc[0]["lexiscore"]

    if best_score <= 0:
        return None

    # All rank-1 rows (possible draw).
    top_rank_df = scored_df[scored_df["lexirank"] == 1]
    best_row = top_rank_df.iloc[0]
    found_uprn = best_row["uprn"]

    # If rank-1 rows do not agree on a single UPRN -> ambiguous.
    if not df_has_single_uprn(top_rank_df, uprn=found_uprn):
        return None

    address = best_row["address"]
    lexiscore = float(best_row["lexiscore"])

    logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")

    # get_uprn_candidates converts the UPRN column with astype(str), so a
    # missing value can arrive here as "nan"/"None"/"<NA>" rather than "".
    if str(found_uprn).strip().lower() in ("", "nan", "none", "<na>"):
        return None

    if return_address:
        return found_uprn, address
    return found_uprn
|
||||
|
||||
|
||||
def resolve_uprns_for_postcode_group(
    group_df: pd.DataFrame,
    epc_df: pd.DataFrame,
    address_col: str = "Address 1",
) -> pd.DataFrame:
    """
    Resolve a UPRN for every row of a single-postcode group.

    Args:
        group_df: Rows sharing the same postcode.
        epc_df: EPC search results for that postcode.
        address_col: Column of ``group_df`` holding the address to match.

    Returns:
        ``group_df`` (index reset) with these columns appended:
        ``found_uprn``, ``best_match_uprn``, ``best_match_address``,
        ``best_match_lexiscore`` and ``status``, where status is one of
        "no_epc_candidates", "zero_score", "ambiguous" or "matched".
        ``found_uprn`` is only set for "matched" rows.
    """

    def outcome(status, found=None, best_uprn=None, best_address=None, score=None):
        # One diagnostics record; key order defines the output column order.
        return {
            "found_uprn": found,
            "best_match_uprn": best_uprn,
            "best_match_address": best_address,
            "best_match_lexiscore": score,
            "status": status,
        }

    # An empty EPC result may have no columns at all, in which case scoring
    # would raise "Missing column" instead of reporting that there is nothing
    # to match against. Decide this once, up front.
    no_candidates = epc_df.empty

    results = []

    for _, row in group_df.iterrows():
        if no_candidates:
            results.append(outcome("no_epc_candidates"))
            continue

        user_address = str(row[address_col]).strip()

        scored_df = get_uprn_candidates(
            epc_df,
            user_address=user_address,
        )

        # Candidates are sorted best-first.
        best_score = scored_df.iloc[0]["lexiscore"]

        if best_score <= 0:
            results.append(outcome("zero_score", score=best_score))
            continue

        top_rank_df = scored_df[scored_df["lexirank"] == 1]
        best_row = top_rank_df.iloc[0]

        # Tied best candidates that disagree on the UPRN: report the first
        # one for diagnostics but do not commit to it.
        if not df_has_single_uprn(top_rank_df, best_row["uprn"]):
            results.append(
                outcome(
                    "ambiguous",
                    best_uprn=best_row["uprn"],
                    best_address=best_row["address"],
                    score=best_score,
                )
            )
            continue

        results.append(
            outcome(
                "matched",
                found=str(best_row["uprn"]),
                best_uprn=str(best_row["uprn"]),
                best_address=best_row["address"],
                score=best_score,
            )
        )

    # Reset the group's index so the rows line up with the diagnostics.
    return pd.concat(
        [group_df.reset_index(drop=True), pd.DataFrame(results)],
        axis=1,
    )
|
||||
|
||||
|
||||
def test(a, b):
|
||||
assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
|
||||
|
||||
|
||||
def run_all_test():
    """Run the ad-hoc smoke checks for the EPC lookup helpers.

    Every check goes through ``test`` and therefore raises ``AssertionError``
    on the first mismatch. Expected row counts and UPRNs are hard-coded, so
    the checks presumably depend on the current EPC data -- TODO confirm.
    """
    # Basic usage with different postcode styles (the last variant is checked twice)
    for postcode in ("b93 8sy", "B938sy", "b93 8Sy", "b93 8Sy"):
        test(get_epc_data_with_postcode(postcode).shape[0], 63)

    # (address, postcode, expected get_uprn result), checked in this order.
    # False is the expected result for inputs that should not resolve.
    uprn_cases = [
        ("68", "b93 8sy", "100070989938"),
        ("68 Glendon Way", "b93 8sy", "100070989938"),
        ("Flat A, 28, Nelgarde Road", "se6 4tf", "100023278633"),
        ("28 A", "se6 4tf", "100023278633"),
        ("28A", "se6 4tf", "100023278633"),
        ("6 Aitken Close", "E8 4SQ", False),
        # unique case
        ("Flat 5, 1, Semley Gate", "e9 5nh", "10008238198"),
        ("5 , 1 Semley Gate", "e9 5nh", "10008238198"),
        ("5 Semley Gate", "e9 5nh", "10008238198"),
        ("1, 5 Semley Gate", "e9 5nh", False),
        # this one return "flat 1, in 1 semley gate"
        ("1 Semley Gate", "e9 5nh", "10008238188"),
        ("48 Oswald Street", "E5 0BT", False),
        ("42 Oswald Street", "E5 0BT", False),
        ("46 Oswald Street", "E5 0BT", False),
    ]
    for address, postcode, expected in uprn_cases:
        test(get_uprn(address, postcode), expected)

    # Candidate scoring is only exercised here; the returned frames are discarded.
    get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
    get_uprn_candidates(
        get_epc_data_with_postcode("Cr2 7dl"),
        "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Audit script: re-derive a UPRN for every row of the input workbook and
    # write out the rows whose derived UPRN disagrees with the recorded one
    # (plus rows that could not be processed at all).
    INPUT_FILE = "hackney.xlsx"

    ADDRESS_COL = "Address 1"
    POSTCODE_COL = "Postcode"
    UPRN_COL = "UPRN"

    df = pd.read_excel(INPUT_FILE)

    failures = []

    for _, row in tqdm(
        df.iterrows(),
        total=len(df),
        desc="Auditing UPRNs",
    ):
        input_address = str(row[ADDRESS_COL]).strip()
        postcode = str(row[POSTCODE_COL]).strip()

        # Recorded UPRN as a digit string; int() strips any fractional part
        # (e.g. 1234.0 -> "1234"). Missing values become None.
        expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL]))

        try:
            epc_df = get_epc_data_with_postcode(postcode)

            if epc_df.empty:
                failures.append(
                    {
                        **row.to_dict(),
                        "found_uprn": None,
                        "best_match_uprn": None,
                        "best_match_address": None,
                        "best_match_lexiscore": None,
                        "status": "no_epc_results",
                    }
                )
                continue

            scored_df = get_uprn_candidates(
                epc_df,
                user_address=input_address,
            )

            # An empty candidate frame has no row 0: without this guard
            # ``iloc[0]`` raises IndexError and the row would be reported as a
            # generic "exception" instead of a clear "no candidates" outcome.
            if scored_df.empty:
                failures.append(
                    {
                        **row.to_dict(),
                        "found_uprn": None,
                        "best_match_uprn": None,
                        "best_match_address": None,
                        "best_match_lexiscore": None,
                        "status": "no_epc_candidates",
                    }
                )
                continue

            # The first row is taken as the best candidate (same convention as
            # ``scored_df.iloc[0]["lexiscore"]`` used elsewhere in this file).
            best_row = scored_df.iloc[0]

            best_match_uprn = str(best_row["uprn"])
            best_match_address = best_row["address"]
            best_match_lexiscore = round(float(best_row["lexiscore"]), 4)

            found_uprn = get_uprn(input_address, postcode)

        except Exception as e:
            # Best-effort audit: record the error against the row and carry on
            # with the remaining rows.
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": None,
                    "best_match_uprn": None,
                    "best_match_address": None,
                    "best_match_lexiscore": None,
                    "status": "exception",
                    "error": str(e),
                }
            )
            continue

        # Any falsy result from get_uprn (None, False, "") counts as "not found".
        found_uprn_norm = None if not found_uprn else str(found_uprn)

        if found_uprn_norm != expected_uprn:
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": found_uprn_norm,
                    "best_match_uprn": best_match_uprn,
                    "best_match_address": best_match_address,
                    "best_match_lexiscore": best_match_lexiscore,
                    "status": ("no_match" if found_uprn_norm is None else "mismatch"),
                }
            )

    failures_df = pd.DataFrame(failures)

    print("===================================")
    print(f"Total rows : {len(df)}")
    print(f"Failures : {len(failures_df)}")
    print("===================================")

    failures_df.to_excel(
        "hackney_uprn_failures.xlsx",
        index=False,
    )
|
||||
|
||||
|
||||
def handler(event, context):
    """Placeholder entry point that prints and returns a fixed greeting.

    Args:
        event: Invocation payload; not read.
        context: Invocation context; not read.

    Returns:
        dict: ``{"statusCode": 200, "body": "hello world"}``.
    """
    greeting = "hello world"
    print(greeting)
    response = {"statusCode": 200, "body": greeting}
    return response
|
||||
|
||||
|
||||
# TODO: function dispatcher
|
||||
|
||||
# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)
|
||||
# fix that
|
||||
# Look again at flat 1
|
||||
# pandas reader the separate postcode_splitter
|
||||
# dump into s3
|
||||
24
backend/address2UPRN/script.py
Normal file
24
backend/address2UPRN/script.py
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from backend.address2UPRN.main import get_uprn
|
||||
|
||||
# Enable tqdm for pandas
|
||||
tqdm.pandas()
|
||||
|
||||
df = pd.read_excel("address2.xlsx")
|
||||
|
||||
|
||||
def extract_uprn(row):
    """Resolve the UPRN and matched address for one spreadsheet row.

    Args:
        row: Row exposing the "User Input" and "Postcode" entries.

    Returns:
        pd.Series: ``[uprn, found_address]`` unpacked from the ``get_uprn``
        result, or ``[None, None]`` when ``get_uprn`` returns a falsy value.
    """
    print(row["User Input"], row["Postcode"])
    result = get_uprn(row["User Input"], row["Postcode"], return_address=True)

    # Treat every falsy result (None or False) as "no match": unpacking False
    # below would raise TypeError and abort the whole apply().
    if not result:
        return pd.Series([None, None])

    uprn, found_address = result
    return pd.Series([uprn, found_address])
|
||||
|
||||
|
||||
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
|
||||
|
||||
df.to_excel("outputs2.xlsx", index=False)
|
||||
40
backend/address2UPRN/tests/test_csv.py
Normal file
40
backend/address2UPRN/tests/test_csv.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# tests/test_address_to_uprn_csv.py
|
||||
|
||||
import csv
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from backend.address2UPRN.main import get_uprn
|
||||
|
||||
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
|
||||
|
||||
|
||||
def load_test_cases():
    """Build one ``pytest.param`` per row of the CSV fixture at ``FIXTURE_PATH``.

    Returns:
        list: Parameters of (user input, postcode, manual UPRN code), each with
        an id of the form ``"<user input> [<postcode>]"``.
    """
    cases = []
    with open(FIXTURE_PATH, newline="", encoding="utf-8") as fixture:
        for record in csv.DictReader(fixture):
            user_input = record["User Input"]
            postcode = record["Postcode"]
            cases.append(
                pytest.param(
                    user_input,
                    postcode,
                    record["Manual UPRN Code"],
                    id=f"{user_input} [{postcode}]",
                )
            )
    return cases
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "user_input,postcode,expected_uprn",
    load_test_cases(),
)
def test_uprn_resolution_matches_manual(
    user_input: str,
    postcode: str,
    expected_uprn: str,
):
    """Check ``get_uprn`` against the manually assigned UPRN of one fixture row.

    Args:
        user_input: Address text from the fixture's "User Input" column.
        postcode: Postcode from the fixture's "Postcode" column.
        expected_uprn: Text from the "Manual UPRN Code" column; the fixture
            uses the literal string "None" for rows without a UPRN.
    """
    uprn = get_uprn(user_input, postcode)

    # The fixture holds text, so compare as text. Any falsy result (None or
    # False) is mapped to the fixture's "None" marker instead of "False".
    actual = str(uprn) if uprn else "None"
    assert actual == expected_uprn
|
||||
366
backend/address2UPRN/tests/test_data.csv
Normal file
366
backend/address2UPRN/tests/test_data.csv
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
User Input,Postcode,Manual UPRN Code
|
||||
47 The Fairway,OX16 0RR,100120771697
|
||||
11 REGENT COURT,SL1 3LG,100081041562
|
||||
3/137a Windmill Road,TW8 9NH,100021516998
|
||||
Flat 33,SW18 4BE,100023328943
|
||||
FLAT 1 Brendon Grove,N2 8JE,200013412
|
||||
Flat 15,KT8 2NE,100062123759
|
||||
FLAT 5 Stonehill Road,W4 3AH,100021589829
|
||||
10 Douglas Court,SL7 1UQ,100081278099
|
||||
1 Windmill Road,HP17 8JA,766034606
|
||||
31 Denewood,HP13 7LH,100081095964
|
||||
"10, Greenways Drive",TW4 5DD,10091597009
|
||||
Flat 10,W4 3AH,"100021589834"
|
||||
Flat 11,TW4 5DD,10091597010
|
||||
Flat 11,W4 3AH,100021589835
|
||||
"12, Greenways Drive",TW4 5DD,10091597011
|
||||
"Flat 12, Forbes House",W4 3AH,100021589836
|
||||
FLAT 1 Goodstone Court,HA1 4FL,10070269053
|
||||
Flat 13,TW4 5DD,10091597012
|
||||
Flat 13,W4 3AH,100021589837
|
||||
Flat 14,TW4 5DD,10091597013
|
||||
Flat 14,W4 3AH,100021589838
|
||||
Flat 15,TW4 5DD,10091597014
|
||||
Flat 15,W4 3AH,100021589839
|
||||
Flat 16,TW4 5DD,"10091597015"
|
||||
Flat 16,W4 3AH,100021589840
|
||||
Flat 17,TW4 5DD,10091597016
|
||||
Flat 17,W4 3AH,100021589841
|
||||
Flat 18,TW4 5DD,10091597017
|
||||
Flat 19,W4 3AH,100021589843
|
||||
Flat 20,W4 3AH,100021589844
|
||||
Flat 21,W4 3AH,100021589845
|
||||
Flat 22,W4 3AH,100021589846
|
||||
FLAT 2 Goodstone Court,HA1 4FL,10070269054
|
||||
Flat 23,W4 3AH,100021589847
|
||||
Flat 24,W4 3AH,100021589848
|
||||
"30c, Bosanquet Close",UB8 3PE,100021475316
|
||||
"30e, Bosanquet Close",UB8 3PE,100021475318
|
||||
FLAT 3 Goodstone Court,HA1 4FL,10070269055
|
||||
FLAT 4 Goodstone Court,HA1 4FL,10070269056
|
||||
FLAT 5 Goodstone Court,HA1 4FL,10070269057
|
||||
FLAT 6 Goodstone Court,HA1 4FL,10070269058
|
||||
FLAT 7 Goodstone Court,HA1 4FL,10070269059
|
||||
FLAT 8 Goodstone Court,HA1 4FL,10070269060
|
||||
FLAT 9 Goodstone Court,HA1 4FL,10070269061
|
||||
FLAT 10 Goodstone Court,HA1 4FL,10070269062
|
||||
FLAT 11 Goodstone Court,HA1 4FL,10070269063
|
||||
FLAT 12 Goodstone Court,HA1 4FL,10070269064
|
||||
FLAT 13 Goodstone Court,HA1 4FL,10070269065
|
||||
FLAT 14 Goodstone Court,HA1 4FL,10070269066
|
||||
FLAT 15 Goodstone Court,HA1 4FL,10070269067
|
||||
FLAT 16 Goodstone Court,HA1 4FL,10070269068
|
||||
FLAT 17 Goodstone Court,HA1 4FL,10070269069
|
||||
FLAT 18 Goodstone Court,HA1 4FL,10070269070
|
||||
FLAT 19 Goodstone Court,HA1 4FL,10070269071
|
||||
FLAT 20 Goodstone Court,HA1 4FL,10070269072
|
||||
FLAT 21 Goodstone Court,HA1 4FL,10070269073
|
||||
FLAT 22 Goodstone Court,HA1 4FL,10070269074
|
||||
FLAT 23 Goodstone Court,HA1 4FL,10070269075
|
||||
FLAT 24 Goodstone Court,HA1 4FL,10070269076
|
||||
FLAT 25 Goodstone Court,HA1 4FL,10070269077
|
||||
FLAT 26 Goodstone Court,HA1 4FL,10070269078
|
||||
FLAT 27 Goodstone Court,HA1 4FL,10070269079
|
||||
FLAT 28 Goodstone Court,HA1 4FL,10070269080
|
||||
FLAT 29 Goodstone Court,HA1 4FL,10070269081
|
||||
FLAT 30 Goodstone Court,HA1 4FL,10070269082
|
||||
FLAT 31 Goodstone Court,HA1 4FL,10070269083
|
||||
FLAT 32 Goodstone Court,HA1 4FL,10070269084
|
||||
FLAT 33 Goodstone Court,HA1 4FL,10070269085
|
||||
FLAT 34 Goodstone Court,HA1 4FL,10070269086
|
||||
FLAT 35 Goodstone Court,HA1 4FL,10070269087
|
||||
FLAT 36 Goodstone Court,HA1 4FL,10070269088
|
||||
FLAT 37 Goodstone Court,HA1 4FL,10070269089
|
||||
FLAT 38 Goodstone Court,HA1 4FL,10070269090
|
||||
FLAT 39 Goodstone Court,HA1 4FL,10070269091
|
||||
FLAT 40 Goodstone Court,HA1 4FL,10070269092
|
||||
FLAT 41 Goodstone Court,HA1 4FL,10070269093
|
||||
FLAT 42 Goodstone Court,HA1 4FL,10070269094
|
||||
FLAT 43 Goodstone Court,HA1 4FL,10070269095
|
||||
"13 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778260
|
||||
"14 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778259
|
||||
"15 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778258
|
||||
"16 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778263
|
||||
"17 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778262
|
||||
"18 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778261
|
||||
"19 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778266
|
||||
"20 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778265
|
||||
"21 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778264
|
||||
90a Murray Road,W5 4DA,12135293
|
||||
"Flat 1, 6 Wolverton Gardens",W5 3LJ,"12119972"
|
||||
"1, Monsted House",UB1 1FG,12189944
|
||||
"10, Monsted House",UB1 1FG,12189953
|
||||
"20, Monsted House",UB1 1FG,12189963
|
||||
"2, Monsted House",UB1 1FG,12189945
|
||||
"3, Monsted House",UB1 1FG,12189946
|
||||
"4, Monsted House",UB1 1FG,12189947
|
||||
"5, Monsted House",UB1 1FG,12189948
|
||||
"6, Monsted House",UB1 1FG,12189949
|
||||
"7, Monsted House",UB1 1FG,12189950
|
||||
"8, Monsted House",UB1 1FG,12189951
|
||||
"9, Monsted House",UB1 1FG,12189952
|
||||
"1 Cullis House, 1, Accolade Avenue",UB1 1FH,12189904
|
||||
"2 Cullis House, 1, Accolade Avenue",UB1 1FH,12189905
|
||||
"3 Cullis House, 1, Accolade Avenue",UB1 1FH,12189906
|
||||
"4 Cullis House, 1, Accolade Avenue",UB1 1FH,12189907
|
||||
"5 Cullis House, 1, Accolade Avenue",UB1 1FH,12189908
|
||||
"6 Cullis House, 1, Accolade Avenue",UB1 1FH,12189909
|
||||
1 Genteel House Samara Drive,UB1 1FJ,12189835
|
||||
2 Genteel House Samara Drive,UB1 1FJ,12189836
|
||||
3 Genteel House Samara Drive,UB1 1FJ,12189837
|
||||
4 Genteel House Samara Drive,UB1 1FJ,12189838
|
||||
5 Genteel House Samara Drive,UB1 1FJ,12189839
|
||||
6 Genteel House Samara Drive,UB1 1FJ,12189840
|
||||
7 Genteel House Samara Drive,UB1 1FJ,12189841
|
||||
8 Genteel House Samara Drive,UB1 1FJ,12189842
|
||||
9 Genteel House Samara Drive,UB1 1FJ,12189843
|
||||
10 Genteel House Samara Drive,UB1 1FJ,12189844
|
||||
1 ASH TREE HOUSE,SE5 0TE,None
|
||||
"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979
|
||||
3 ASH TREE HOUSE,SE5 0TE,None
|
||||
Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981
|
||||
5 ASH TREE HOUSE,SE5 0TE,None
|
||||
Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983
|
||||
Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986
|
||||
8 ASH TREE HOUSE,SE5 0TE,None
|
||||
Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990
|
||||
12 ASH TREE HOUSE,SE5 0TE,None
|
||||
FLAT 1 599 HARROW ROAD,W10 4RA,217113930
|
||||
FLAT 2 599 HARROW ROAD,W10 4RA,217113931
|
||||
FLAT 3 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 4 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 5 599 HARROW ROAD,W10 4RA,217113934
|
||||
FLAT 6 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 7 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 8 599 HARROW ROAD,W10 4RA,None
|
||||
"Flat 1, Ohio Building",SE13 7RX,10023226256
|
||||
"Flat 2, Ohio Building",SE13 7RX,10023226257
|
||||
"Apartment 1 Block B, 105, Benwell Road",N7 7BW,10012792307
|
||||
"Apartment 2 Block B, 105, Benwell Road",N7 7BW,10012792308
|
||||
"Apartment 3 Block B, 105, Benwell Road",N7 7BW,10012792309
|
||||
"Apartment 4 Block B, 105, Benwell Road",N7 7BW,10012792310
|
||||
"Apartment 5 Block B, 105, Benwell Road",N7 7BW,10012792311
|
||||
"Apartment 6 Block B, 105, Benwell Road",N7 7BW,10012792312
|
||||
"Apartment 7 Block B, 105, Benwell Road",N7 7BW,10012792313
|
||||
"Apartment 8 Block B, 105, Benwell Road",N7 7BW,10012792314
|
||||
"Apartment 9 Block B, 105, Benwell Road",N7 7BW,10012792315
|
||||
"Apartment 10 Block B, 105, Benwell Road",N7 7BW,10012792316
|
||||
"Apartment 11 Block B, 105, Benwell Road",N7 7BW,10012792317
|
||||
"Apartment 12 Block B, 105, Benwell Road",N7 7BW,10012792318
|
||||
"Apartment 13 Block B, 105, Benwell Road",N7 7BW,10012792319
|
||||
"Apartment 1 Block D, 32, Hornsey Road",N7 7AT,10012792366
|
||||
"Apartment 2 Block D, 32, Hornsey Road",N7 7AT,10012792367
|
||||
"Apartment 3 Block D, 32, Hornsey Road",N7 7AT,10012792368
|
||||
"Apartment 4 Block D, 32, Hornsey Road",N7 7AT,10012792369
|
||||
"Apartment 5 Block D, 32, Hornsey Road",N7 7AT,10012792370
|
||||
"Apartment 6 Block D, 32, Hornsey Road",N7 7AT,"10012792371"
|
||||
"Apartment 7 Block D, 32, Hornsey Road",N7 7AT,10012792372
|
||||
"Apartment 8 Block D, 32, Hornsey Road",N7 7AT,10012792373
|
||||
"Apartment 9 Block D, 32, Hornsey Road",N7 7AT,10012792374
|
||||
"Apartment 10 Block D, 32, Hornsey Road",N7 7AT,10012792375
|
||||
"Apartment 11 Block D, 32, Hornsey Road",N7 7AT,10012792376
|
||||
"Apartment 12 Block D, 32, Hornsey Road",N7 7AT,10012792377
|
||||
"Apartment 13 Block D, 32, Hornsey Road",N7 7AT,10012792378
|
||||
"Apartment 14 Block D, 32, Hornsey Road",N7 7AT,10012792379
|
||||
"Apartment 15 Block D, 32, Hornsey Road",N7 7AT,10012792380
|
||||
"Apartment 16 Block D, 32, Hornsey Road",N7 7AT,"10012792381"
|
||||
"Apartment 17Block D, 32, Hornsey Road",N7 7AT,10012792382
|
||||
"Apartment 18 Block D, 32, Hornsey Road",N7 7AT,10012792383
|
||||
24b Honley Road,SE6 2HZ,None
|
||||
FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
|
||||
2 COLLEGE HOUSE,CM7 1JS,100091449870
|
||||
3 COLLEGE HOUSE,CM7 1JS,100091449871
|
||||
1 Anita Street,M4 5DU,None
|
||||
2 Anita Street,M4 5DU,77123061
|
||||
5 Anita Street,M4 5DU,77123081
|
||||
6 Anita Street,M4 5DU,77123082
|
||||
8 Anita Street,M4 5DU,None
|
||||
9 Anita Street,M4 5DU,None
|
||||
10 Anita Street,M4 5DU,77123051
|
||||
12 Anita Street,M4 5DU,77123053
|
||||
19 Anita Street,M4 5DU,None
|
||||
22 Anita Street,M4 5DU,None
|
||||
26 Anita Street,M4 5DU,77123068
|
||||
28 Anita Street,M4 5DU,None
|
||||
30 Anita Street,M4 5DU,None
|
||||
32 Anita Street,M4 5DU,None
|
||||
33 Anita Street,M4 5DU,77123076
|
||||
34 Anita Street,M4 5DU,None
|
||||
35 Anita Street,M4 5DU,77123078
|
||||
36 Anita Street,M4 5DU,77123079
|
||||
23 George Leigh Street,M4 5DR,77123171
|
||||
25 George Leigh Street,M4 5DR,None
|
||||
35 George Leigh Street,M4 5DR,77123177
|
||||
39 George Leigh Street,M4 5DR,77123179
|
||||
41 George Leigh Street,M4 5DR,None
|
||||
43 George Leigh Street,M4 5DR,None
|
||||
49 George Leigh Street,M4 5DR,None
|
||||
51 George Leigh Street,M4 5DR,77123185
|
||||
55 George Leigh Street,M4 5DR,None
|
||||
57 George Leigh Street,M4 5DR,None
|
||||
"1a, Victoria Square",M4 5DX,77211153
|
||||
2a Victoria Square ,M4 5DX,None
|
||||
"4a, Victoria Square",M4 5DX,77211155
|
||||
5a Victoria Square,M4 5DX,77211156
|
||||
6a Victoria Square,M4 5DX,77211157
|
||||
7a Victoria Square,M4 5DX,77211158
|
||||
8a Victoria Square,M4 5DX,77211159
|
||||
9a Victoria Square,M4 5DX,77211160
|
||||
10a Victoria Square,M4 5DX,77211161
|
||||
11a Victoria Square,M4 5DX,77211162
|
||||
12a Victoria Square,M4 5DX,77211163
|
||||
13a Victoria Square,M4 5DX,77211164
|
||||
14a Victoria Square,M4 5DX,77211165
|
||||
15a Victoria Square,M4 5DX,77211166
|
||||
16a Victoria Square,M4 5DX,77211167
|
||||
17a Victoria Square,M4 5DX,77211168
|
||||
18a Victoria Square,M4 5DX,77211169
|
||||
19a Victoria Square,M4 5DX,77211170
|
||||
20a Victoria Square,M4 5DX,77211171
|
||||
21a Victoria Square,M4 5DY,77211172
|
||||
22a Victoria Square,M4 5DY,None
|
||||
23a Victoria Square,M4 5DY,77211174
|
||||
24a Victoria Square,M4 5DY,77211175
|
||||
25a Victoria Square,M4 5DY,77211176
|
||||
26a Victoria Square,M4 5DY,77211177
|
||||
27a Victoria Square,M4 5DY,77211178
|
||||
28a Victoria Square,M4 5DY,None
|
||||
29a Victoria Square,M4 5DY,77211180
|
||||
30a Victoria Square,M4 5DY,77211181
|
||||
31a Victoria Square,M4 5DY,77211182
|
||||
32a Victoria Square,M4 5DY,77211183
|
||||
33a Victoria Square,M4 5DY,77211184
|
||||
34a Victoria Square,M4 5DY,77211185
|
||||
35a Victoria Square,M4 5DY,None
|
||||
36a Victoria Square,M4 5DY,77211187
|
||||
37a Victoria Square,M4 5DY,77211188
|
||||
38a Victoria Square,M4 5DY,77211189
|
||||
39a Victoria Square,M4 5DY,77211190
|
||||
40a Victoria Square,M4 5DY,None
|
||||
41a Victoria Square,M4 5DY,77211192
|
||||
42a Victoria Square,M4 5DY,77211193
|
||||
43a Victoria Square,M4 5DY,77211194
|
||||
44a Victoria Square,M4 5DY,77211195
|
||||
45a Victoria Square,M4 5DY,77211196
|
||||
46a Victoria Square,M4 5DY,77211197
|
||||
47a Victoria Square,M4 5DY,77211198
|
||||
48a Victoria Square,M4 5DY,77211199
|
||||
49a Victoria Square,M4 5DY,77211200
|
||||
50a Victoria Square,M4 5DY,77211201
|
||||
51a Victoria Square,M4 5DY,77211202
|
||||
52a Victoria Square,M4 5DY,77211203
|
||||
53a Victoria Square,M4 5DY,77211204
|
||||
54a Victoria Square,M4 5DY,77211205
|
||||
55a Victoria Square,M4 5DY,77211206
|
||||
56a Victoria Square,M4 5DZ,77211207
|
||||
57a Victoria Square,M4 5DZ,None
|
||||
58a Victoria Square,M4 5DZ,77211209
|
||||
59a Victoria Square,M4 5DZ,77211210
|
||||
60a Victoria Square,M4 5DZ,77211211
|
||||
61a Victoria Square,M4 5DZ,77211212
|
||||
62a Victoria Square,M4 5DZ,77211213
|
||||
63a Victoria Square,M4 5DZ,None
|
||||
64a Victoria Square,M4 5DZ,77211215
|
||||
65a Victoria Square,M4 5DZ,77211216
|
||||
66a Victoria Square,M4 5DZ,None
|
||||
67a Victoria Square,M4 5DZ,None
|
||||
68a Victoria Square,M4 5DZ,77211219
|
||||
69a Victoria Square,M4 5DZ,77211220
|
||||
70a Victoria Square,M4 5DZ,77211221
|
||||
71a Victoria Square,M4 5DZ,77211222
|
||||
72a Victoria Square,M4 5DZ,77211223
|
||||
73a Victoria Square,M4 5DZ,77211224
|
||||
74a Victoria Square,M4 5DZ,None
|
||||
75a Victoria Square,M4 5DZ,77211226
|
||||
76a Victoria Square,M4 5DZ,77211227
|
||||
77a Victoria Square,M4 5DZ,None
|
||||
78a Victoria Square,M4 5DZ,77211229
|
||||
79a Victoria Square,M4 5DZ,77211230
|
||||
80a Victoria Square,M4 5DZ,77211231
|
||||
81a Victoria Square,M4 5DZ,77211232
|
||||
82 Victoria Square,M4 5DZ,None
|
||||
83a Victoria Square,M4 5DZ,77211234
|
||||
84a Victoria Square,M4 5DZ,None
|
||||
85a Victoria Square,M4 5DZ,77211236
|
||||
86a Victoria Square,M4 5DZ,77211237
|
||||
87a Victoria Square,M4 5DZ,77211238
|
||||
88a Victoria Square,M4 5DZ,None
|
||||
89a Victoria Square,M4 5DZ,77211240
|
||||
90a Victoria Square,M4 5DZ,77211241
|
||||
91a Victoria Square,M4 5DZ,77211242
|
||||
92a Victoria Square,M4 5DZ,77211243
|
||||
93a Victoria Square,M4 5EA,77211244
|
||||
94a Victoria Square,M4 5EA,None
|
||||
95a Victoria Square,M4 5EA,77211246
|
||||
96a Victoria Square,M4 5EA,77211247
|
||||
97a Victoria Square,M4 5EA,77211248
|
||||
98a Victoria Square,M4 5EA,77211249
|
||||
99a Victoria Square,M4 5EA,77211250
|
||||
100a Victoria Square,M4 5EA,77211251
|
||||
101a Victoria Square,M4 5EA,None
|
||||
102a Victoria Square,M4 5EA,None
|
||||
103a Victoria Square,M4 5EA,77211254
|
||||
104a Victoria Square,M4 5EA,77211255
|
||||
105a Victoria Square,M4 5EA,None
|
||||
106a Victoria Square,M4 5EA,77211257
|
||||
107a Victoria Square,M4 5EA,77211258
|
||||
108a Victoria Square,M4 5EA,77211259
|
||||
109a Victoria Square,M4 5EA,77211260
|
||||
110a Victoria Square,M4 5EA,77211261
|
||||
111a Victoria Square,M4 5EA,77211262
|
||||
112a Victoria Square,M4 5EA,None
|
||||
113a Victoria Square,M4 5EA,77211264
|
||||
114a Victoria Square,M4 5EA,77211265
|
||||
115a Victoria Square,M4 5EA,77211266
|
||||
116a Victoria Square,M4 5EA,77211267
|
||||
117a Victoria Square,M4 5EA,None
|
||||
118a Victoria Square,M4 5EA,None
|
||||
119a Victoria Square,M4 5EA,77211270
|
||||
120a Victoria Square,M4 5EA,77211271
|
||||
121a Victoria Square,M4 5EA,77211272
|
||||
122a Victoria Square,M4 5EA,77211273
|
||||
123a Victoria Square,M4 5EA,77211274
|
||||
124a Victoria Square,M4 5EA,None
|
||||
125a Victoria Square,M4 5EA,77211276
|
||||
126a Victoria Square,M4 5EA,77211277
|
||||
127a Victoria Square,M4 5EA,77211278
|
||||
128a Victoria Square,M4 5EA,77211279
|
||||
129a Victoria Square,M4 5EA,77211280
|
||||
130a Victoria Square,M4 5FA,77211281
|
||||
131a Victoria Square,M4 5FA,77211282
|
||||
132a Victoria Square,M4 5FA,77211283
|
||||
133a Victoria Square,M4 5FA,None
|
||||
134a Victoria Square,M4 5FA,77211285
|
||||
135a Victoria Square,M4 5FA,77211286
|
||||
136a Victoria Square,M4 5FA,77211287
|
||||
137a Victoria Square,M4 5FA,77211288
|
||||
138a Victoria Square,M4 5FA,77211289
|
||||
139a Victoria Square,M4 5FA,77211290
|
||||
140a Victoria Square,M4 5FA,77211291
|
||||
141a Victoria Square,M4 5FA,77211292
|
||||
142a Victoria Square,M4 5FA,77211293
|
||||
143a Victoria Square,M4 5FA,77211294
|
||||
144a Victoria Square,M4 5FA,77211295
|
||||
145a Victoria Square,M4 5FA,None
|
||||
146a Victoria Square,M4 5FA,77211297
|
||||
147a Victoria Square,M4 5FA,77211298
|
||||
148a Victoria Square,M4 5FA,77211299
|
||||
149a Victoria Square,M4 5FA,77211300
|
||||
150a Victoria Square,M4 5FA,77211301
|
||||
151a Victoria Square,M4 5FA,None
|
||||
152a Victoria Square,M4 5FA,77211303
|
||||
153a Victoria Square,M4 5FA,None
|
||||
154a Victoria Square,M4 5FA,77211305
|
||||
155a Victoria Square,M4 5FA,None
|
||||
156a Victoria Square,M4 5FA,77211307
|
||||
157a Victoria Square,M4 5FA,77211308
|
||||
158a Victoria Square,M4 5FA,77211309
|
||||
159a Victoria Square,M4 5FA,None
|
||||
160a Victoria Square,M4 5FA,77211311
|
||||
161a Victoria Square,M4 5FA,None
|
||||
162a Victoria Square,M4 5FA,None
|
||||
163a Victoria Square,M4 5FA,77211314
|
||||
164a Victoria Square,M4 5FA,77211315
|
||||
165a Victoria Square,M4 5FA,77211316
|
||||
166a Victoria Square,M4 5FA,None
|
||||
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
|
||||
|
|
|
@ -1,8 +1,22 @@
|
|||
import os
|
||||
from functools import lru_cache
|
||||
from pydantic_settings import BaseSettings
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def resolve_env_file() -> Optional[str]:
    """Pick the dotenv file for the current ``ENVIRONMENT`` variable.

    ``ENVIRONMENT`` defaults to ``"local"`` when unset.

    Returns:
        ``"backend/.env"`` for "local", ``"backend/.env.test"`` for "test",
        and ``None`` for any other value (prod = no env file).
    """
    env_files = {
        "local": "backend/.env",
        "test": "backend/.env.test",
    }
    # Environments not listed above (e.g. prod) get no env file.
    return env_files.get(os.getenv("ENVIRONMENT", "local"))
|
||||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
API_KEY: str
|
||||
API_KEY_NAME: str = "X-API-KEY"
|
||||
|
|
@ -41,8 +55,10 @@ class Settings(BaseSettings):
|
|||
AWS_SECRET_KEY_ID: Optional[str] = None
|
||||
AWS_DEFAULT_REGION: Optional[str] = None
|
||||
|
||||
class Config:
|
||||
env_file = "backend/.env"
|
||||
model_config = SettingsConfigDict(
|
||||
env_file=resolve_env_file(),
|
||||
env_file_encoding="utf-8",
|
||||
)
|
||||
|
||||
|
||||
@lru_cache()
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@ from contextlib import contextmanager
|
|||
from backend.app.config import get_settings
|
||||
from sqlmodel import Session
|
||||
|
||||
connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
|
||||
connection_string = (
|
||||
"postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
|
||||
)
|
||||
db_string = connection_string.format(
|
||||
drivername="psycopg2", # You'll need to use psycopg2 driver for PostgreSQL
|
||||
username=get_settings().DB_USERNAME,
|
||||
|
|
@ -28,7 +30,9 @@ db_engine = create_engine(
|
|||
|
||||
def get_db_session():
|
||||
if db_engine is None:
|
||||
raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.")
|
||||
raise RuntimeError(
|
||||
"Database is not configured. Set DATABASE_URL in environment variables."
|
||||
)
|
||||
return Session(db_engine)
|
||||
|
||||
|
||||
|
|
|
|||
12
backend/app/db/functions/condition_functions.py
Normal file
12
backend/app/db/functions/condition_functions.py
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
from typing import List
|
||||
from sqlalchemy import insert, delete
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from backend.app.db.connection import db_session, db_read_session
|
||||
from backend.app.db.models.condition import PropertyConditionSurveyModel
|
||||
|
||||
|
||||
def bulk_insert_property_surveys(
    session: Session,
    surveys: List[PropertyConditionSurveyModel],
) -> None:
    """Placeholder for bulk-inserting property condition surveys.

    Args:
        session: Database session; currently unused.
        surveys: Survey models; currently unused.

    Raises:
        NotImplementedError: Always -- the insert is not implemented yet.
    """
    raise NotImplementedError
|
||||
97
backend/app/db/models/condition.py
Normal file
97
backend/app/db/models/condition.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
from sqlalchemy import (
|
||||
BigInteger,
|
||||
Column,
|
||||
Date,
|
||||
ForeignKey,
|
||||
Integer,
|
||||
String,
|
||||
Enum as SqlEnum,
|
||||
)
|
||||
from sqlalchemy.orm import declarative_base, relationship
|
||||
|
||||
from backend.condition.domain.aspect_type import AspectType
|
||||
from backend.condition.domain.element_type import ElementType
|
||||
|
||||
Base = declarative_base()
|
||||
|
||||
ElementTypeDb = SqlEnum(
|
||||
ElementType,
|
||||
name="element_type",
|
||||
native_enum=True,
|
||||
values_callable=lambda enum: [e.value for e in enum],
|
||||
)
|
||||
|
||||
AspectTypeDb = SqlEnum(
|
||||
AspectType,
|
||||
name="aspect_type",
|
||||
native_enum=True,
|
||||
values_callable=lambda enum: [a.value for a in enum],
|
||||
)
|
||||
|
||||
|
||||
class PropertyConditionSurveyModel(Base):
    """Declarative model mapped to the ``property_condition_survey`` table."""

    __tablename__ = "property_condition_survey"

    id = Column(BigInteger, autoincrement=True, primary_key=True)
    uprn = Column(BigInteger, nullable=False)

    date = Column(Date, nullable=False)
    source = Column(String, nullable=False)

    # Counterpart of ElementModel.survey; the "all, delete-orphan" cascade
    # removes elements together with the survey or when detached from it.
    elements = relationship(
        "ElementModel",
        cascade="all, delete-orphan",
        back_populates="survey",
    )
|
||||
|
||||
|
||||
class ElementModel(Base):
    """Declarative model mapped to the ``element`` table."""

    __tablename__ = "element"  # TODO: rename to survey_element?

    id = Column(BigInteger, autoincrement=True, primary_key=True)

    # Required reference to the owning row in property_condition_survey.
    survey_id = Column(
        BigInteger,
        ForeignKey("property_condition_survey.id"),
        nullable=False,
    )

    element_type = Column(ElementTypeDb, nullable=False)
    element_instance = Column(BigInteger, nullable=False)

    # Counterpart of PropertyConditionSurveyModel.elements.
    survey = relationship(
        "PropertyConditionSurveyModel",
        back_populates="elements",
    )

    # Counterpart of AspectConditionModel.element; the "all, delete-orphan"
    # cascade removes aspect conditions together with the element.
    aspect_conditions = relationship(
        "AspectConditionModel",
        cascade="all, delete-orphan",
        back_populates="element",
    )
|
||||
|
||||
|
||||
class AspectConditionModel(Base):
    """Declarative model mapped to the ``aspect_condition`` table."""

    __tablename__ = "aspect_condition"  # TODO: rename to survey_aspect?

    id = Column(BigInteger, autoincrement=True, primary_key=True)

    # Required reference to the owning row in element.
    element_id = Column(
        BigInteger,
        ForeignKey("element.id"),
        nullable=False,
    )

    aspect_type = Column(AspectTypeDb, nullable=False)
    aspect_instance = Column(BigInteger, nullable=False)

    # The remaining columns do not set nullable=False, unlike the ones above.
    value = Column(String)
    quantity = Column(Integer)
    install_date = Column(Date)
    renewal_year = Column(Integer)
    comments = Column(String)

    # Counterpart of ElementModel.aspect_conditions.
    element = relationship(
        "ElementModel",
        back_populates="aspect_conditions",
    )
|
||||
|
|
@ -24,7 +24,7 @@ def get_cleaned():
|
|||
|
||||
cleaned = read_from_s3(
|
||||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||
bucket_name="retrofit-data-{environment}".format(environment=get_settings().ENVIRONMENT)
|
||||
bucket_name=get_settings().DATA_BUCKET
|
||||
)
|
||||
|
||||
cleaned = msgpack.unpackb(cleaned, raw=False)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
# fastapi
|
||||
fastapi==0.115.2
|
||||
sqlalchemy==2.0.36
|
||||
|
|
@ -12,5 +13,4 @@ boto3==1.35.44
|
|||
openpyxl==3.1.2
|
||||
# Basic
|
||||
pytz
|
||||
sqlmodel
|
||||
|
||||
sqlmodel
|
||||
|
|
@ -20,7 +20,7 @@ The processor currently supports file formats provided by **Peabody** and **LBWF
|
|||
|
||||
The `local_runner` script allows the processor to be executed in a local environment.
|
||||
|
||||
1. Copy a sample input file into the `sample_data/` directory.
|
||||
1. Copy sample input file(s) into the `sample_data/` directory. If working with Peabody data, you'll need the Landlord Reference / UPRN lookup file as well.
|
||||
2. Update `local_runner.py` as required, specifically the definitions of:
|
||||
- `lbwf_path`
|
||||
- `peabody_path`
|
||||
|
|
|
|||
33
backend/condition/condition_trigger_request.py
Normal file
33
backend/condition/condition_trigger_request.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
from enum import Enum
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class ConditionFileType(Enum):
    """Condition file formats that can be named in a trigger request."""

    # TODO: make these asset management systems rather than client names
    LBWF = "LBWF"
    Peabody = "Peabody"
|
||||
|
||||
|
||||
class ConditionTriggerRequest(BaseModel):
    """Request payload identifying a condition file to process.

    ``file_type``, ``trigger_file_bucket`` and ``trigger_file_key`` are
    required; the two ``uprn_lookup_*`` fields default to ``None``.
    """

    file_type: ConditionFileType

    # Location of the trigger file.
    trigger_file_bucket: str  # TODO: get this from settings
    trigger_file_key: str

    # Location of an optional UPRN lookup file.
    uprn_lookup_file_bucket: Optional[str] = None  # TODO: get this from settings
    uprn_lookup_file_key: Optional[str] = None
|
||||
|
||||
|
||||
# {
|
||||
# "file_type": "Peabody",
|
||||
# "trigger_file_bucket": "condition-data-dev",
|
||||
# "trigger_file_key": "input/peabody/2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx",
|
||||
# "uprn_lookup_file_bucket": "condition-data-dev",
|
||||
# "uprn_lookup_file_key": "input/peabody/uprn-lookup/PeabodyPropertymatched_Dec25_propref_UPRN.csv"
|
||||
# }
|
||||
|
||||
# {
|
||||
# "file_type": "LBWF",
|
||||
# "trigger_file_bucket": "condition-data-dev",
|
||||
# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx",
|
||||
# }
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from datetime import date
|
||||
|
||||
from backend.condition.domain.aspect_condition import AspectCondition
|
||||
|
|
|
|||
|
|
@ -1,16 +0,0 @@
|
|||
from enum import Enum
|
||||
|
||||
class FileType(Enum):
    """Source formats recognised by ``detect_file_type``.

    Member values are the lower-case form of the member names.
    """

    LBWF = "lbwf"
    Peabody = "peabody"
|
||||
|
||||
def detect_file_type(filepath: str) -> FileType:
    """Infer the file type from a path using a case-insensitive substring match.

    Args:
        filepath: Path (or key) of the file being processed.

    Returns:
        ``FileType.LBWF`` when the lower-cased path contains ``"lbwf"``,
        otherwise ``FileType.Peabody`` when it contains ``"peabody"``.

    Raises:
        ValueError: If neither marker appears in the path.
    """
    lowered = filepath.lower()

    # Order matters: a path containing both markers resolves to LBWF.
    candidates = (
        ("lbwf", FileType.LBWF),
        ("peabody", FileType.Peabody),
    )
    for marker, file_type in candidates:
        if marker in lowered:
            return file_type

    raise ValueError("Unrecognised file path")
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
from typing import Mapping, Any
|
||||
from io import BytesIO
|
||||
|
||||
from utils.logger import setup_logger
|
||||
from backend.condition.processor import process_file
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Temporary entry-point stub used for proof-of-concept wiring.

    Hands ``process_file`` an empty in-memory stream together with the event's
    ``source_key`` (``"unknown-source"`` when the key is absent). ``context``
    is not used.
    """
    # Temporary stub for PoC wiring
    empty_stream = BytesIO(b"")
    process_file(empty_stream, event.get("source_key", "unknown-source"))
|
||||
48
backend/condition/handler/Dockerfile
Normal file
48
backend/condition/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
# Container image for the condition-data handler (AWS Lambda Python base image).
FROM public.ecr.aws/lambda/python:3.11
# For local running:
# FROM python:3.11.10-bullseye

# Build-time database coordinates, supplied via --build-arg.
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME

# Set working directory (Lambda task root)
WORKDIR /var/task

# Environment: persist the build args so they are visible to the running handler.
ENV DB_HOST=${DEV_DB_HOST} \
    DB_PORT=${DEV_DB_PORT} \
    DB_NAME=${DEV_DB_NAME}

# -----------------------------
# Copy requirements FIRST (for Docker layer caching)
# -----------------------------
COPY backend/condition/handler/requirements.txt .

# Install dependencies into Lambda runtime
RUN pip install --no-cache-dir -r requirements.txt

# -----------------------------
# Copy application code
# -----------------------------
COPY utils/ utils/
COPY backend/condition/ backend/condition/

COPY backend/app/db/models/condition.py backend/app/db/models/condition.py
COPY backend/app/db/connection.py backend/app/db/connection.py
COPY backend/app/config.py backend/app/config.py

COPY backend/__init__.py backend/__init__.py
COPY backend/app/__init__.py backend/app/__init__.py
COPY backend/app/db/__init__.py backend/app/db/__init__.py

# Copied after the dependency install so that editing the env file does not
# invalidate the cached pip layer above.
# NOTE(review): this bakes backend/.env.test into the image as backend/.env;
# confirm it contains no credentials.
COPY backend/.env.test backend/.env

# -----------------------------
# Lambda handler
# -----------------------------
CMD ["backend/condition/handler/handler.handler"]
# For local running
# CMD ["python", "-m", "backend.condition.handler.handler"]
|
||||
51
backend/condition/handler/handler.py
Normal file
51
backend/condition/handler/handler.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import json
|
||||
from typing import Mapping, Any
|
||||
from io import BytesIO
|
||||
|
||||
from backend.condition.condition_trigger_request import ConditionTriggerRequest
|
||||
from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3
|
||||
from backend.condition.processor import process_file
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_io_from_s3
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Process every condition-file trigger contained in ``event``.

    Each entry of ``event["Records"]`` must carry a JSON string under ``"body"``
    that validates as a ``ConditionTriggerRequest``. For each record the trigger
    file is read from S3 and passed to ``process_file`` together with an
    optional UPRN lookup.

    Failures are handled per record: the exception is logged with its
    traceback and the loop moves on to the next record, so one bad record
    never aborts the rest of the batch and nothing is re-raised to the caller.

    Args:
        event: Invocation payload; a missing ``"Records"`` key is treated as an
            empty batch. (Presumably an SQS-style batch event; confirm against
            the trigger configuration.)
        context: Invocation context; unused.
    """
    for record in event.get("Records", []):
        try:
            body_dict = json.loads(record["body"])
            logger.debug("Validating request body")
            payload = ConditionTriggerRequest.model_validate(body_dict)
            logger.debug("Successfully validated request body")

            # A lookup is only built when BOTH bucket and key are supplied.
            uprn_lookup = None
            if payload.uprn_lookup_file_bucket and payload.uprn_lookup_file_key:
                logger.debug("Getting UPRN lookup file from s3")
                uprn_lookup = UprnLookupS3(
                    bucket=payload.uprn_lookup_file_bucket,
                    key=payload.uprn_lookup_file_key,
                )  # TODO: replace with postgres implementation
                logger.debug("Successfully got UPRN lookup file from s3")

            logger.debug("Getting conditions data from s3")
            file_bytes: BytesIO = read_io_from_s3(
                bucket_name=payload.trigger_file_bucket,
                file_key=payload.trigger_file_key,
            )
            logger.debug(
                "Successfully got conditions data from s3. Moving on to process file..."
            )

            process_file(
                file_stream=file_bytes,
                file_type=payload.file_type,
                uprn_lookup=uprn_lookup,
            )

        except Exception as e:
            # logger.exception logs at ERROR level like before, but also
            # records the traceback, which the plain message alone loses.
            logger.exception(f"Failed to process record: {e}")
|
||||
9
backend/condition/handler/requirements.txt
Normal file
9
backend/condition/handler/requirements.txt
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
openpyxl
sqlmodel
pydantic-settings
psycopg2-binary==2.9.10

# pandas isn't used, but needed for importing from utils.s3
pandas==2.2.2
numpy==1.26.4
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
from pathlib import Path
|
||||
|
||||
from backend.condition.condition_trigger_request import ConditionFileType
|
||||
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
|
||||
from backend.condition.processor import process_file
|
||||
|
||||
|
||||
|
|
@ -20,13 +22,27 @@ def main() -> None:
|
|||
/ "peabody"
|
||||
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
|
||||
)
|
||||
filepaths = [lbwf_path, peabody_path]
|
||||
peabody_uprn_lookup_path: Path = (
|
||||
path / "peabody" / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
|
||||
)
|
||||
# filepaths = [lbwf_path, peabody_path]
|
||||
filepaths = [lbwf_path]
|
||||
# filepaths = [peabody_path]
|
||||
|
||||
uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix())
|
||||
|
||||
def get_file_type(file_path: str) -> ConditionFileType:
    """Infer the condition file type from a path.

    The match is a case-insensitive substring test, checked for ``"peabody"``
    first and ``"lbwf"`` second.

    Args:
        file_path: Path of the sample file, as a string.

    Returns:
        The matching ``ConditionFileType`` member.

    Raises:
        ValueError: If the path contains neither marker. Previously the
            function fell through and returned ``None`` despite its return
            annotation.
    """
    lowered = file_path.lower()
    if "peabody" in lowered:
        return ConditionFileType.Peabody
    if "lbwf" in lowered:
        return ConditionFileType.LBWF
    raise ValueError(f"Unrecognised file path: {file_path}")
|
||||
|
||||
for fp in filepaths:
|
||||
with fp.open("rb") as f:
|
||||
process_file(
|
||||
file_stream=f,
|
||||
source_key=fp.as_posix(),
|
||||
file_type=get_file_type(fp.as_posix()),
|
||||
uprn_lookup=uprn_lookup,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
8
backend/condition/lookups/uprn_lookup.py
Normal file
8
backend/condition/lookups/uprn_lookup.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import BinaryIO, Dict
|
||||
|
||||
|
||||
class UprnLookup(ABC):
    """Abstract source of a property-reference to UPRN mapping."""

    @abstractmethod
    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Return a dict mapping property reference strings to integer UPRNs.

        Concrete subclasses must override this method; the base implementation
        does nothing and returns ``None``.
        """
|
||||
23
backend/condition/lookups/uprn_lookup_csv.py
Normal file
23
backend/condition/lookups/uprn_lookup_csv.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import csv
|
||||
from io import TextIOWrapper
|
||||
from typing import BinaryIO, Dict, TextIO
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
|
||||
|
||||
class UprnLookupLocal(UprnLookup):
    """UPRN lookup backed by a CSV file on the local filesystem.

    The CSV must have a header row containing ``reference`` and ``out_uprn``
    columns.
    """

    def __init__(self, csv_path: str):
        """Store the path of the CSV file; nothing is read until lookup time."""
        self.csv_path = csv_path

    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Open the CSV at ``self.csv_path`` and return its reference -> UPRN map."""
        with open(self.csv_path, "rb") as f:
            return self.parse_csv(f)

    def parse_csv(self, file_stream: BinaryIO) -> Dict[str, int]:
        """Parse a binary CSV stream into a reference -> UPRN mapping.

        Rows whose ``reference`` or ``out_uprn`` cell is missing, empty or
        whitespace-only are skipped. When a reference appears more than once,
        the last row wins.

        Args:
            file_stream: Binary stream containing UTF-8 encoded CSV data, with
                or without a byte-order mark.

        Returns:
            Dict mapping the stripped reference to the UPRN as an ``int``.

        Raises:
            KeyError: If the header lacks ``reference`` or ``out_uprn``.
            ValueError: If a non-blank ``out_uprn`` cell is not an integer.
        """
        # "utf-8-sig" strips a leading BOM if present; without that the first
        # header would read "\ufeffreference" and every row lookup would raise
        # KeyError. newline="" lets the csv module handle line endings itself.
        text_stream: TextIO = TextIOWrapper(
            file_stream, encoding="utf-8-sig", newline=""
        )
        mapping: Dict[str, int] = {}
        for row in csv.DictReader(text_stream):
            # Strip BEFORE the emptiness check so whitespace-only cells are
            # skipped instead of reaching int("") and raising.
            reference = (row["reference"] or "").strip()
            uprn = (row["out_uprn"] or "").strip()
            if not reference or not uprn:
                continue
            mapping[reference] = int(uprn)
        return mapping
|
||||
29
backend/condition/lookups/uprn_lookup_s3.py
Normal file
29
backend/condition/lookups/uprn_lookup_s3.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import csv
|
||||
from io import BytesIO, TextIOWrapper
|
||||
from typing import BinaryIO, Dict, TextIO
|
||||
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
from utils.s3 import read_io_from_s3
|
||||
|
||||
|
||||
class UprnLookupS3(UprnLookup):
    """UPRN lookup backed by a CSV object stored in S3.

    The CSV must have a header row containing ``reference`` and ``out_uprn``
    columns.
    """

    def __init__(self, bucket: str = "", key: str = ""):
        """Store the object's location; nothing is fetched until lookup time."""
        self.bucket = bucket
        self.key = key

    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Read the CSV object from S3 and return its reference -> UPRN map."""
        file_bytes: BytesIO = read_io_from_s3(
            bucket_name=self.bucket, file_key=self.key
        )

        return self._parse_csv_bytes(file_bytes)

    def _parse_csv_bytes(self, file_stream: BinaryIO) -> Dict[str, int]:
        """Parse a binary CSV stream into a reference -> UPRN mapping.

        Rows whose ``reference`` or ``out_uprn`` cell is missing, empty or
        whitespace-only are skipped. When a reference appears more than once,
        the last row wins.

        Args:
            file_stream: Binary stream containing UTF-8 encoded CSV data, with
                or without a byte-order mark.

        Returns:
            Dict mapping the stripped reference to the UPRN as an ``int``.

        Raises:
            KeyError: If the header lacks ``reference`` or ``out_uprn``.
            ValueError: If a non-blank ``out_uprn`` cell is not an integer.
        """
        # "utf-8-sig" strips a leading BOM if present; without that the first
        # header would read "\ufeffreference" and every row lookup would raise
        # KeyError. newline="" lets the csv module handle line endings itself.
        text_stream: TextIO = TextIOWrapper(
            file_stream, encoding="utf-8-sig", newline=""
        )
        mapping: Dict[str, int] = {}
        for row in csv.DictReader(text_stream):
            # Strip BEFORE the emptiness check so whitespace-only cells are
            # skipped instead of reaching int("") and raising.
            reference = (row["reference"] or "").strip()
            uprn = (row["out_uprn"] or "").strip()
            if not reference or not uprn:
                continue
            mapping[reference] = int(uprn)
        return mapping
|
||||
|
|
@ -1,27 +1,35 @@
|
|||
from typing import Optional
|
||||
from backend.condition.condition_trigger_request import ConditionFileType
|
||||
from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper
|
||||
from backend.condition.domain.mapping.mapper import Mapper
|
||||
from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper
|
||||
from backend.condition.file_type import FileType
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
from backend.condition.parsing.parser import Parser
|
||||
from backend.condition.parsing.lbwf_parser import LbwfParser
|
||||
from backend.condition.parsing.peabody_parser import PeabodyParser
|
||||
|
||||
|
||||
def select_parser(file_type: FileType) -> Parser:
|
||||
if file_type is FileType.LBWF:
|
||||
def select_parser(
|
||||
file_type: ConditionFileType, uprn_lookup: Optional[UprnLookup] = None
|
||||
) -> Parser:
|
||||
if file_type is ConditionFileType.LBWF:
|
||||
return LbwfParser()
|
||||
|
||||
if file_type is FileType.Peabody:
|
||||
return PeabodyParser()
|
||||
if file_type is ConditionFileType.Peabody:
|
||||
if not uprn_lookup:
|
||||
raise ValueError(
|
||||
"Cannot instantiate Peabody Parser without UPRN lookup being provided"
|
||||
)
|
||||
return PeabodyParser(uprn_lookup=uprn_lookup)
|
||||
|
||||
raise ValueError("Unrecognised file type, unable to instantiate Parser")
|
||||
|
||||
|
||||
def select_mapper(file_type: FileType) -> Mapper:
|
||||
if file_type is FileType.LBWF:
|
||||
def select_mapper(file_type: ConditionFileType) -> Mapper:
|
||||
if file_type is ConditionFileType.LBWF:
|
||||
return LbwfMapper()
|
||||
|
||||
if file_type is FileType.Peabody:
|
||||
if file_type is ConditionFileType.Peabody:
|
||||
return PeabodyMapper()
|
||||
|
||||
raise ValueError("Unrecognised file type, unable to instantiate Mapper")
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
|
||||
from typing import BinaryIO, Any, Dict, Iterator, List, Optional, Tuple
|
||||
from openpyxl import Workbook, load_workbook
|
||||
from collections import defaultdict
|
||||
|
||||
|
|
@ -15,7 +15,10 @@ logger = setup_logger()
|
|||
|
||||
class LbwfParser(Parser):
|
||||
|
||||
def parse(self, file_stream: BinaryIO) -> Any:
|
||||
def parse(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
) -> Any:
|
||||
wb: Workbook = load_workbook(file_stream)
|
||||
address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(
|
||||
wb
|
||||
|
|
|
|||
|
|
@ -1,8 +1,12 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import BinaryIO, Any
|
||||
from typing import BinaryIO, Any, Dict, Optional
|
||||
|
||||
|
||||
class Parser(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, file_stream: BinaryIO) -> Any:
|
||||
pass
|
||||
def parse(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
) -> Any:
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -1,26 +1,43 @@
|
|||
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
|
||||
from openpyxl import Workbook, load_workbook
|
||||
from collections import defaultdict
|
||||
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
from backend.condition.parsing.parser import Parser
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
|
||||
PeabodyAssetCondition,
|
||||
)
|
||||
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
class PeabodyParser(Parser):
|
||||
def parse(self, file_stream: BinaryIO) -> Any:
|
||||
wb: Workbook = load_workbook(file_stream)
|
||||
address_to_uprn_map: Dict[str, int] = PeabodyParser._generate_address_to_uprn_dict(wb)
|
||||
|
||||
assets = self._parse_assets(wb)
|
||||
|
||||
return self._group_assets_into_properties(
|
||||
assets=assets,
|
||||
address_to_uprn_map=address_to_uprn_map,
|
||||
class PeabodyParser(Parser):
|
||||
def __init__(self, uprn_lookup: UprnLookup):
|
||||
self.uprn_lookup: UprnLookup = uprn_lookup # TODO: move this to the ABC?
|
||||
|
||||
def parse(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
) -> Any:
|
||||
file_stream.seek(0)
|
||||
logger.debug("[PeabodyParser] Loading workbook...")
|
||||
wb: Workbook = load_workbook(file_stream, read_only=True, data_only=True)
|
||||
logger.debug("[PeabodyParser] Successfully loaded workbook. Parsing assets...")
|
||||
assets = PeabodyParser._parse_assets(wb)
|
||||
logger.debug(
|
||||
"[PeabodyParser] Successfully parsed assets. Parsing UPRN lookup..."
|
||||
)
|
||||
|
||||
location_ref_to_uprn_map = self.uprn_lookup.get_property_ref_to_uprn_lookup()
|
||||
logger.debug("[PeabodyParser] Successfully parsed UPRN lookup")
|
||||
return PeabodyParser._group_assets_into_properties(
|
||||
assets=assets,
|
||||
location_ref_to_uprn_map=location_ref_to_uprn_map,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
|
||||
|
|
@ -33,39 +50,44 @@ class PeabodyParser(Parser):
|
|||
assets: List[PeabodyAssetCondition] = []
|
||||
for row in asset_rows:
|
||||
try:
|
||||
asset = PeabodyParser._map_row_to_asset_record(row, asset_header_indexes)
|
||||
asset = PeabodyParser._map_row_to_asset_record(
|
||||
row, asset_header_indexes
|
||||
)
|
||||
if not asset.is_block_level:
|
||||
# Block-level condition surveys are out of scope for now
|
||||
# until we have a wider think on how to handle block
|
||||
assets.append(asset) # TODO: handle block-level assets
|
||||
# until we have a wider think on how to handle blocks
|
||||
assets.append(asset) # TODO: handle block-level assets
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error mapping Peabody row to asset record: {e}")
|
||||
continue
|
||||
|
||||
return assets
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _group_assets_into_properties(
|
||||
assets: List[PeabodyAssetCondition],
|
||||
address_to_uprn_map: Dict[str, int],
|
||||
location_ref_to_uprn_map: Dict[str, int],
|
||||
) -> List[PeabodyProperty]:
|
||||
assets_by_address: DefaultDict[str, List[PeabodyAssetCondition]] = defaultdict(list)
|
||||
assets_by_location_reference: DefaultDict[str, List[PeabodyAssetCondition]] = (
|
||||
defaultdict(list)
|
||||
)
|
||||
|
||||
for asset in assets:
|
||||
if asset.full_address is None:
|
||||
if asset.lo_reference is None:
|
||||
continue
|
||||
|
||||
address = asset.full_address.strip()
|
||||
assets_by_address[address].append(asset)
|
||||
assets_by_location_reference[asset.lo_reference].append(asset)
|
||||
|
||||
properties: List[PeabodyProperty] = []
|
||||
failed_mappings_count = 0
|
||||
|
||||
for address, grouped_assets in assets_by_address.items():
|
||||
uprn = address_to_uprn_map.get(address)
|
||||
for location_ref, grouped_assets in assets_by_location_reference.items():
|
||||
|
||||
uprn = location_ref_to_uprn_map.get(location_ref)
|
||||
|
||||
if uprn is None:
|
||||
logger.warning(f"No UPRN found for address: {address}")
|
||||
failed_mappings_count += 1
|
||||
continue
|
||||
|
||||
properties.append(
|
||||
|
|
@ -75,9 +97,9 @@ class PeabodyParser(Parser):
|
|||
)
|
||||
)
|
||||
|
||||
logger.warning(f"No UPRN found for {failed_mappings_count} Location References")
|
||||
return properties
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _map_row_to_asset_record(
|
||||
row: Any | Tuple[object | None, ...],
|
||||
|
|
@ -102,39 +124,9 @@ class PeabodyParser(Parser):
|
|||
condition_survey_date=row[header_indexes["condition_survey_date"]],
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]:
|
||||
sheet = wb["Survey Records - D & Lower"]
|
||||
rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)
|
||||
|
||||
headers = next(rows)
|
||||
header_indexes: Dict[str, int] = PeabodyParser._get_column_indexes_by_name(headers)
|
||||
|
||||
address_idx = header_indexes["full_address"]
|
||||
|
||||
|
||||
address_to_uprn: Dict[str, int] = {}
|
||||
# Generate random UPRNs for now
|
||||
next_uprn = 1 # TODO: get real UPRNs
|
||||
|
||||
for row in rows:
|
||||
address = row[address_idx]
|
||||
|
||||
if address is None:
|
||||
continue
|
||||
|
||||
address = address.strip()
|
||||
|
||||
if address not in address_to_uprn:
|
||||
address_to_uprn[address] = next_uprn
|
||||
next_uprn += 1
|
||||
|
||||
return address_to_uprn
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _get_column_indexes_by_name(
|
||||
headers: Tuple[object | None, ...]
|
||||
headers: Tuple[object | None, ...],
|
||||
) -> Dict[str, int]:
|
||||
index: Dict[str, int] = {}
|
||||
|
||||
|
|
@ -142,4 +134,4 @@ class PeabodyParser(Parser):
|
|||
if isinstance(header, str):
|
||||
index[header] = i
|
||||
|
||||
return index
|
||||
return index
|
||||
|
|
|
|||
87
backend/condition/persistence/condition_postgres.py
Normal file
87
backend/condition/persistence/condition_postgres.py
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
import time
|
||||
from typing import List, Optional
|
||||
from sqlmodel import Session
|
||||
|
||||
from utils.logger import setup_logger
|
||||
from backend.app.db.models.condition import (
|
||||
AspectConditionModel,
|
||||
ElementModel,
|
||||
PropertyConditionSurveyModel,
|
||||
)
|
||||
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
|
||||
from backend.app.db.connection import db_session
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class ConditionPostgres:
    """Persists property condition surveys through a SQLModel session."""

    def bulk_insert_surveys(
        self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100
    ) -> None:
        """Map domain surveys to SQLModel objects and insert them in batches.

        Each batch is added to the session and committed separately, so
        batches committed before a failure remain committed.

        Args:
            surveys: Domain surveys to persist.
            batch_size: Maximum number of surveys per commit. ``None`` means
                "no batching": everything goes in a single batch.

        Raises:
            ValueError: If ``batch_size`` is zero or negative. A negative
                value used to produce an empty ``range`` and silently insert
                nothing.
        """
        if batch_size is not None and batch_size <= 0:
            raise ValueError("batch_size must be a positive integer or None")

        logger.debug(
            f"[ConditionPostgres] Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
        )
        survey_models: List[PropertyConditionSurveyModel] = [
            ConditionPostgres.map_survey_to_model(s) for s in surveys
        ]
        total: int = len(survey_models)

        # max(total, 1) keeps the range() step non-zero when there is nothing
        # to insert and batching is disabled.
        step: int = batch_size if batch_size is not None else max(total, 1)
        logger.debug(
            f"[ConditionPostgres] Finished mapping {total} surveys. Writing to database in batches of {step}..."
        )

        with db_session() as session:
            logger.info("[ConditionPostgres] Successfully made connection to database")
            for start in range(0, total, step):
                end = min(start + step, total)
                batch = survey_models[start:end]

                t0: float = time.perf_counter()
                ConditionPostgres._insert_surveys_batch(batch, session)
                elapsed: float = time.perf_counter() - t0

                logger.info(
                    f"Inserted batch {start} - {end} ({len(batch)} surveys) in {elapsed} seconds",
                )

    @staticmethod
    def map_survey_to_model(
        survey: PropertyConditionSurvey,
    ) -> PropertyConditionSurveyModel:
        """Convert one domain survey into its nested SQLModel representation.

        The survey's elements, and each element's aspect conditions, are
        copied field by field into new model objects in their original order.
        """
        survey_model = PropertyConditionSurveyModel(
            uprn=survey.uprn,
            date=survey.date,
            source=survey.source,
            elements=[],
        )

        for element in survey.elements:
            element_model = ElementModel(
                element_type=element.element_type,
                element_instance=element.element_instance,
                aspect_conditions=[],
            )

            for aspect in element.aspect_conditions:
                aspect_model = AspectConditionModel(
                    aspect_type=aspect.aspect_type,
                    aspect_instance=aspect.aspect_instance,
                    value=aspect.value,
                    quantity=aspect.quantity,
                    install_date=aspect.install_date,
                    renewal_year=aspect.renewal_year,
                    comments=aspect.comments,
                )

                element_model.aspect_conditions.append(aspect_model)

            survey_model.elements.append(element_model)

        return survey_model

    @staticmethod
    def _insert_surveys_batch(
        surveys: List[PropertyConditionSurveyModel], session: Session
    ) -> None:
        """Add one batch of survey models to the session and commit it."""
        session.add_all(surveys)
        session.commit()
|
||||
|
|
@ -1,25 +1,38 @@
|
|||
from typing import Any, BinaryIO, List
|
||||
from typing import Any, BinaryIO, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from backend.condition.condition_trigger_request import ConditionFileType
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
from utils.logger import setup_logger
|
||||
from backend.condition.domain.mapping.mapper import Mapper
|
||||
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
|
||||
from backend.condition.parsing.parser import Parser
|
||||
from utils.logger import setup_logger
|
||||
from backend.condition.file_type import FileType, detect_file_type
|
||||
from backend.condition.persistence.condition_postgres import ConditionPostgres
|
||||
from backend.condition.parsing.factory import select_parser, select_mapper
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
def process_file(file_stream: BinaryIO, source_key: str) -> None:
|
||||
print(f"[processor] Received file: {source_key}")
|
||||
|
||||
def process_file(
|
||||
file_stream: BinaryIO,
|
||||
file_type: ConditionFileType,
|
||||
uprn_lookup: Optional[UprnLookup],
|
||||
) -> None:
|
||||
# Instantiation
|
||||
file_type: FileType = detect_file_type(source_key)
|
||||
parser: Parser = select_parser(file_type)
|
||||
logger.debug(f"[processor] Instantiating classes...")
|
||||
parser: Parser = select_parser(file_type, uprn_lookup)
|
||||
mapper: Mapper = select_mapper(file_type)
|
||||
persistence = ConditionPostgres()
|
||||
|
||||
logger.debug(f"[processor] Finished instantiating classes. Calling Parser...")
|
||||
|
||||
# Orchestration
|
||||
raw_properties: List[Any] = parser.parse(file_stream)
|
||||
|
||||
logger.info(
|
||||
f"[processor] Finished loading customer survey data for {len(raw_properties)} properties. Mapping..."
|
||||
)
|
||||
|
||||
survey_year = datetime.now().year # TODO: get this from filepath or elsewhere
|
||||
|
||||
property_condition_surveys: List[PropertyConditionSurvey] = []
|
||||
|
|
@ -29,4 +42,10 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None:
|
|||
mapper.map_asset_conditions_for_property(p, survey_year)
|
||||
)
|
||||
|
||||
print("done") # temp
|
||||
logger.info(
|
||||
f"[processor] Finished mapping {len(property_condition_surveys)} properties. Writing to database..."
|
||||
)
|
||||
|
||||
persistence.bulk_insert_surveys(property_condition_surveys)
|
||||
|
||||
logger.info(f"[processor] Finished loading surveys to database")
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
from backend.app.db.models.condition import PropertyConditionSurveyModel
|
||||
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
|
||||
|
||||
|
||||
|
|
@ -72,3 +73,41 @@ class CustomAsserts:
|
|||
f"{actual_aspect.comments} != {expected_aspect.comments}"
|
||||
)
|
||||
return True
|
||||
|
||||
def assert_property_condition_survey_model_matches_expected(
    actual_model: PropertyConditionSurveyModel,
    expected: dict,
) -> None:
    """Assert that a survey model matches an expected plain-dict description.

    ``expected`` must provide ``uprn``, ``date``, ``source`` and ``elements``.
    Each expected element provides ``element_type``, ``element_instance`` and
    ``aspects``; each expected aspect is a dict of attribute name to value,
    and only the attributes listed there are compared.

    Raises:
        AssertionError: On the first mismatch, with a message naming the
            differing field.
    """
    assert actual_model.uprn == expected["uprn"], "UPRN differs"
    assert actual_model.date == expected["date"], "Date differs"
    assert actual_model.source == expected["source"], "Source differs"

    # Compare lengths first: zip() below would silently drop unmatched extras.
    assert len(actual_model.elements) == len(expected["elements"]), (
        f"Expected {len(expected['elements'])} elements, "
        f"got {len(actual_model.elements)}"
    )

    element_pairs = zip(actual_model.elements, expected["elements"])
    for i, (actual_element, expected_element) in enumerate(element_pairs):
        assert (
            actual_element.element_type == expected_element["element_type"]
        ), f"Element[{i}].element_type differs"
        assert (
            actual_element.element_instance == expected_element["element_instance"]
        ), f"Element[{i}].element_instance differs"

        assert len(actual_element.aspect_conditions) == len(
            expected_element["aspects"]
        ), f"Element[{i}] aspect count differs"

        aspect_pairs = zip(
            actual_element.aspect_conditions, expected_element["aspects"]
        )
        for j, (actual_aspect, expected_aspect) in enumerate(aspect_pairs):
            prefix = f"Element[{i}].Aspect[{j}]"

            for key, value in expected_aspect.items():
                assert getattr(actual_aspect, key) == value, (
                    f"{prefix}.{key} differs: "
                    f"{getattr(actual_aspect, key)} != {value}"
                )
|
||||
|
|
|
|||
34
backend/condition/tests/lookups/test_uprn_lookup_csv.py
Normal file
34
backend/condition/tests/lookups/test_uprn_lookup_csv.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import pytest
|
||||
from typing import Dict
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
|
||||
|
||||
|
||||
@pytest.fixture
def prop_ref_uprn_csv_file(tmp_path) -> str:
    """Write a three-row reference/UPRN CSV and return its path as a string.

    The file lives under pytest's ``tmp_path`` directory, so pytest manages
    its cleanup. The previous ``NamedTemporaryFile(delete=False)`` left a
    stray file behind on every run.
    """
    csv_file = tmp_path / "prop_ref_uprn.csv"
    csv_file.write_text(
        "reference,out_uprn\n"
        "ABC123,10000000001\n"
        "DEF456,10000000002\n"
        "GHI789,10000000003\n",
        encoding="utf-8",
    )
    return str(csv_file)
|
||||
|
||||
|
||||
def test_generate_prop_ref_uprn_from_csv_file(prop_ref_uprn_csv_file: str) -> None:
    """The local CSV lookup returns every reference mapped to its integer UPRN."""
    # arrange
    expected_map: Dict[str, int] = {
        "ABC123": 10000000001,
        "DEF456": 10000000002,
        "GHI789": 10000000003,
    }
    uprn_lookup = UprnLookupLocal(prop_ref_uprn_csv_file)

    # act
    actual_map: Dict[str, int] = uprn_lookup.get_property_ref_to_uprn_lookup()

    # assert
    assert actual_map == expected_map
|
||||
|
|
@ -1,11 +1,13 @@
|
|||
import pytest
|
||||
|
||||
from backend.condition.condition_trigger_request import ConditionFileType
|
||||
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
|
||||
from backend.condition.parsing.factory import select_parser
|
||||
from backend.condition.file_type import FileType
|
||||
|
||||
|
||||
def test_selects_lbwf_parser():
|
||||
# arrange
|
||||
file_type = FileType.LBWF
|
||||
file_type = ConditionFileType.LBWF
|
||||
expected_class_name = "LbwfParser"
|
||||
|
||||
# act
|
||||
|
|
@ -14,13 +16,15 @@ def test_selects_lbwf_parser():
|
|||
# assert
|
||||
assert expected_class_name == actual_class_name
|
||||
|
||||
|
||||
def test_selects_peabody_parser():
|
||||
# arrange
|
||||
file_type = FileType.Peabody
|
||||
file_type = ConditionFileType.Peabody
|
||||
expected_class_name = "PeabodyParser"
|
||||
uprn_lookup = UprnLookupLocal(csv_path="test")
|
||||
|
||||
# act
|
||||
actual_class_name = select_parser(file_type).__class__.__name__
|
||||
actual_class_name = select_parser(file_type, uprn_lookup).__class__.__name__
|
||||
|
||||
# assert
|
||||
assert expected_class_name == actual_class_name
|
||||
assert expected_class_name == actual_class_name
|
||||
|
|
|
|||
|
|
@ -1,127 +1,143 @@
|
|||
from tempfile import NamedTemporaryFile
|
||||
import pytest
|
||||
from typing import Any
|
||||
from typing import Any, Dict
|
||||
from io import BytesIO
|
||||
from openpyxl import Workbook
|
||||
from datetime import datetime
|
||||
|
||||
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
|
||||
from backend.condition.parsing.peabody_parser import PeabodyParser
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
|
||||
PeabodyAssetCondition,
|
||||
)
|
||||
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def peabody_assets_xlsx_bytes() -> BytesIO:
|
||||
wb = Workbook()
|
||||
survey_records_d_and_lower = wb.active
|
||||
survey_records_d_and_lower.title = "Survey Records - D & Lower"
|
||||
survey_records_d_and_lower.append([
|
||||
"Lo_Reference",
|
||||
"full_address",
|
||||
"location_type_code",
|
||||
"Parent_Lo_Reference",
|
||||
"Element_Code",
|
||||
"Element",
|
||||
"Sub_Element_Code",
|
||||
"Sub_Element",
|
||||
"Material_Code",
|
||||
"material_or_answer",
|
||||
"Renewal_Quantity",
|
||||
"Renewal_Year",
|
||||
"Renewal_Cost",
|
||||
"cloned",
|
||||
"lo_type_code",
|
||||
"condition_survey_date",
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
"B000RAND",
|
||||
"1 RANDOM HOUSE LONDON",
|
||||
3,
|
||||
"RAND2EST",
|
||||
110,
|
||||
"ROOFS",
|
||||
1,
|
||||
"Primary Roof",
|
||||
9,
|
||||
"Other",
|
||||
3,
|
||||
2054,
|
||||
330,
|
||||
"N",
|
||||
3,
|
||||
datetime(2025,12,4,9,17,0)
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
"B000BLOCK",
|
||||
"1100 BLOCK",
|
||||
3,
|
||||
"RAND2EST",
|
||||
110,
|
||||
"ROOFS",
|
||||
1,
|
||||
"Primary Roof",
|
||||
9,
|
||||
"Other",
|
||||
3,
|
||||
2054,
|
||||
330,
|
||||
"N",
|
||||
3,
|
||||
datetime(2025,12,4,9,17,0)
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
"B000FAKE",
|
||||
"3 FAKE CLOSE LONDON",
|
||||
3,
|
||||
"FAKEEST",
|
||||
100,
|
||||
"GENERAL",
|
||||
15,
|
||||
"External Decoration",
|
||||
2,
|
||||
"Normal",
|
||||
1,
|
||||
2035,
|
||||
1500.7,
|
||||
"N",
|
||||
3,
|
||||
datetime(2025,7,5,0,0,0)
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
"B000MIS",
|
||||
"99 MISC ROAD LONDON",
|
||||
3,
|
||||
"300828",
|
||||
54,
|
||||
"HHSRS",
|
||||
29,
|
||||
"HHSRS Structural Collapse & Falling Elements",
|
||||
4,
|
||||
"HHSRS Moderate",
|
||||
2,
|
||||
2027,
|
||||
None,
|
||||
"N",
|
||||
3,
|
||||
None
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
"B000MIS",
|
||||
"99 MISC ROAD LONDON",
|
||||
3,
|
||||
"300828",
|
||||
53,
|
||||
"External",
|
||||
2,
|
||||
"Chimney",
|
||||
2,
|
||||
"Present",
|
||||
33,
|
||||
2053,
|
||||
3531,
|
||||
"N",
|
||||
3,
|
||||
None
|
||||
])
|
||||
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"Lo_Reference",
|
||||
"full_address",
|
||||
"location_type_code",
|
||||
"Parent_Lo_Reference",
|
||||
"Element_Code",
|
||||
"Element",
|
||||
"Sub_Element_Code",
|
||||
"Sub_Element",
|
||||
"Material_Code",
|
||||
"material_or_answer",
|
||||
"Renewal_Quantity",
|
||||
"Renewal_Year",
|
||||
"Renewal_Cost",
|
||||
"cloned",
|
||||
"lo_type_code",
|
||||
"condition_survey_date",
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000RAND",
|
||||
"1 RANDOM HOUSE LONDON",
|
||||
3,
|
||||
"RAND2EST",
|
||||
110,
|
||||
"ROOFS",
|
||||
1,
|
||||
"Primary Roof",
|
||||
9,
|
||||
"Other",
|
||||
3,
|
||||
2054,
|
||||
330,
|
||||
"N",
|
||||
3,
|
||||
datetime(2025, 12, 4, 9, 17, 0),
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000BLOCK",
|
||||
"1100 BLOCK",
|
||||
3,
|
||||
"RAND2EST",
|
||||
110,
|
||||
"ROOFS",
|
||||
1,
|
||||
"Primary Roof",
|
||||
9,
|
||||
"Other",
|
||||
3,
|
||||
2054,
|
||||
330,
|
||||
"N",
|
||||
3,
|
||||
datetime(2025, 12, 4, 9, 17, 0),
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000FAKE",
|
||||
"3 FAKE CLOSE LONDON",
|
||||
3,
|
||||
"FAKEEST",
|
||||
100,
|
||||
"GENERAL",
|
||||
15,
|
||||
"External Decoration",
|
||||
2,
|
||||
"Normal",
|
||||
1,
|
||||
2035,
|
||||
1500.7,
|
||||
"N",
|
||||
3,
|
||||
datetime(2025, 7, 5, 0, 0, 0),
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000MIS",
|
||||
"99 MISC ROAD LONDON",
|
||||
3,
|
||||
"300828",
|
||||
54,
|
||||
"HHSRS",
|
||||
29,
|
||||
"HHSRS Structural Collapse & Falling Elements",
|
||||
4,
|
||||
"HHSRS Moderate",
|
||||
2,
|
||||
2027,
|
||||
None,
|
||||
"N",
|
||||
3,
|
||||
None,
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000MIS",
|
||||
"99 MISC ROAD LONDON",
|
||||
3,
|
||||
"300828",
|
||||
53,
|
||||
"External",
|
||||
2,
|
||||
"Chimney",
|
||||
2,
|
||||
"Present",
|
||||
33,
|
||||
2053,
|
||||
3531,
|
||||
"N",
|
||||
3,
|
||||
None,
|
||||
]
|
||||
)
|
||||
|
||||
stream = BytesIO()
|
||||
wb.save(stream)
|
||||
|
|
@ -129,9 +145,27 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
|
|||
|
||||
return stream
|
||||
|
||||
def test_peabody_parser_parses_conditions(peabody_assets_xlsx_bytes):
|
||||
|
||||
@pytest.fixture
|
||||
def prop_ref_uprn_csv_file() -> str:
|
||||
csv_content = """reference,out_uprn
|
||||
B000RAND,1
|
||||
B000BLOCK,2
|
||||
B000FAKE,3
|
||||
B000MIS,4
|
||||
"""
|
||||
with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp:
|
||||
tmp.write(csv_content)
|
||||
tmp.flush()
|
||||
return tmp.name
|
||||
|
||||
|
||||
def test_peabody_parser_parses_conditions(
|
||||
peabody_assets_xlsx_bytes, prop_ref_uprn_csv_file
|
||||
):
|
||||
# arrange
|
||||
parser = PeabodyParser()
|
||||
uprn_lookup = UprnLookupLocal(csv_path=prop_ref_uprn_csv_file)
|
||||
parser = PeabodyParser(uprn_lookup=uprn_lookup)
|
||||
|
||||
# act
|
||||
result: Any = parser.parse(peabody_assets_xlsx_bytes)
|
||||
|
|
@ -141,6 +175,7 @@ def test_peabody_parser_parses_conditions(peabody_assets_xlsx_bytes):
|
|||
|
||||
assert all(isinstance(item, PeabodyProperty) for item in result)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def asset_condition_factory():
|
||||
def _factory(full_address: str) -> PeabodyAssetCondition:
|
||||
|
|
@ -165,6 +200,7 @@ def asset_condition_factory():
|
|||
|
||||
return _factory
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"full_address, expected_block_level",
|
||||
[
|
||||
|
|
@ -175,7 +211,7 @@ def asset_condition_factory():
|
|||
("81A-B GORE ROAD LONDON", True),
|
||||
("73 & 74 HARVEST COURT ST. ALBANS", True),
|
||||
("25 HAVERSHAM COURT GREENFORD", False),
|
||||
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False)
|
||||
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False),
|
||||
],
|
||||
)
|
||||
def test_peabody_asset_is_block_level(
|
||||
|
|
@ -187,4 +223,4 @@ def test_peabody_asset_is_block_level(
|
|||
asset_condition = asset_condition_factory(full_address)
|
||||
|
||||
# act + assert
|
||||
assert asset_condition.is_block_level == expected_block_level
|
||||
assert asset_condition.is_block_level == expected_block_level
|
||||
|
|
|
|||
164
backend/condition/tests/persistence/test_condition_postgres.py
Normal file
164
backend/condition/tests/persistence/test_condition_postgres.py
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
import pytest
|
||||
from datetime import date
|
||||
|
||||
from backend.condition.persistence.condition_postgres import ConditionPostgres
|
||||
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
|
||||
from backend.condition.domain.element import Element
|
||||
from backend.condition.domain.element_type import ElementType
|
||||
from backend.condition.domain.aspect_condition import AspectCondition
|
||||
from backend.condition.domain.aspect_type import AspectType
|
||||
from backend.app.db.models.condition import PropertyConditionSurveyModel
|
||||
from backend.condition.tests.custom_asserts import CustomAsserts
|
||||
|
||||
|
||||
def test_map_survey_to_model() -> None:
    """
    Check that ConditionPostgres.map_survey_to_model maps a survey with three
    elements (windows, external decoration, external wall) onto a model that
    matches the expected survey / element / aspect values.
    """
    # arrange
    survey = PropertyConditionSurvey(
        uprn=1,
        elements=[
            Element(
                element_type=ElementType.EXTERNAL_WINDOWS,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.MATERIAL,
                        aspect_instance=1,
                        value="UPVC Double Glazed",
                        quantity=8,
                        install_date=None,
                        renewal_year=2036,
                        comments=None,
                    ),
                ],
            ),
            Element(
                element_type=ElementType.EXTERNAL_DECORATION,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.CONDITION,
                        aspect_instance=1,
                        value="Normal",
                        quantity=1,
                        install_date=None,
                        renewal_year=2029,
                        comments=None,
                    )
                ],
            ),
            Element(
                element_type=ElementType.EXTERNAL_WALL,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=1,
                        value="Pointed",
                        quantity=65,
                        install_date=None,
                        renewal_year=2045,
                        comments=None,
                    ),
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=1,
                        value="Pointing",
                        quantity=1,
                        install_date=None,
                        renewal_year=2069,
                        comments=None,
                    ),
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=2,
                        value="Tile Hung",
                        quantity=8,
                        install_date=None,
                        renewal_year=2049,
                        comments=None,
                    ),
                ],
            ),
        ],
        date=date(2000, 1, 1),
        source="Peabody",
    )

    # Expected values mirror the survey above, one dict per element and aspect.
    expected = {
        "uprn": 1,
        "date": date(2000, 1, 1),
        "source": "Peabody",
        "elements": [
            {
                "element_type": ElementType.EXTERNAL_WINDOWS,
                "element_instance": 1,
                "aspects": [
                    {
                        "aspect_type": AspectType.MATERIAL,
                        "aspect_instance": 1,
                        "value": "UPVC Double Glazed",
                        "quantity": 8,
                        "install_date": None,
                        "renewal_year": 2036,
                        "comments": None,
                    }
                ],
            },
            {
                "element_type": ElementType.EXTERNAL_DECORATION,
                "element_instance": 1,
                "aspects": [
                    {
                        "aspect_type": AspectType.CONDITION,
                        "aspect_instance": 1,
                        "value": "Normal",
                        "quantity": 1,
                        "install_date": None,
                        "renewal_year": 2029,
                        "comments": None,
                    }
                ],
            },
            {
                "element_type": ElementType.EXTERNAL_WALL,
                "element_instance": 1,
                "aspects": [
                    {
                        # Previously missing: without it this aspect's type was
                        # the only one not pinned by the expected data.
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 1,
                        "value": "Pointed",
                        "quantity": 65,
                        "install_date": None,
                        "renewal_year": 2045,
                        "comments": None,
                    },
                    {
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 1,
                        "value": "Pointing",
                        "quantity": 1,
                        "install_date": None,
                        "renewal_year": 2069,
                        "comments": None,
                    },
                    {
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 2,
                        "value": "Tile Hung",
                        "quantity": 8,
                        "install_date": None,
                        "renewal_year": 2049,
                        "comments": None,
                    },
                ],
            },
        ],
    }

    # act
    model: PropertyConditionSurveyModel = ConditionPostgres.map_survey_to_model(survey)

    # assert (survey level)
    CustomAsserts.assert_property_condition_survey_model_matches_expected(
        model,
        expected,
    )
|
||||
|
|
@ -1,22 +0,0 @@
|
|||
import pytest
|
||||
|
||||
from backend.condition.file_type import FileType, detect_file_type
|
||||
|
||||
def test_detects_lbwf_file_type():
|
||||
# arrange
|
||||
file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx"
|
||||
expected_file_type = FileType.LBWF
|
||||
|
||||
# act
|
||||
actual_file_type: FileType = detect_file_type(file_path_str)
|
||||
|
||||
# assert
|
||||
assert expected_file_type == actual_file_type
|
||||
|
||||
def test_unknown_filepath_raises_value_error():
|
||||
# arrange
|
||||
file_path_str = "unknown/Example Asset Data.xlsx"
|
||||
|
||||
# act + assert
|
||||
with pytest.raises(ValueError):
|
||||
detect_file_type(file_path_str)
|
||||
|
|
@ -978,13 +978,15 @@ async def model_engine(body: PlanTriggerRequest):
|
|||
recommendations_scoring_data.extend(p.recommendations_scoring_data)
|
||||
|
||||
logger.info("Preparing data for scoring in sap change api")
|
||||
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data).drop(
|
||||
columns=[
|
||||
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
|
||||
"carbon_ending"
|
||||
]
|
||||
)
|
||||
# Temp putting this here
|
||||
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
|
||||
if not recommendations_scoring_data.empty:
|
||||
recommendations_scoring_data = recommendations_scoring_data.drop(
|
||||
columns=[
|
||||
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
|
||||
"carbon_ending"
|
||||
]
|
||||
)
|
||||
# TODO: Temp putting this here
|
||||
recommendations_scoring_data["is_post_sap10_ending"] = True
|
||||
|
||||
all_predictions = await model_api.async_paginated_predictions(
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
# Pandas and numpy
|
||||
numpy==2.1.2
|
||||
pandas==2.2.3
|
||||
|
|
@ -22,4 +23,4 @@ pyarrow==17.0.0
|
|||
fastparquet==2024.5.0
|
||||
aiohttp==3.10.10
|
||||
# find my epc
|
||||
beautifulsoup4
|
||||
beautifulsoup4
|
||||
|
|
@ -313,4 +313,15 @@ class ModelApi:
|
|||
logger.error(f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}): {e}")
|
||||
await asyncio.sleep(2 ** attempts) # exponential backoff
|
||||
await self.close_aiohttp_session()
|
||||
|
||||
# Ensure stable output structure for the dataframe to be utilised by other functions downstream
|
||||
for k in all_predictions.keys():
|
||||
if all_predictions[k].empty:
|
||||
col_template = ['id', 'predictions', 'property_id', 'recommendation_id', 'phase'] if (
|
||||
extract_ids) else ['id', 'predictions']
|
||||
|
||||
all_predictions[k] = pd.DataFrame(
|
||||
columns=col_template
|
||||
)
|
||||
|
||||
return all_predictions
|
||||
|
|
|
|||
102
backend/onboarders/README.md
Normal file
102
backend/onboarders/README.md
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
# Retrofit Property Data Onboarding
|
||||
|
||||
This repository contains an ETL pipeline for transforming raw retrofit property data from external source systems (
|
||||
currently Parity) into a standardised internal format, compatible with both address2uprn and the engine.
|
||||
|
||||
The pipeline is designed to:
|
||||
|
||||
- Run as an AWS Lambda triggered by SQS
|
||||
- Read raw CSV/XLSX files from S3
|
||||
- Perform rule-based mappings
|
||||
- Infer as-built property attributes, assumed based on age
|
||||
- Output a processed CSV back to S3, to be consumed by address2uprn
|
||||
|
||||
### Structure
|
||||
|
||||
SQS → Lambda handler → OnboarderFactory → System-specific Onboarder → Mapping → CSV to S3
|
||||
|
||||
Each source system implements its own **Onboarder**, while sharing a common base and mapping process.
|
||||
|
||||
---
|
||||
|
||||
### Repository Structure
|
||||
|
||||
onboarders/
|
||||
├── `handler.py` # Lambda entrypoint \
|
||||
├── `factory.py` # Onboarder factory \
|
||||
├── `base.py` # Shared onboarding base class \
|
||||
├── `parity.py` # Parity-specific transformation logic \
|
||||
├── `mappings/` \
|
||||
│ └── `parity/` # Parity domain mappings & classifiers \
|
||||
│ ├── `age_band.py` \
|
||||
│ ├── `property_type.py` \
|
||||
│ ├── `built_form.py` \
|
||||
│ ├── `walls.py` \
|
||||
│ ├── `roof.py` \
|
||||
│ ├── `floor.py` \
|
||||
│ ├── `glazing.py` \
|
||||
│ ├── `heating.py` \
|
||||
│ ├── `as_built_wall_classifiers.py` \
|
||||
│ ├── `as_built_roof_classifiers.py` \
|
||||
│ └── `as_built_floor_classifiers.py` \
|
||||
├── `tests/` \
|
||||
├── `requirements.txt` \
|
||||
└── `README.md`
|
||||
|
||||
|
||||
---
|
||||
|
||||
### Lambda Entry Point (`handler.py`)
|
||||
|
||||
The Lambda handler:
|
||||
|
||||
1. Consumes messages from the SQS queue
|
||||
2. Validates the payload
|
||||
3. Instantiates the correct onboarder via `OnboarderFactory`
|
||||
4. Runs the transformation
|
||||
5. Writes the transformed CSV back to S3
|
||||
|
||||
### Expected Event Payload
|
||||
|
||||
```json
|
||||
{
|
||||
"s3_uri": "s3://bucket/path/to/input.xlsx",
|
||||
"system": "parity",
|
||||
"format": "xlsx",
|
||||
"sheet_name": "Sustainability"
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
### Onboarder Base `(base.py)`
|
||||
|
||||
OnboarderBase provides shared functionality across all systems.
|
||||
|
||||
*Responsibilities*
|
||||
|
||||
- Reading CSV/XLSX files from S3
|
||||
- Writing transformed CSVs to S3
|
||||
- Defining canonical output column names
|
||||
- Providing validation helpers
|
||||
- Common output - for the moment, onboarders are expected to return a CSV
|
||||
|
||||
### Parity Onboarder `(parity.py)`
|
||||
|
||||
`ParityOnboarder` contains all Parity-specific transformation logic.
|
||||
|
||||
*Responsibilities*
|
||||
|
||||
- Map raw Parity fields to internal EPC-aligned enums
|
||||
- Infer “as-built” constructions using age bands when insulation data is missing
|
||||
- Resolve energy efficiency ratings deterministically
|
||||
- Normalise output into a fixed schema
|
||||
|
||||
The `transform()` method orchestrates the transformation process.
|
||||
|
||||
### TODOs
|
||||
|
||||
- In `backend/onboarders/mappings/parity/glazing.py` we currently map the Parity descriptions
|
||||
to tuples of descriptions and efficiency ratings. This is okay for the moment but we may consider
|
||||
using a data class, just given how error-prone this is.
|
||||
- This is also true for heating mappings in `backend/onboarders/mappings/parity/heating.py`
|
||||
- Implement an AI-enabled version, to replace the standardised asset list
|
||||
0
backend/onboarders/__init__.py
Normal file
0
backend/onboarders/__init__.py
Normal file
84
backend/onboarders/base.py
Normal file
84
backend/onboarders/base.py
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
import pandas as pd
|
||||
from utils.s3 import read_from_s3, read_excel_from_s3, save_csv_to_s3
|
||||
|
||||
|
||||
class OnboarderBase:
|
||||
# Input dataset to be transformed
|
||||
data: pd.DataFrame | None = None
|
||||
bucket_name = None
|
||||
input_file_name = None
|
||||
output_file_name = None
|
||||
# Description columns
|
||||
landlord_wall_construction: str = "landlord_wall_construction"
|
||||
landlord_roof_construction: str = "landlord_roof_construction"
|
||||
landlord_floor_construction: str = "landlord_floor_construction"
|
||||
landlord_windows_type: str = "landlord_windows_type"
|
||||
landlord_heating_construction: str = "landlord_heating_construction"
|
||||
landlord_fuel_type: str = "landlord_fuel_type"
|
||||
landlord_heating_controls: str = "landlord_heating_controls"
|
||||
landlord_hot_water_system: str = "landlord_hot_water_system"
|
||||
|
||||
# Efficiency columns
|
||||
landlord_roof_efficiency: str = "landlord_roof_efficiency"
|
||||
landlord_windows_efficiency: str = "landlord_windows_efficiency"
|
||||
landlord_heating_controls_efficiency: str = "landlord_heating_controls_efficiency"
|
||||
landlord_heating_efficiency: str = "landlord_heating_efficiency"
|
||||
landlord_hot_water_efficiency: str = "landlord_hot_water_efficiency"
|
||||
landlord_wall_efficiency: str = "landlord_wall_efficiency"
|
||||
|
||||
# Additional windows features
|
||||
landlord_multi_glaze_proportion: str = "landlord_multi_glaze_proportion"
|
||||
landlord_glazed_type: str = "landlord_glazed_type"
|
||||
landlord_glazed_area: str = "landlord_glazed_area"
|
||||
|
||||
# Additional roof features
|
||||
landlord_has_sloping_ceiling: str = "landlord_has_sloping_ceiling"
|
||||
|
||||
# Shape, dimensions, age
|
||||
landlord_total_floor_area_m2: str = "landlord_total_floor_area_m2"
|
||||
landlord_construction_age_band: str = "landlord_construction_age_band"
|
||||
landlord_property_type: str = "landlord_property_type"
|
||||
landlord_built_form: str = "landlord_built_form"
|
||||
|
||||
def read_s3(self, file_format, **kwargs):
|
||||
|
||||
if self.input_file_name is None or self.bucket_name is None:
|
||||
raise ValueError("Bucket name and input file name must be set before reading from S3.")
|
||||
if file_format == "xlsx":
|
||||
self.data = read_excel_from_s3(
|
||||
bucket_name=self.bucket_name,
|
||||
file_key=self.input_file_name,
|
||||
sheet_name=kwargs.get("sheet_name"),
|
||||
header_row=kwargs.get("header_row", 0)
|
||||
)
|
||||
else:
|
||||
self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name)
|
||||
|
||||
def write(self):
|
||||
if self.data is None:
|
||||
raise ValueError("No data to write. Please run transform() before writing.")
|
||||
|
||||
if self.bucket_name is None or self.output_file_name is None:
|
||||
raise ValueError("Bucket name and output file name must be set before writing to S3.")
|
||||
# Store file as csv - will store in the same route location as the input file
|
||||
save_csv_to_s3(dataframe=self.data, bucket_name=self.bucket_name, file_name=self.output_file_name)
|
||||
|
||||
@staticmethod
|
||||
def assert_nulls_only_from_source_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool:
|
||||
# We only allow nulls if the original value was null
|
||||
null_vals = data[pd.isnull(data[mapped_column])]
|
||||
if null_vals.empty:
|
||||
return True
|
||||
# We make sure all original values were null
|
||||
assert pd.isnull(null_vals[original_column]).all(), (
|
||||
f"Some values in {mapped_column} were not mapped, but original values were not null"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def assert_no_nulls(data: pd.DataFrame, column: str):
|
||||
assert pd.isnull(data[column]).sum() == 0, f"column {column} contains null values, but should not"
|
||||
|
||||
def map_construction_age_band(self):
|
||||
raise NotImplementedError(
|
||||
"This method should be implemented by subclasses to map construction age bands to descriptions"
|
||||
)
|
||||
10
backend/onboarders/factory.py
Normal file
10
backend/onboarders/factory.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
from onboarders.parity import ParityOnboarder
|
||||
|
||||
|
||||
class OnboarderFactory:
    """Builds the onboarder that matches a source-system identifier."""

    @staticmethod
    def create_onboarder(onboarder_type, **kwargs):
        """
        Return a new onboarder for ``onboarder_type``.

        Only "parity" is recognised; ``kwargs`` are forwarded unchanged to
        ``ParityOnboarder``.

        :raises ValueError: for any other ``onboarder_type``.
        """
        # Guard clause: reject anything that is not a known system up front.
        if onboarder_type != "parity":
            raise ValueError(f"Unknown onboarder type: {onboarder_type}")

        return ParityOnboarder(**kwargs)
|
||||
50
backend/onboarders/handler.py
Normal file
50
backend/onboarders/handler.py
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
import json
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Optional, Literal
|
||||
from onboarders.factory import OnboarderFactory
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class OnboardingEvent(BaseModel):
|
||||
s3_uri: str = Field(..., description="S3 URI of the raw ARA input file")
|
||||
system: Literal["parity", "generic"] = Field(..., description="Onboarding system identifier")
|
||||
format: Literal["csv", "xlsx"]
|
||||
sheet_name: Optional[str] = None
|
||||
|
||||
|
||||
def handler(event, context):
    """
    Lambda handler that runs the onboarding pipeline for each SQS message.

    For every record in ``event["Records"]`` the JSON body is validated as an
    ``OnboardingEvent``, an onboarder is created through ``OnboarderFactory``,
    and its ``transform()`` and ``write()`` methods are called in turn.

    A failure in one record is logged with its traceback and does not stop the
    remaining records from being processed.

    :param event: Lambda event; records are read from its "Records" key, and a
        missing key means nothing is processed.
    :param context: Lambda context object (unused).
    :return: None
    """
    for record in event.get("Records", []):
        try:
            event_body = json.loads(record["body"])
            # Sample input data
            # event_body = {
            #     "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for "
            #               "Domna.xlsx",
            #     "system": "parity",
            #     "format": "xlsx",
            #     "sheet_name": "Sustainability"
            # }

            logger.info("Processing record with body: %s", event_body)

            validated_event = OnboardingEvent(**event_body)
            onboarder = OnboarderFactory.create_onboarder(
                validated_event.system,
                fileuri=validated_event.s3_uri,
                format=validated_event.format,
                sheet_name=validated_event.sheet_name,
                file_format=validated_event.format
            )

            logger.info("Transforming data")
            onboarder.transform()
            logger.info(f"Writing data to {onboarder.output_file_name}, bucket: {onboarder.bucket_name}")
            onboarder.write()

        except Exception as e:
            # NOTE(review): the error is not re-raised, so the invocation completes
            # normally even when a record failed - confirm this is intended for the
            # SQS trigger (failed messages would not be retried).
            # logger.exception logs at ERROR level and keeps the traceback.
            logger.exception("Failed to process record: %s", e)
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
party_map = {
|
||||
"Before 1900": 'England and Wales: before 1900',
|
||||
"1900-1929": 'England and Wales: 1900-1929',
|
||||
"1930-1949": 'England and Wales: 1930-1949',
|
||||
"1950-1966": 'England and Wales: 1950-1966',
|
||||
"1967-1975": 'England and Wales: 1967-1975',
|
||||
"1976-1982": 'England and Wales: 1976-1982',
|
||||
"1983-1990": 'England and Wales: 1983-1990',
|
||||
"1991-1995": 'England and Wales: 1991-1995',
|
||||
"1996-2002": 'England and Wales: 1996-2002',
|
||||
"2003-2006": 'England and Wales: 2003-2006',
|
||||
"2007-2011": 'England and Wales: 2007-2011',
|
||||
"2012 onwards": 'England and Wales: 2012-2021',
|
||||
}
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
parity_map = {
|
||||
"MidTerrace": "Mid-Terrace",
|
||||
"EndTerrace": "End-Terrace",
|
||||
"Detached": "Detached",
|
||||
"SemiDetached": "Semi-Detached",
|
||||
"EnclosedMidTerrace": "Enclosed Mid-Terrace",
|
||||
"EnclosedEndTerrace": "Enclosed End-Terrace",
|
||||
}
|
||||
|
||||
# MidTerrace 41462
|
||||
# EndTerrace 20910
|
||||
# Detached 16875
|
||||
# SemiDetached 14725
|
||||
# EnclosedMidTerrace 3176
|
||||
# EnclosedEndTerrace 2393
|
||||
19
backend/onboarders/mappings/parity/age_band.py
Normal file
19
backend/onboarders/mappings/parity/age_band.py
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
|
||||
parity_map = {
|
||||
"Before 1900": EpcConstructionAgeBand.before_1900,
|
||||
"1900-1929": EpcConstructionAgeBand.from_1900_to_1929,
|
||||
"1930-1949": EpcConstructionAgeBand.from_1930_to_1949,
|
||||
"1950-1966": EpcConstructionAgeBand.from_1950_to_1966,
|
||||
"1967-1975": EpcConstructionAgeBand.from_1967_to_1975,
|
||||
"1976-1982": EpcConstructionAgeBand.from_1976_to_1982,
|
||||
"1983-1990": EpcConstructionAgeBand.from_1983_to_1990,
|
||||
"1991-1995": EpcConstructionAgeBand.from_1991_to_1995,
|
||||
"1996-2002": EpcConstructionAgeBand.from_1996_to_2002,
|
||||
"2003-2006": EpcConstructionAgeBand.from_2003_to_2006,
|
||||
"2007-2011": EpcConstructionAgeBand.from_2007_to_2011,
|
||||
"2012 onwards": EpcConstructionAgeBand.from_2012_onwards,
|
||||
# Newer age bands, under SAP10
|
||||
"2012-2022": EpcConstructionAgeBand.from_2012_to_2022,
|
||||
"2023 onwards": EpcConstructionAgeBand.from_2023_onwards,
|
||||
}
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
from datatypes.epc.floor import EpcFloorDescriptions
|
||||
|
||||
|
||||
def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Pick an assumed as-built floor description when the floor type is unknown.

    Decided on the age band's start year: 2003 or later is solid insulated,
    1996 or later is solid with limited insulation, 1930 or later is solid
    with no insulation, and anything earlier is suspended with no insulation.
    """
    start = age_band.start_year()

    return (
        EpcFloorDescriptions.solid_insulated_assumed if start >= 2003
        else EpcFloorDescriptions.solid_limited_insulation_assumed if start >= 1996
        else EpcFloorDescriptions.solid_no_insulation_assumed if start >= 1930
        else EpcFloorDescriptions.suspended_no_insulation_assumed
    )
|
||||
|
||||
|
||||
def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Pick a floor description for a retrofitted floor of unknown type.

    Age bands starting in 1930 or later map to solid insulated; earlier bands
    map to suspended insulated.
    """
    start = age_band.start_year()

    return (
        EpcFloorDescriptions.solid_insulated
        if start >= 1930
        else EpcFloorDescriptions.suspended_insulated
    )
|
||||
|
||||
|
||||
def map_solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Assumed insulation level of an as-built solid floor.

    Start year 2003 or later is insulated, 1996 or later is limited
    insulation, and anything earlier is no insulation.
    """
    start = age_band.start_year()

    if start >= 2003:
        description = EpcFloorDescriptions.solid_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.solid_limited_insulation_assumed
    else:
        description = EpcFloorDescriptions.solid_no_insulation_assumed
    return description
|
||||
|
||||
|
||||
def map_suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Assumed insulation level of an as-built suspended floor.

    Start year 2003 or later is insulated, 1996 or later is limited
    insulation, and anything earlier is no insulation.
    """
    start = age_band.start_year()

    if start >= 2003:
        description = EpcFloorDescriptions.suspended_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.suspended_limited_insulation_assumed
    else:
        description = EpcFloorDescriptions.suspended_no_insulation_assumed
    return description
|
||||
|
||||
|
||||
as_built_floor_classifiers = {
|
||||
"Solid": map_solid_floor_as_built,
|
||||
"SuspendedTimber": map_suspended_floor_as_built,
|
||||
"SuspendedNotTimber": map_suspended_floor_as_built,
|
||||
}
|
||||
|
||||
unknown_as_built_floor_classifiers = {
|
||||
"RetroFitted": unknown_floor_retrofitted,
|
||||
"AsBuilt": unknown_floor_as_built,
|
||||
"Unknown": unknown_floor_as_built,
|
||||
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
from datatypes.epc.roof import EpcRoofDescriptions
|
||||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
|
||||
|
||||
def map_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Assumed description of an as-built flat roof, chosen from the age band's start year.

    - start year 1983 or later: flat, insulated
    - start year 1967 to 1982: flat, limited insulation
    - earlier start years: flat, no insulation

    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()

    return (
        EpcRoofDescriptions.flat_insulated if start >= 1983
        else EpcRoofDescriptions.flat_limited_insulation if start >= 1967
        else EpcRoofDescriptions.flat_no_insulation
    )
|
||||
|
||||
|
||||
def map_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Assumed description of an as-built sloping-ceiling roof, chosen from the age band's start year.

    - start year 1983 or later: sloping pitched, insulated
    - start year 1976 to 1982: sloping pitched, limited insulation
    - earlier start years: sloping pitched, no insulation

    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()

    return (
        EpcRoofDescriptions.sloping_pitched_insulated if start >= 1983
        else EpcRoofDescriptions.sloping_pitched_limited_insulation if start >= 1976
        else EpcRoofDescriptions.sloping_pitched_no_insulation
    )
|
||||
|
||||
|
||||
as_built_roof_classifiers = {
|
||||
# Only need to apply this to flat and sloping ceiling roofs
|
||||
"Flat": map_flat_roof,
|
||||
"PitchedWithSlopingCeiling": map_sloping_ceiling_roof,
|
||||
}
|
||||
113
backend/onboarders/mappings/parity/as_built_wall_classifiers.py
Normal file
113
backend/onboarders/mappings/parity/as_built_wall_classifiers.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
from datatypes.epc.walls import EpcWallDescriptions
|
||||
|
||||
|
||||
def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed as-built description of a cavity wall for the given age band.

    Bands starting before 1976 have no insulation, the 1976-1982 band is
    partially insulated, and bands returned by ``from_year_onwards(1983)``
    are insulated.

    :raises NotImplementedError: if the band matches none of the above.
    """
    if age_band.start_year() < 1976:
        result = EpcWallDescriptions.cavity_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        result = EpcWallDescriptions.cavity_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        result = EpcWallDescriptions.cavity_insulated_assumed
    else:
        raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping")
    return result
|
||||
|
||||
|
||||
def map_solid_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed as-built description of a solid brick wall for the given age band.

    Bands starting before 1976 have no insulation, the 1976-1982 band is
    partially insulated, and bands returned by ``from_year_onwards(1983)``
    are insulated.

    :raises NotImplementedError: if the band matches none of the above.
    """
    if age_band.start_year() < 1976:
        result = EpcWallDescriptions.solid_brick_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        result = EpcWallDescriptions.solid_brick_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        result = EpcWallDescriptions.solid_brick_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for solid wall insulation mapping"
        )
    return result
|
||||
|
||||
|
||||
def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed as-built description of a timber frame wall for the given age band.

    Bands starting before 1950 have no insulation, bands starting before 1976
    are partially insulated, and bands returned by ``from_year_onwards(1976)``
    are insulated.

    :raises NotImplementedError: if the band matches none of the above.
    """
    start = age_band.start_year()

    if start < 1950:
        result = EpcWallDescriptions.timber_frame_no_insulation_assumed
    elif start < 1976:
        result = EpcWallDescriptions.timber_frame_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1976):
        result = EpcWallDescriptions.timber_frame_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for timber frame wall insulation mapping"
        )
    return result
|
||||
|
||||
|
||||
def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed as-built description of a system-built wall for the given age band.

    Bands starting before 1976 have no insulation, the 1976-1982 band is
    partially insulated, and bands returned by ``from_year_onwards(1983)``
    are insulated.

    :raises NotImplementedError: if the band matches none of the above.
    """
    if age_band.start_year() < 1976:
        result = EpcWallDescriptions.system_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        result = EpcWallDescriptions.system_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        result = EpcWallDescriptions.system_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for system build wall insulation mapping"
        )
    return result
|
||||
|
||||
|
||||
def map_granite_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed as-built description of a granite / whinstone wall for the given age band.

    Bands starting before 1976 have no insulation, the 1976-1982 band is
    partially insulated, and bands returned by ``from_year_onwards(1983)``
    are insulated.

    :raises NotImplementedError: if the band matches none of the above.
    """
    if age_band.start_year() < 1976:
        result = EpcWallDescriptions.granite_whinstone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        result = EpcWallDescriptions.granite_whinstone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        # The member name is spelled "whinestone" here, unlike the two above;
        # kept as-is because the enum definition is outside this file.
        result = EpcWallDescriptions.granite_whinestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for granite wall insulation mapping"
        )
    return result
|
||||
|
||||
|
||||
def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed as-built description of a sandstone / limestone wall for the given age band.

    Bands starting before 1976 have no insulation, the 1976-1982 band is
    partially insulated, and bands returned by ``from_year_onwards(1983)``
    are insulated.

    :raises NotImplementedError: if the band matches none of the above.
    """
    if age_band.start_year() < 1976:
        result = EpcWallDescriptions.sandstone_limestone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        result = EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        result = EpcWallDescriptions.sandstone_limestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for sandstone wall insulation mapping"
        )
    return result
|
||||
|
||||
|
||||
def map_cob_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assumed as-built description of a cob wall for the given age band.

    Bands starting before 1983 map to "average"; bands returned by
    ``from_year_onwards(1983)`` map to "good".

    :raises NotImplementedError: if the band matches neither case.
    """
    if age_band.start_year() < 1983:
        result = EpcWallDescriptions.cob_as_built_average
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        result = EpcWallDescriptions.cob_as_built_good
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for cob wall insulation mapping"
        )
    return result
|
||||
|
||||
|
||||
as_built_wall_classifiers = {
|
||||
"Cavity": map_cavity_wall_insulation,
|
||||
"Solid Brick": map_solid_wall_insulation,
|
||||
"Timber Frame": map_timber_frame_wall_insulation,
|
||||
"System": map_system_build_wall_insulation,
|
||||
"Granite": map_granite_wall_insulation,
|
||||
"Sandstone": map_sandstone_wall_insulation,
|
||||
"Cob": map_cob_wall_insulation,
|
||||
}
|
||||
10
backend/onboarders/mappings/parity/built_form.py
Normal file
10
backend/onboarders/mappings/parity/built_form.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
from datatypes.epc.property_type_built_form import BuiltForm
|
||||
|
||||
parity_map = {
|
||||
"MidTerrace": BuiltForm.mid_terrace,
|
||||
"EndTerrace": BuiltForm.end_terrace,
|
||||
"Detached": BuiltForm.detached,
|
||||
"SemiDetached": BuiltForm.semi_detached,
|
||||
"EnclosedMidTerrace": BuiltForm.enclosed_mid_terrace,
|
||||
"EnclosedEndTerrace": BuiltForm.enclosed_end_terrace,
|
||||
}
|
||||
26
backend/onboarders/mappings/parity/floor.py
Normal file
26
backend/onboarders/mappings/parity/floor.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
from numpy import nan
|
||||
from datatypes.epc.floor import EpcFloorDescriptions
|
||||
|
||||
# Keys are 2-tuples of source strings (or nan where the source value is missing);
# values are an EpcFloorDescriptions member, or None where no fixed description is given here.
# The trailing comments name how the None cases are expected to be resolved
# (presumably by a separate age-based step - confirm against the caller).
# NOTE(review): tuple keys containing nan only match a lookup that uses the very same
# nan object (nan != nan, so dict lookup falls back to identity) - confirm callers
# normalise missing values to numpy.nan before indexing this map.
floor_map = {
    # Solid floor
    ("Solid", "AsBuilt"): None,  # Mapped
    ("Solid", "Unknown"): None,  # Mapped
    ("Solid", nan): None,  # Mapped
    ("Solid", "RetroFitted"): EpcFloorDescriptions.solid_insulated,

    # Suspended floor
    ("SuspendedTimber", nan): None,  # Mapped suspended_floor_as_built
    ("SuspendedTimber", "AsBuilt"): None,  # Mapped suspended_floor_as_built
    ("SuspendedTimber", "RetroFitted"): EpcFloorDescriptions.suspended_insulated,
    ("SuspendedTimber", "Unknown"): None,  # Mapped suspended_floor_as_built
    ("SuspendedNotTimber", "RetroFitted"): EpcFloorDescriptions.suspended_insulated,
    ("SuspendedNotTimber", nan): None,  # Mapped suspended_floor_as_built
    ("SuspendedNotTimber", "Unknown"): None,  # Mapped suspended_floor_as_built
    ("SuspendedNotTimber", "AsBuilt"): None,  # Mapped suspended_floor_as_built

    # Unknown type - mapped on age
    ("Unknown", "Unknown"): None,  # Mapped unknown_floor_as_built
    ("Unknown", "RetroFitted"): None,  # Mapped unknown_floor_retrofitted
    (nan, nan): None,  # No actual information!
    ("Unknown", "AsBuilt"): None,  # Mapped unknown_floor_as_built
}
|
||||
20
backend/onboarders/mappings/parity/glazing.py
Normal file
20
backend/onboarders/mappings/parity/glazing.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
from datatypes.epc.efficiency import EpcEfficiency
|
||||
|
||||
# Source glazing label -> 5-tuple of
#   (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area)
# For SAP 10 assessments, the glazed type and glazed area are no longer populated in the
# EPC API data, so the last two slots are None for every entry.
glazing_map = {
    "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None),
    "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None),
    # Triple glazing with age unknown only performs as average, whereas a post 2022
    # installation is classed as high performance glazing with good efficiency.
    # We'll need to be considerate as to how we make updates to the windows data.
    # (Triple with known data is high performance glazing with at least Good
    # efficiency - see "TripleKnownData" below.)
    "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None),
    # Double with known data is also classed as high performance glazing.
    "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
    # Under SAP 10, secondary glazing is classed as poor efficiency
    # (whereas under SAP 2012 it was generally good).
    "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None),
    "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
}
|
||||
330
backend/onboarders/mappings/parity/heating.py
Normal file
330
backend/onboarders/mappings/parity/heating.py
Normal file
|
|
@ -0,0 +1,330 @@
|
|||
from datatypes.epc.main_heating import EpcHeatingSystems
|
||||
from datatypes.epc.efficiency import EpcEfficiency
|
||||
from datatypes.epc.fuel import EpcFuel
|
||||
from datatypes.epc.heating_controls import EpcHeatingControls
|
||||
from datatypes.epc.hotwater import EpcHotWaterSystems
|
||||
|
||||
# Key: 4-tuple of source strings
#   (heating system type, band letter 'A'-'G', fuel, controls spec level)
#   NOTE(review): the band letter is presumably a source efficiency band - confirm.
# Value: 7-tuple
#   (EpcHeatingSystems, EpcEfficiency, EpcFuel,
#    EpcHeatingControls, EpcEfficiency,
#    EpcHotWaterSystems, EpcEfficiency)
#   Each EpcEfficiency presumably rates the item immediately before it
#   (main heating, heating controls, hot water) - confirm against the consumer.
heating_map = {
    # 0
    ("Boilers", "A", "ElectricityNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 1
    ("Boilers", "A", "ElectricityNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 2
    ("Boilers", "A", "ElectricityNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 3
    ("Boilers", "A", "LPGNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 4
    ("Boilers", "A", "MainsGasNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 5
    ("Boilers", "A", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 6
    ("Boilers", "A", "MainsGasNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 7
    ("Boilers", "B", "MainsGasNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 8
    ("Boilers", "B", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 9
    ("Boilers", "B", "MainsGasNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 10
    ("Boilers", "C", "ElectricityNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 11
    ("Boilers", "C", "ElectricityNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 12
    ("Boilers", "C", "ElectricityNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 13
    ("Boilers", "C", "LPGNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 14
    ("Boilers", "C", "LPGNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 15
    ("Boilers", "C", "MainsGasNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 16
    ("Boilers", "C", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 17
    ("Boilers", "C", "MainsGasNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 18
    ("Boilers", "C", "OilNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 19
    ("Boilers", "C", "OilNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 20
    ("Boilers", "C", "OilNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 21
    ("Boilers", "D", "MainsGasNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 22
    ("Boilers", "D", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 23
    ("Boilers", "D", "MainsGasNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 24
    ("Boilers", "E", "ElectricityNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 25
    ("Boilers", "E", "MainsGasNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 26
    ("Boilers", "E", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 27
    ("Boilers", "E", "MainsGasNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 28
    ("Boilers", "E", "OilNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 29
    ("Boilers", "E", "OilNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 30
    ("Boilers", "F", "MainsGasNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 31
    ("Boilers", "F", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 32
    ("Boilers", "F", "MainsGasNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 33
    ("Boilers", "G", "MainsGasNotCommunity", "Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 34
    ("Boilers", "G", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 35
    # NOTE(review): the other band-G mains gas boiler entries use GOOD here; confirm AVERAGE is intended.
    ("Boilers", "G", "MainsGasNotCommunity", "Top Spec"): (
        EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 36
    ("Electric underfloor", "A", "ElectricityNotCommunity", "Optimal"): (
        EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 37
    ("Electric underfloor", "A", "ElectricityNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 38
    ("Electric underfloor", "A", "ElectricityNotCommunity", "Top Spec"): (
        EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 39
    ("Heat pumps (warm air)", "A", "ElectricityNotCommunity", "Optimal"): (
        EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 40
    ("Heat pumps (warm air)", "A", "ElectricityNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 41
    ("Heat pumps (wet)", "A", "ElectricityNotCommunity", "Optimal"): (
        EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 42
    ("Heat pumps (wet)", "A", "ElectricityNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 43
    ("Heat pumps (wet)", "A", "ElectricityNotCommunity", "Top Spec"): (
        EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
        EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
        EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE,
    ),
    # 44
    ("Room heaters", "A", "ElectricityNotCommunity", "Optimal"): (
        EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 45
    ("Room heaters", "A", "ElectricityNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 46
    ("Room heaters", "C", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 47 - water done from here
    ("Room heaters", "F", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 48
    ("Room heaters", "G", "MainsGasNotCommunity", "Optimal"): (
        EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 49
    ("Room heaters", "G", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 50
    ("Room heaters", "G", "SmokelessCoal", "Sub Optimal"): (
        EpcHeatingSystems.room_heaters_smokeless_fuel, EpcEfficiency.VERY_POOR, EpcFuel.smokeless_coal,
        EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 51
    ("Storage heaters", "A", "ElectricityNotCommunity", "Optimal"): (
        EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.automatic_charge_control, EpcEfficiency.AVERAGE,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 52
    ("Storage heaters", "A", "ElectricityNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
        EpcHeatingControls.manual_charge_control, EpcEfficiency.POOR,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 53
    ("Warm Air (not heat pump)", "G", "ElectricityNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.warm_air_electricaire, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
        EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
    # 54
    ("Warm Air (not heat pump)", "G", "MainsGasNotCommunity", "Sub Optimal"): (
        EpcHeatingSystems.warm_air_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
        EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD,
        EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE,
    ),
}
|
||||
8
backend/onboarders/mappings/parity/property_type.py
Normal file
8
backend/onboarders/mappings/parity/property_type.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
from datatypes.epc.property_type_built_form import PropertyType
|
||||
|
||||
# Source property-type label -> PropertyType enum member.
parity_map = {
    "Flat": PropertyType.flat,
    "Maisonette": PropertyType.maisonette,
    "Bungalow": PropertyType.bungalow,
    "House": PropertyType.house,
}
|
||||
461
backend/onboarders/mappings/parity/roof.py
Normal file
461
backend/onboarders/mappings/parity/roof.py
Normal file
|
|
@ -0,0 +1,461 @@
|
|||
import pandas as pd
|
||||
from numpy import nan
|
||||
from typing import Union, Callable
|
||||
from collections.abc import Mapping
|
||||
from datatypes.epc.roof import EpcRoofDescriptions
|
||||
from datatypes.epc.efficiency import EpcEfficiency
|
||||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
|
||||
# Key: (roof type, insulation value) as source strings, with nan where the insulation
# value is missing. Value: an EpcRoofDescriptions member, or None where the description
# is left "to be classified" (presumably by a later age-based step - confirm against the caller).
# NOTE(review): tuple keys containing nan only match a lookup that uses the very same
# nan object - confirm callers normalise missing values to numpy.nan before indexing.
roof_map = {
    # Dwelling above
    ("AnotherDwellingAbove", "Another Dwelling Above"): EpcRoofDescriptions.another_dwelling_above,
    ("SameDwellingAbove", "Same Dwelling Above"): EpcRoofDescriptions.another_dwelling_above,

    # Pitched, normal loft access, with a loft thickness
    ("PitchedNormalLoftAccess", "mm25"): EpcRoofDescriptions.loft_25mm_insulation,
    ("PitchedNormalLoftAccess", "mm50"): EpcRoofDescriptions.loft_50mm_insulation,
    ("PitchedNormalLoftAccess", "mm75"): EpcRoofDescriptions.loft_75mm_insulation,
    ("PitchedNormalLoftAccess", "mm100"): EpcRoofDescriptions.loft_100mm_insulation,
    ("PitchedNormalLoftAccess", "mm150"): EpcRoofDescriptions.loft_150mm_insulation,
    ("PitchedNormalLoftAccess", "mm200"): EpcRoofDescriptions.loft_200mm_insulation,
    ("PitchedNormalLoftAccess", "mm250"): EpcRoofDescriptions.loft_250mm_insulation,
    ("PitchedNormalLoftAccess", "mm270"): EpcRoofDescriptions.loft_270mm_insulation,
    ("PitchedNormalLoftAccess", "mm300"): EpcRoofDescriptions.loft_300mm_insulation,
    ("PitchedNormalLoftAccess", "mm350"): EpcRoofDescriptions.loft_350mm_insulation,
    ("PitchedNormalLoftAccess", "mm400"): EpcRoofDescriptions.loft_400mm_plus_insulation,

    # Pitched, no loft access, with a loft thickness
    ("PitchedNormalNoLoftAccess", "mm25"): EpcRoofDescriptions.loft_25mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm50"): EpcRoofDescriptions.loft_50mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm75"): EpcRoofDescriptions.loft_75mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm100"): EpcRoofDescriptions.loft_100mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm150"): EpcRoofDescriptions.loft_150mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm200"): EpcRoofDescriptions.loft_200mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm250"): EpcRoofDescriptions.loft_250mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm270"): EpcRoofDescriptions.loft_270mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm300"): EpcRoofDescriptions.loft_300mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm350"): EpcRoofDescriptions.loft_350mm_insulation,
    ("PitchedNormalNoLoftAccess", "mm400"): EpcRoofDescriptions.loft_400mm_plus_insulation,

    # All pitched options with as-built or unknown insulation go to
    # EpcRoofDescriptions.pitched_insulated_assumed
    # With access
    ("PitchedNormalLoftAccess", nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ("PitchedNormalLoftAccess", "AsBuilt"): EpcRoofDescriptions.pitched_insulated_assumed,
    ("PitchedNormalLoftAccess", "Unknown"): EpcRoofDescriptions.pitched_insulated_assumed,
    # No access
    ("PitchedNormalNoLoftAccess", nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ("PitchedNormalNoLoftAccess", "AsBuilt"): EpcRoofDescriptions.pitched_insulated_assumed,
    ("PitchedNormalNoLoftAccess", "Unknown"): EpcRoofDescriptions.pitched_insulated_assumed,

    # Flat
    ("Flat", "NoInsulation"): EpcRoofDescriptions.flat_no_insulation,
    # Flat - limited insulation
    # '12mm' is kept for backward compatibility; 'mm12' follows the naming used by every
    # other thickness key in this map (see the sloping-ceiling entries below), so both
    # spellings resolve to the same description.
    ("Flat", "12mm"): EpcRoofDescriptions.flat_limited_insulation,
    ("Flat", "mm12"): EpcRoofDescriptions.flat_limited_insulation,
    ("Flat", "mm25"): EpcRoofDescriptions.flat_limited_insulation,
    ("Flat", "mm50"): EpcRoofDescriptions.flat_limited_insulation,
    # Flat insulated
    ("Flat", "mm75"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm100"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm150"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm200"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm250"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm300"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm350"): EpcRoofDescriptions.flat_insulated,
    ("Flat", "mm400"): EpcRoofDescriptions.flat_insulated,
    # Flat - as built or unknown
    ("Flat", "AsBuilt"): None,  # To be classified
    ("Flat", nan): None,  # To be classified
    ("Flat", "Unknown"): None,  # To be classified

    # Efficiency bands noted for flat roofs:
    # 12mm = very poor & has limited insulation description
    # 25, 50 = poor & has limited insulation description
    # 75, 100, 125mm = average (Flat, insulated)
    # 150, 175, 200, 225, 250mm = good (Flat, insulated)
    # 270mm+ = very good (Flat, insulated)

    # Thatched
    ("PitchedThatched", "mm50"): EpcRoofDescriptions.thatched_with_additional_insulation,
    ("PitchedThatched", "mm150"): EpcRoofDescriptions.thatched_with_additional_insulation,
    ("PitchedThatched", "mm300"): EpcRoofDescriptions.thatched_with_additional_insulation,
    ("PitchedThatched", "Unknown"): EpcRoofDescriptions.thatched,  # efficiency classified based on age

    # Sloping:
    # Limited (12 very poor, 25-50 poor)
    ("PitchedWithSlopingCeiling", "mm12"): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ("PitchedWithSlopingCeiling", "mm25"): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ("PitchedWithSlopingCeiling", "mm50"): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good)
    ("PitchedWithSlopingCeiling", "mm75"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm100"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm150"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm200"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm250"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm270"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm300"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm350"): EpcRoofDescriptions.sloping_pitched_insulated,
    ("PitchedWithSlopingCeiling", "mm400"): EpcRoofDescriptions.sloping_pitched_insulated,
    # As built/unknown
    ("PitchedWithSlopingCeiling", "AsBuilt"): None,  # To be classified
    ("PitchedWithSlopingCeiling", nan): None,  # To be classified
    ("PitchedWithSlopingCeiling", "Unknown"): None,  # To be classified
}
|
||||
|
||||
# Roof type -> "as built, unknown" description.
# Judging by the name, this is the fallback used when the construction age is unknown
# (confirm against the caller).
roof_unknown_age_fallback = {
    "Flat": EpcRoofDescriptions.flat_as_built_unknown,
    "PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    "PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown,
    "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
    "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown,
}
|
||||
|
||||
# A roof efficiency rule is either a fixed EpcEfficiency, or a callable taking
# (age band, optional insulation thickness) and returning an EpcEfficiency.
# NOTE(review): confirm the callables registered under this alias actually use this
# argument order before relying on it for type checking.
RoofEfficiencyRule = Union[EpcEfficiency, Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency]]
|
||||
|
||||
|
||||
def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a flat roof from the start year of its construction age band.

    start year 2023 or later -> Very Good
    start year 2003 - 2022   -> Good
    start year 1983 - 2002   -> Average
    start year 1976 - 1982   -> Poor
    anything earlier         -> Very Poor
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    start_year = age_band.start_year()

    # Thresholds are checked newest-first; the first one the start year reaches wins.
    rating_by_min_start_year = (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
        (1983, EpcEfficiency.AVERAGE),
        (1976, EpcEfficiency.POOR),
    )
    for min_start_year, rating in rating_by_min_start_year:
        if start_year >= min_start_year:
            return rating

    return EpcEfficiency.VERY_POOR
|
||||
|
||||
|
||||
def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a flat roof from its insulation thickness.

    below 25mm (e.g. 12mm) -> Very Poor
    25mm up to 74mm        -> Poor
    75mm up to 149mm       -> Average
    150mm up to 269mm      -> Good
    270mm and above        -> Very Good

    Each band runs up to the next threshold, so a thickness that falls between the
    nominal steps (e.g. 60mm, 130mm, 260mm) is rated by the band below it instead of
    dropping to Very Poor. The nominal steps (12, 25, 50, 75, 100, 125, 150, ... 250, 270+)
    rate exactly as before.
    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for flat insulated efficiency calculation")

    # Descending thresholds: the first one reached decides the rating.
    if insulation_thickness >= 270:
        return EpcEfficiency.VERY_GOOD

    if insulation_thickness >= 150:
        return EpcEfficiency.GOOD

    if insulation_thickness >= 75:
        return EpcEfficiency.AVERAGE

    if insulation_thickness >= 25:
        return EpcEfficiency.POOR

    return EpcEfficiency.VERY_POOR
|
||||
|
||||
|
||||
def flat_efficiency(insulation_thickness: int | None, age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a flat roof, preferring the insulation thickness over the age band.

    :param insulation_thickness: Insulation thickness in mm, or None when not recorded
    :param age_band: EpcConstructionAgeBand, used only when no thickness is given
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No thickness recorded: fall back to the construction age band.
        return flat_insulated_efficiency_age_band(age_band)

    return flat_insulated_efficiency_thickness(insulation_thickness)
|
||||
|
||||
|
||||
def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate an insulated loft from its construction age band.

    The value returned by ``age_band.start_year()`` decides the rating:
        >= 2003 -> Very Good
        >= 1991 -> Good
        otherwise -> Average
    :param age_band: Input age band, EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()

    if built_from >= 2003:
        rating = EpcEfficiency.VERY_GOOD
    elif built_from >= 1991:
        rating = EpcEfficiency.GOOD
    else:
        rating = EpcEfficiency.AVERAGE

    return rating
|
||||
|
||||
|
||||
def thatched_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a thatched roof from its construction age band.

    The value returned by ``age_band.start_year()`` decides the rating:
        >= 2023 -> Very Good
        >= 2003 -> Good
        otherwise -> Average
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()

    # Newest threshold first; the first one met wins.
    for earliest_year, rating in (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
    ):
        if built_from >= earliest_year:
            return rating

    return EpcEfficiency.AVERAGE
|
||||
|
||||
|
||||
def thatched_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a thatched roof from its insulation thickness.

        175mm and above -> Very Good
        25mm up to 175mm -> Good
        below 25mm -> Average
    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for thatched efficiency calculation")

    if insulation_thickness >= 175:
        rating = EpcEfficiency.VERY_GOOD
    elif insulation_thickness >= 25:
        rating = EpcEfficiency.GOOD
    else:
        rating = EpcEfficiency.AVERAGE

    return rating
|
||||
|
||||
|
||||
def thatched_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Rate a thatched roof, preferring the insulation thickness over the age band.

    :param insulation_thickness: Insulation thickness in mm, or None when not recorded
    :param age_band: EpcConstructionAgeBand, used only when no thickness is given
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No thickness recorded: fall back to the construction age band.
        return thatched_efficiency_age_band(age_band)

    return thatched_efficiency_thickness(insulation_thickness)
|
||||
|
||||
|
||||
def sloping_ceiling_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a sloping-ceiling roof from its construction age band.

    The value returned by ``age_band.start_year()`` decides the rating:
        >= 2023 -> Very Good
        >= 2003 -> Good
        >= 1983 -> Average
        >= 1976 -> Poor
        otherwise -> Very Poor
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()

    # Newest threshold first; the first one met wins.
    thresholds = (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
        (1983, EpcEfficiency.AVERAGE),
        (1976, EpcEfficiency.POOR),
    )
    for earliest_year, rating in thresholds:
        if built_from >= earliest_year:
            return rating

    return EpcEfficiency.VERY_POOR
|
||||
|
||||
|
||||
def sloping_ceiling_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a sloping-ceiling roof from its insulation thickness.

        270mm and above -> Very Good
        150mm up to 270mm -> Good
        75mm up to 150mm -> Average
        25mm up to 75mm -> Poor
        below 25mm -> Very Poor
    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for sloping ceiling efficiency calculation")

    if insulation_thickness >= 270:
        rating = EpcEfficiency.VERY_GOOD
    elif insulation_thickness >= 150:
        rating = EpcEfficiency.GOOD
    elif insulation_thickness >= 75:
        rating = EpcEfficiency.AVERAGE
    elif insulation_thickness >= 25:
        rating = EpcEfficiency.POOR
    else:
        rating = EpcEfficiency.VERY_POOR

    return rating
|
||||
|
||||
|
||||
def sloping_ceiling_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Rate a sloping-ceiling roof, preferring the insulation thickness over the age band.

    :param insulation_thickness: Insulation thickness in mm, or None when not recorded
    :param age_band: EpcConstructionAgeBand, used only when no thickness is given
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No thickness recorded: fall back to the construction age band.
        return sloping_ceiling_efficiency_age_band(age_band)

    return sloping_ceiling_efficiency_thickness(insulation_thickness)
|
||||
|
||||
|
||||
def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters from its insulation thickness.

        350mm and above -> Very Good
        200mm up to 350mm -> Good
        125mm up to 200mm -> Average
        50mm up to 125mm -> Poor
        below 50mm -> Very Poor
    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for loft insulated at rafters efficiency calculation")

    # Thickest threshold first; the first one met wins.
    thresholds = (
        (350, EpcEfficiency.VERY_GOOD),
        (200, EpcEfficiency.GOOD),
        (125, EpcEfficiency.AVERAGE),
        (50, EpcEfficiency.POOR),
    )
    for minimum_mm, rating in thresholds:
        if insulation_thickness >= minimum_mm:
            return rating

    return EpcEfficiency.VERY_POOR
|
||||
|
||||
|
||||
def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters from its construction age band.

    The value returned by ``age_band.start_year()`` decides the rating:
        >= 2023 -> Very Good
        >= 2003 -> Good
        >= 1983 -> Average
        >= 1976 -> Poor
        otherwise -> Very Poor
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()

    if built_from >= 2023:
        rating = EpcEfficiency.VERY_GOOD
    elif built_from >= 2003:
        rating = EpcEfficiency.GOOD
    elif built_from >= 1983:
        rating = EpcEfficiency.AVERAGE
    elif built_from >= 1976:
        rating = EpcEfficiency.POOR
    else:
        rating = EpcEfficiency.VERY_POOR

    return rating
|
||||
|
||||
|
||||
def loft_insulated_at_rafters_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters, preferring the insulation thickness over the age band.

    :param insulation_thickness: Insulation thickness in mm, or None when not recorded
    :param age_band: EpcConstructionAgeBand, used only when no thickness is given
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No thickness recorded: fall back to the construction age band.
        return loft_insulated_at_rafters_efficiency_age_band(age_band)

    return loft_insulated_at_rafters_efficiency_thickness(insulation_thickness)
|
||||
|
||||
|
||||
# Lookup from roof description to its efficiency rule. A value is either a fixed
# EpcEfficiency or one of the rule functions defined above, which derive the
# rating from the insulation thickness and/or the construction age band.
ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = {
    # Flat roof
    EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR,
    EpcRoofDescriptions.flat_limited_insulation: flat_efficiency,
    EpcRoofDescriptions.flat_insulated: flat_efficiency,

    # Loft:
    # value mappings (the thickness is part of the description, so the rating is fixed)
    EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR,
    EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR,
    EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR,
    EpcRoofDescriptions.loft_75mm_insulation: EpcEfficiency.AVERAGE,
    EpcRoofDescriptions.loft_100mm_insulation: EpcEfficiency.AVERAGE,
    EpcRoofDescriptions.loft_125mm_insulation: EpcEfficiency.AVERAGE,
    EpcRoofDescriptions.loft_150mm_insulation: EpcEfficiency.GOOD,
    EpcRoofDescriptions.loft_175mm_insulation: EpcEfficiency.GOOD,
    EpcRoofDescriptions.loft_200mm_insulation: EpcEfficiency.GOOD,
    EpcRoofDescriptions.loft_250mm_insulation: EpcEfficiency.GOOD,
    EpcRoofDescriptions.loft_270mm_insulation: EpcEfficiency.VERY_GOOD,
    EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD,
    EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD,
    EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD,
    EpcRoofDescriptions.pitched_no_insulation: EpcEfficiency.VERY_POOR,
    # function mappings (this rule takes the age band only)
    EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency,

    # Loft at rafters
    EpcRoofDescriptions.loft_insulated_at_rafters: loft_insulated_at_rafters_efficiency,

    # Another dwelling above
    EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA,

    # Thatched
    EpcRoofDescriptions.thatched: thatched_efficiency,
    EpcRoofDescriptions.thatched_with_additional_insulation: thatched_efficiency,

    # Sloping ceiling
    EpcRoofDescriptions.sloping_pitched_insulated: sloping_ceiling_efficiency,
    EpcRoofDescriptions.sloping_pitched_limited_insulation: sloping_ceiling_efficiency,
    EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR,

}
|
||||
|
||||
|
||||
def resolve_roof_efficiency(
    description: EpcRoofDescriptions,
    age_band: EpcConstructionAgeBand | None,
    insulation_thickness: int | None,
) -> EpcEfficiency:
    """
    Resolve roof efficiency from description + age band + insulation thickness.

    :param description: EpcRoofDescriptions, looked up in ROOF_DESCRIPTION_EFFICIENCIES
    :param age_band: EpcConstructionAgeBand, or None / a pandas null when unknown
    :param insulation_thickness: Insulation thickness in mm, or None / a pandas null when unknown
    :return: EpcEfficiency; EpcEfficiency.NA when the description is an unknown/holding
        description, has no registered rule, or needs an age band that is missing
    """
    # Local import: only needed here, to inspect how many arguments a rule accepts.
    import inspect

    # Unknown / holding descriptions → efficiency unknown
    if description in description.unknown_descriptions:
        return EpcEfficiency.NA

    rule = ROOF_DESCRIPTION_EFFICIENCIES.get(description)

    if rule is None:
        return EpcEfficiency.NA

    # Fixed efficiency
    if isinstance(rule, EpcEfficiency):
        return rule

    # Callable rule: every callable rule needs an age band to fall back on
    if age_band is None or pd.isnull(age_band):
        return EpcEfficiency.NA

    # A missing thickness may arrive as NaN rather than None (e.g. when the value
    # is read back from a pandas row). The rules only test `is None`, so NaN would
    # otherwise be rated as if it were a real, very small thickness.
    if insulation_thickness is not None and pd.isnull(insulation_thickness):
        insulation_thickness = None

    # Dispatch on the rule's arity instead of catching TypeError, so that a
    # TypeError raised inside a rule is not mistaken for a signature mismatch.
    if len(inspect.signature(rule).parameters) >= 2:
        # (thickness, age_band) rules
        return rule(insulation_thickness, age_band)

    # (age_band) rules
    return rule(age_band)
|
||||
211
backend/onboarders/mappings/parity/walls.py
Normal file
211
backend/onboarders/mappings/parity/walls.py
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
from typing import Callable, Union
|
||||
from collections.abc import Mapping
|
||||
from datatypes.epc.walls import EpcWallDescriptions
|
||||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
from datatypes.epc.efficiency import EpcEfficiency
|
||||
|
||||
# Unique combinations
|
||||
# Direct lookup keyed by (wall construction, wall insulation) string pairs.
# A value of None means the pair has no direct EpcWallDescriptions equivalent
# here and is left to be classified separately.
wall_map = {
    # Cavity walls
    ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity,
    ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation,
    ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation,
    ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal,
    ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external,
    ('Cavity', 'AsBuilt'): None,  # To be classified
    ('Cavity', 'Unknown'): None,  # To be classified

    # System built walls
    ('System', 'External'): EpcWallDescriptions.system_external_insulation,
    ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation,
    ('System', 'AsBuilt'): None,  # To be classified
    ('System', 'Unknown'): None,

    # Timber Frame walls
    ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation,
    ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation,
    ('Timber Frame', 'AsBuilt'): None,  # To be classified
    ('Timber Frame', 'Unknown'): None,

    # Solid Brick walls
    ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation,
    ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation,
    ('Solid Brick', 'AsBuilt'): None,  # To be classified
    ('Solid Brick', 'Unknown'): None,

    # Granite walls
    ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation,
    ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation,
    ('Granite', 'AsBuilt'): None,
    ('Granite', 'Unknown'): None,

    # Sandstone walls
    ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation,
    ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation,
    ('Sandstone', 'Unknown'): None,
    ('Sandstone', 'AsBuilt'): None,

    # Cob walls
    # NOTE(review): only the 'AsBuilt' pair is listed for Cob - confirm no other
    # insulation values occur for this wall type.
    ('Cob', 'AsBuilt'): None,
}
|
||||
|
||||
# Per wall construction type, the "as built, unknown" description to use when
# the construction age band is not available. Every value here maps to
# EpcEfficiency.NA in WALL_DESCRIPTION_EFFICIENCIES.
wall_unknown_age_fallback = {
    "Cavity": EpcWallDescriptions.cavity_as_built_unknown,
    "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown,
    "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown,
    "System": EpcWallDescriptions.system_as_built_unknown,
    "Granite": EpcWallDescriptions.granite_as_built_unknown,
    "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown,
    "Cob": EpcWallDescriptions.cob_as_built_unknown,
}
|
||||
|
||||
|
||||
def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a filled cavity wall from its construction age band.

    Only the ``from_2023_onwards`` band is rated Very Good; every other band is Good.
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    very_good_bands = {
        EpcConstructionAgeBand.from_2023_onwards
    }
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
|
||||
|
||||
|
||||
def internal_external_insulation_efficiency(
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Rate a wall with internal or external insulation from its construction age band.

    Shared by three wall types, which the original author notes behave the same
    in Elmhurst and are assumed to have 100mm of insulation:
    - cavity (unfilled) with internal/external insulation
    - solid brick with internal/external insulation
    - system built with internal/external insulation

    Bands from ``from_1983_to_1990`` up to ``from_2023_onwards`` are rated
    Very Good; every other band is Good.
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    very_good_bands = {
        EpcConstructionAgeBand.from_1983_to_1990,
        EpcConstructionAgeBand.from_1991_to_1995,
        EpcConstructionAgeBand.from_1996_to_2002,
        EpcConstructionAgeBand.from_2003_to_2006,
        EpcConstructionAgeBand.from_2007_to_2011,
        EpcConstructionAgeBand.from_2012_to_2022,
        EpcConstructionAgeBand.from_2023_onwards,
    }
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
|
||||
|
||||
|
||||
def timber_granite_sandstone_internal_external_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a wall with internal or external insulation from its construction age band.

    Shared by three wall types:
    - timber frame with internal/external wall insulation
    - sandstone/limestone with internal/external wall insulation
    - granite/whinstone with internal/external wall insulation

    Only the ``from_2023_onwards`` band is rated Very Good; every other band is Good.
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    very_good_bands = {
        EpcConstructionAgeBand.from_2023_onwards
    }
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
|
||||
|
||||
|
||||
# A wall efficiency rule is either a fixed rating or a callable that derives
# one from the construction age band. Every wall rule function in this module
# takes the age band as its only argument, and that is how rules are invoked.
WallEfficiencyRule = Union[
    EpcEfficiency,
    Callable[[EpcConstructionAgeBand], EpcEfficiency],
]
|
||||
|
||||
# Lookup from wall description to its efficiency rule. A value is either a fixed
# EpcEfficiency ("value mappings") or a function of the construction age band
# ("function mappings").
WALL_DESCRIPTION_EFFICIENCIES: Mapping[EpcWallDescriptions, WallEfficiencyRule] = {
    # Note: all function mappings have been defined based on Elmhurst
    # Cavity
    # value mappings
    EpcWallDescriptions.cavity_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.cavity_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.cavity_insulated_assumed: EpcEfficiency.GOOD,
    EpcWallDescriptions.cavity_filled_plus_internal: EpcEfficiency.VERY_GOOD,
    EpcWallDescriptions.cavity_filled_plus_external: EpcEfficiency.VERY_GOOD,
    # function mappings
    EpcWallDescriptions.cavity_filled_cavity: cavity_filled_efficiency,
    EpcWallDescriptions.cavity_internal_insulation: internal_external_insulation_efficiency,
    EpcWallDescriptions.cavity_external_insulation: internal_external_insulation_efficiency,

    # Solid brick
    # value mappings
    EpcWallDescriptions.solid_brick_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.solid_brick_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.solid_brick_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.solid_brick_internal_insulation: internal_external_insulation_efficiency,
    EpcWallDescriptions.solid_brick_external_insulation: internal_external_insulation_efficiency,

    # System
    # value mappings
    EpcWallDescriptions.system_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.system_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.system_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.system_internal_insulation: internal_external_insulation_efficiency,
    EpcWallDescriptions.system_external_insulation: internal_external_insulation_efficiency,

    # Timber frame
    # value mappings
    EpcWallDescriptions.timber_frame_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.timber_frame_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.timber_frame_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.timber_frame_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
    EpcWallDescriptions.timber_frame_external_insulation: timber_granite_sandstone_internal_external_efficiency,

    # Granite / whinstone
    # value mappings (uninsulated is rated Very Poor here, unlike the wall types above)
    EpcWallDescriptions.granite_whinstone_no_insulation_assumed: EpcEfficiency.VERY_POOR,
    EpcWallDescriptions.granite_whinstone_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    # NOTE(review): "whinestone" is spelled differently from the sibling
    # "whinstone" members - confirm this matches the EpcWallDescriptions definition.
    EpcWallDescriptions.granite_whinestone_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.granite_whinstone_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
    EpcWallDescriptions.granite_whinstone_external_insulation: timber_granite_sandstone_internal_external_efficiency,

    # Sandstone / limestone
    # value mappings (uninsulated is rated Very Poor here as well)
    EpcWallDescriptions.sandstone_limestone_no_insulation_assumed: EpcEfficiency.VERY_POOR,
    EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.sandstone_limestone_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.sandstone_limestone_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
    EpcWallDescriptions.sandstone_limestone_external_insulation: timber_granite_sandstone_internal_external_efficiency,

    # Cob (special case)
    EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD,

    # Unknown mappings which are unhandled
    EpcWallDescriptions.cavity_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.solid_brick_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.system_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.timber_frame_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.granite_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.sandstone_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.cob_as_built_unknown: EpcEfficiency.NA,

}
|
||||
|
||||
|
||||
def resolve_wall_efficiency(
    description: EpcWallDescriptions,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Resolve wall efficiency from description + age band.

    :param description: EpcWallDescriptions, must be a key of WALL_DESCRIPTION_EFFICIENCIES
    :param age_band: EpcConstructionAgeBand, passed to the rule when it is callable
    :return: EpcEfficiency
    :raises KeyError: if the description has no registered rule
    """
    rule = WALL_DESCRIPTION_EFFICIENCIES[description]

    # Fixed ratings are returned as-is; anything else is called with the age band.
    return rule if isinstance(rule, EpcEfficiency) else rule(age_band)
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
# Mapping of source labels to target labels; every key currently maps to an
# identical string.
parity_map = {
    "Flat": "Flat",
    "Maisonette": "Maisonette",
    "Bungalow": "Bungalow",
    "House": "House",
}
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
# Empty mapping: no entries are defined here.
parity_map = {

}
|
||||
|
|
@ -1,93 +1,371 @@
|
|||
import re
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
from etl.epc.DataProcessor import construction_age_bounds_map
|
||||
from backend.onboarders.mappings.property_type import parity_map as property_map
|
||||
from backend.onboarders.mappings.age_band import party_map as age_band_map
|
||||
from backend.onboarders.mappings.built_form import parity_map as built_form_map
|
||||
|
||||
|
||||
def check_nulls(data, original_column, mapped_column):
    """
    Check that nulls in a mapped column only come from nulls in its source column.

    :param data: DataFrame holding both columns
    :param original_column: name of the source column the mapping was applied to
    :param mapped_column: name of the column produced by the mapping
    :return: True when the check passes (including when there are no nulls at all)
    :raises AssertionError: if a non-null source value ended up null after mapping
    """
    # We only allow nulls if the original value was null
    unmapped_rows = data[pd.isnull(data[mapped_column])]

    # We make sure all original values were null. With no unmapped rows this
    # holds trivially, because .all() of an empty selection is True.
    assert pd.isnull(unmapped_rows[original_column]).all(), (
        f"Some values in {mapped_column} were not mapped, but original values were not null"
    )

    # Return True on every passing path, not only when there were no nulls.
    return True
|
||||
|
||||
|
||||
# Sample input data
|
||||
|
||||
data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
"- Data Extracts for Domna.xlsx",
|
||||
sheet_name="Sustainability"
|
||||
from backend.onboarders.base import OnboarderBase
|
||||
# Parity mappings
|
||||
from backend.onboarders.mappings.parity.property_type import parity_map as property_map
|
||||
from backend.onboarders.mappings.parity.age_band import parity_map as age_band_map
|
||||
from backend.onboarders.mappings.parity.built_form import parity_map as built_form_map
|
||||
from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback, WALL_DESCRIPTION_EFFICIENCIES
|
||||
from onboarders.mappings.parity.roof import roof_map, roof_unknown_age_fallback, resolve_roof_efficiency
|
||||
from onboarders.mappings.parity.floor import floor_map
|
||||
from onboarders.mappings.parity.heating import heating_map
|
||||
from onboarders.mappings.parity.glazing import glazing_map
|
||||
from backend.onboarders.mappings.parity.as_built_wall_classifiers import as_built_wall_classifiers
|
||||
from backend.onboarders.mappings.parity.as_built_roof_classifiers import as_built_roof_classifiers
|
||||
from backend.onboarders.mappings.parity.as_built_floor_classifiers import (
|
||||
as_built_floor_classifiers, unknown_as_built_floor_classifiers
|
||||
)
|
||||
from datatypes.epc.roof import EpcRoofDescriptions
|
||||
from datatypes.epc.floor import EpcFloorDescriptions
|
||||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
from datatypes.epc.walls import EpcWallDescriptions
|
||||
from datatypes.epc.efficiency import EpcEfficiency
|
||||
|
||||
# We want to map the parity fields to standard EPC references. This will allow us to
|
||||
# 1) Estimate EPCs, more accurately
|
||||
# 2) Patch incorrect EPCs with ease
|
||||
# 3) Indicate already installed measures
|
||||
|
||||
# ------------ construction_age_band ------------
|
||||
# Map to EPC age bands
|
||||
# def construction_date_to_band(year):
|
||||
# if pd.isnull(year):
|
||||
# return None
|
||||
# # Get the year from the date which is numpy datetime format
|
||||
# for label, ranges in construction_age_bounds_map.items():
|
||||
# if ranges["l"] <= year <= ranges["u"]:
|
||||
# return label
|
||||
# raise NotImplementedError("year out of bounds")
|
||||
#
|
||||
#
|
||||
# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band)
|
||||
|
||||
# Translate the source "Construction Years" values through age_band_map.
data["construction_age_band"] = data["Construction Years"].map(age_band_map)

# Nulls are acceptable here only where the source value was itself null.
check_nulls(data, "Construction Years", "construction_age_band")

# ------------ property_type ------------
data["property_type"] = data["Type"].map(property_map)

# Every row must map to a property type.
assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped"

# ------------ built_form ------------
data["built_form"] = data["Attachment"].map(built_form_map)

# Every row must map to a built form.
assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped"

# ------------ Wall Construction ------------

# Join construction and insulation into a single "construction+insulation" label;
# a missing insulation value is replaced by "Unknown Insulation".
data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation")

# The results of these two calls are not stored or used.
data["Wall Insulation"].value_counts()
data["Wall Construction"].value_counts()

# Per wall construction type, the age bands to treat as insulated or partially
# insulated when the insulation is "AsBuilt". All lists are currently empty.
as_built_map = {
    "Cavity": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
}
# Registers tqdm's progress_apply on pandas objects.
tqdm.pandas()
|
||||
|
||||
|
||||
def map_wall_construction(wall_constuction, wall_insulation, construction_age_band):
|
||||
if wall_insulation == "AsBuilt":
|
||||
# Deduce based on wall construction and age band
|
||||
bands = as_built_map.get(wall_constuction, None)
|
||||
if bands is None:
|
||||
raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map")
|
||||
class ParityOnboarder(OnboarderBase):
|
||||
|
||||
# We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated
|
||||
def __init__(
    self,
    fileuri: str,
    file_format: str,
    **kwargs
):
    """
    Derive the S3 locations from the input URI and load the input file.

    :param fileuri: location of the input file, in the format s3://bucket/key
    :param file_format: file extension of the input, without the leading dot
    :param kwargs: forwarded unchanged to read_s3
    """
    # Extract bucket, and filekey; Will be in the format s3://bucket/key
    uri_parts = fileuri.split("/")
    self.bucket_name = uri_parts[2]
    self.input_file_name = "/".join(uri_parts[3:])

    # Also prepare output file name. Only the trailing extension is stripped:
    # str.replace would also remove ".<file_format>" occurring elsewhere in the key.
    self.output_file_name = self.input_file_name.removesuffix("." + file_format) + "_transformed.csv"

    # Variables we want to map
    # 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type',
    # 'Attachment', 'Construction Years', 'Wall Construction',
    # 'Wall Insulation', 'Roof Construction', 'Roof Insulation',
    # 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating',
    # 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
    # 'Total Floor Area (m2)'
    self.read_s3(file_format=file_format, **kwargs)
|
||||
|
||||
def map_construction_age_band(self):
    """
    Map the "Construction Years" column through age_band_map into the landlord
    construction age band column, then check that nulls only arise from null sources.
    """
    source_column = "Construction Years"
    mapped_bands = self.data[source_column].map(age_band_map)
    self.data[self.landlord_construction_age_band] = mapped_bands

    # A null band is acceptable only where the source value was itself null.
    self.assert_nulls_only_from_source_nulls(
        self.data,
        source_column,
        self.landlord_construction_age_band,
    )
|
||||
|
||||
def map_property_type(self):
    """
    Map the "Type" column through property_map into the landlord property type
    column, then check that no row was left null.
    """
    mapped_types = self.data["Type"].map(property_map)
    self.data[self.landlord_property_type] = mapped_types

    # Every row must resolve to a property type.
    self.assert_no_nulls(self.data, self.landlord_property_type)
|
||||
|
||||
def map_built_form(self):
    """
    Map the "Attachment" column through built_form_map into the landlord built
    form column, then check that no row was left null.
    """
    mapped_forms = self.data["Attachment"].map(built_form_map)
    self.data[self.landlord_built_form] = mapped_forms

    # Every row must resolve to a built form.
    self.assert_no_nulls(self.data, self.landlord_built_form)
|
||||
|
||||
@staticmethod
|
||||
def _fill_wall_as_built(row: pd.Series) -> EpcWallDescriptions | None:
    """
    Utility function, used by map_wall_construction in parity transformation module
    :param row: row of input sustainability data, being transformed
    :return: EpcWallDescriptions, the as built wall description for the input row, based on the wall construction
    type and age band; None when no classifier exists for the wall type
    """
    # Already resolved via direct mapping. pd.isnull is used rather than an
    # `is not None` test so that a NaN left by an unmapped lookup key is treated
    # as unresolved too, consistent with the roof equivalent of this helper.
    if not pd.isnull(row.landlord_wall_construction):
        return row.landlord_wall_construction

    wall_type = row["Wall Construction"]

    # Missing construction age → conservative fallback
    if pd.isnull(row.landlord_construction_age_band):
        return wall_unknown_age_fallback.get(wall_type)

    classifier = as_built_wall_classifiers.get(wall_type)
    if classifier is None:
        return None

    return classifier(row.landlord_construction_age_band)
|
||||
|
||||
@staticmethod
|
||||
def _resolve_wall_efficiency(
    description: EpcWallDescriptions,
    age_band: EpcConstructionAgeBand | None,
) -> EpcEfficiency:
    """
    Resolve wall efficiency from description + age band.

    :param description: EpcWallDescriptions
    :param age_band: EpcConstructionAgeBand, or None / a pandas null when unknown
    :return: EpcEfficiency; EpcEfficiency.NA when the description is an unknown/holding
        description, has no registered rule, or needs an age band that is missing
    """
    # Unknown / holding descriptions are never looked up → efficiency unknown
    is_holding_description = "unknown insulation" in description.value.lower()
    rule = None if is_holding_description else WALL_DESCRIPTION_EFFICIENCIES.get(description)

    if rule is None:
        return EpcEfficiency.NA

    # Fixed rating
    if isinstance(rule, EpcEfficiency):
        return rule

    # Rule needs age band but we don't have one
    age_band_missing = age_band is None or pd.isnull(age_band)
    if age_band_missing:
        return EpcEfficiency.NA

    return rule(age_band)
|
||||
|
||||
def map_wall_construction(self):
    """
    Populate the landlord wall construction and wall efficiency columns.

    Wall construction is first looked up directly from the (construction,
    insulation) pair, then rows still unresolved are filled by
    _fill_wall_as_built. Efficiency is then resolved per row from the wall
    description and construction age band. Both columns must end up null-free.
    """
    # Step 1: direct lookup on the (construction, insulation) pair
    construction_insulation_pairs = self.data[["Wall Construction", "Wall Insulation"]].apply(tuple, axis=1)
    self.data[self.landlord_wall_construction] = construction_insulation_pairs.map(wall_map)

    # Step 2: fill in whatever the direct lookup left unresolved
    filled_descriptions = self.data.progress_apply(self._fill_wall_as_built, axis=1)
    self.data[self.landlord_wall_construction] = filled_descriptions

    # Sanity check
    self.assert_no_nulls(self.data, self.landlord_wall_construction)

    def _efficiency_for_row(row):
        # Efficiency depends on the resolved description and the age band.
        return self._resolve_wall_efficiency(
            row.landlord_wall_construction,
            row.landlord_construction_age_band,
        )

    # Step 3: resolve the efficiency rating row by row
    self.data[self.landlord_wall_efficiency] = self.data.progress_apply(_efficiency_for_row, axis=1)

    # Additional sanity check
    self.assert_no_nulls(self.data, self.landlord_wall_efficiency)
|
||||
|
||||
@staticmethod
|
||||
def _fill_roof_as_built(row: pd.Series) -> EpcRoofDescriptions | None:
    """
    Utility function, used by map_roof_construction to fill roof descriptions
    that the direct lookup left unresolved.
    :param row: row of input data, being transformed
    :return: the existing roof description when the row already has one; otherwise the
        unknown-age fallback for the roof type (may be None) when the age band is
        missing, or the classifier's result for the age band
    :raises NotImplementedError: if the roof type has no classifier, or the classifier returns None
    """
    # Already resolved
    existing_description = row.landlord_roof_construction
    if not pd.isnull(existing_description):
        return existing_description

    roof_type = row["Roof Construction"]

    # The classifier is required even when the age band turns out to be missing.
    classifier = as_built_roof_classifiers.get(roof_type)
    if classifier is None:
        raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'")

    age_band = row.landlord_construction_age_band
    if pd.isnull(age_band):
        return roof_unknown_age_fallback.get(roof_type)

    classified = classifier(age_band)
    if classified is None:
        raise NotImplementedError(
            f"Roof classification returned None for roof type '{roof_type}'"
        )

    return classified
|
||||
|
||||
@staticmethod
|
||||
def _extract_insulation_thickness(value: str | None) -> int | None:
|
||||
"""
|
||||
Extract insulation thickness in mm from a string like 'mm150'.
|
||||
Returns None if not present or not parseable.
|
||||
"""
|
||||
if value is None or pd.isnull(value):
|
||||
return None
|
||||
|
||||
match = re.search(r"(\d+)", str(value))
|
||||
if not match:
|
||||
return None
|
||||
|
||||
return int(match.group(1))
|
||||
|
||||
def map_roof_construction(self):
    """
    Derive the landlord roof construction, roof efficiency and sloping-ceiling flag.

    Steps, in order:
      1. Look up each ("Roof Construction", "Roof Insulation") pair in ``roof_map``.
      2. Run ``_fill_roof_as_built`` over every row to resolve what the lookup left null.
      3. Parse the insulation thickness (mm) from "Roof Insulation" into the helper
         column "roof_insulation_thickness_mm".
      4. Resolve the efficiency rating with ``resolve_roof_efficiency``.
      5. Flag rows whose "Roof Construction" equals "PitchedWithSlopingCeiling".

    ``assert_no_nulls`` is called on the construction column after step 2 and on
    the efficiency column after step 4.
    """

    def _thickness_or_none(raw):
        # Once stored in a DataFrame column, parsed thicknesses can come back as
        # floats and missing ones as NaN. The extractor's contract is int | None,
        # so restore that shape before handing the value to the resolver.
        return None if pd.isnull(raw) else int(raw)

    self.data[self.landlord_roof_construction] = (
        self.data[["Roof Construction", "Roof Insulation"]]
        .progress_apply(tuple, axis=1)
        .map(roof_map)
    )

    self.data[self.landlord_roof_construction] = self.data.progress_apply(
        self._fill_roof_as_built,
        axis=1,
    )

    # sanity check
    self.assert_no_nulls(self.data, self.landlord_roof_construction)

    self.data["roof_insulation_thickness_mm"] = self.data["Roof Insulation"].apply(
        self._extract_insulation_thickness
    )

    self.data[self.landlord_roof_efficiency] = self.data.progress_apply(
        lambda row: resolve_roof_efficiency(
            description=row.landlord_roof_construction,
            age_band=row.landlord_construction_age_band,
            insulation_thickness=_thickness_or_none(row.roof_insulation_thickness_mm),
        ),
        axis=1,
    )
    # sanity check
    self.assert_no_nulls(self.data, self.landlord_roof_efficiency)

    # Flag sloping ceiling
    self.data[self.landlord_has_sloping_ceiling] = self.data["Roof Construction"].apply(
        lambda x: x == "PitchedWithSlopingCeiling"
    )
|
||||
|
||||
@staticmethod
|
||||
def _fill_floor_as_built(row: pd.Series):
|
||||
# 1. Already resolved
|
||||
if row.landlord_floor_construction is not None:
|
||||
return row.landlord_floor_construction
|
||||
|
||||
age_band = row.landlord_construction_age_band
|
||||
floor_type = row["Floor Construction"]
|
||||
insulation = row["Floor Insulation"]
|
||||
|
||||
# 2. Missing age band → conservative fallback
|
||||
if pd.isnull(age_band):
|
||||
return EpcFloorDescriptions.unknown
|
||||
|
||||
# 3. Known floor types
|
||||
if floor_type in ["Solid", "SuspendedTimber", "SuspendedNotTimber"]:
|
||||
classifier = as_built_floor_classifiers[floor_type]
|
||||
return classifier(age_band)
|
||||
|
||||
# 4. Unknown floor type
|
||||
if floor_type == "Unknown":
|
||||
classifier = unknown_as_built_floor_classifiers[insulation]
|
||||
return classifier(age_band)
|
||||
|
||||
# 5. Truly missing / garbage input
|
||||
return EpcFloorDescriptions.unknown
|
||||
|
||||
def map_floor_construction(self):
    """
    Populate the landlord floor construction column.

    Each ("Floor Construction", "Floor Insulation") pair is looked up in
    ``floor_map``; ``_fill_floor_as_built`` is then run over every row, and
    ``assert_no_nulls`` is finally called on the resulting column.
    """
    target = self.landlord_floor_construction

    # Direct lookup on the raw (construction, insulation) pair.
    raw_pairs = self.data[["Floor Construction", "Floor Insulation"]].progress_apply(tuple, axis=1)
    self.data[target] = raw_pairs.map(floor_map)

    # Row-wise as-built resolution.
    self.data[target] = self.data.progress_apply(self._fill_floor_as_built, axis=1)

    self.assert_no_nulls(self.data, target)
|
||||
|
||||
def map_glazing(self):
    """
    Expand the raw "Glazing" value into the five landlord glazing columns.

    Each "Glazing" value is looked up in ``glazing_map`` and the mapped entry is
    spread across the target columns via ``pd.Series``.
    """
    # TODO: probably doesn't make sense to store multi glazed proportion, glazed type or glazed area.
    # There is maybe an argument for landlord_multi_glaze_proportion as this could be variable
    # (the original note ends mid-sentence here).
    glazing_columns = [
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_multi_glaze_proportion,
        self.landlord_glazed_type,
        self.landlord_glazed_area,
    ]
    expanded = self.data["Glazing"].map(glazing_map).progress_apply(pd.Series)
    self.data[glazing_columns] = expanded
|
||||
|
||||
def map_heating(self):
    """
    Map the raw heating inputs onto the seven landlord heating / hot-water columns.

    The ("Heating", "Boiler Efficiency", "Main Fuel", "Controls Adequacy") tuple of
    each row is looked up in ``heating_map`` and the mapped entry is spread across
    the target columns via ``pd.Series``.
    """
    # TODO - when mapping heating controls, we should check the existing heating controls
    # and the efficiency rating. For sub optimal heating controls, we're going to make an
    # assumption as to what the heating controls are, and the energy efficiency rating we
    # prescribe here may not be accurate. We therefore use this as an upper limit as opposed
    # to a guaranteed efficiency rating. To stress, this is only relevant for sub optimal
    # heating controls. E.g. it may be programmer and room thermostat
    source_columns = [
        "Heating",
        "Boiler Efficiency",
        "Main Fuel",
        "Controls Adequacy",
    ]
    target_columns = [
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    ]

    lookup_keys = self.data[source_columns].progress_apply(tuple, axis=1)
    self.data[target_columns] = lookup_keys.map(heating_map).progress_apply(pd.Series)
|
||||
|
||||
def map_floor_area(self):
    """Rename "Total Floor Area (m2)" to the landlord total-floor-area column name."""
    # Pure rename: values are left untouched.
    renames = {"Total Floor Area (m2)": self.landlord_total_floor_area_m2}
    self.data = self.data.rename(columns=renames)
|
||||
|
||||
def select_columns(self):
    """
    Reduce ``self.data`` to the output columns and rename the identifier/address columns.

    The frame keeps, in order, the raw identifier/address columns followed by the
    landlord_* columns produced by the mapping steps. "Org Ref", "Address 1/2/3"
    and "Postcode" are then renamed to their output names; "UPRN" keeps its name.

    Raises:
        KeyError: If any selected column is missing from ``self.data``.
    """
    # The rename keys must match the selected source names exactly (including the
    # space in "Address 1"); DataFrame.rename silently ignores keys it cannot find.
    output_names = {
        "Org Ref": "landlord_property_id",
        "Address 1": "address1",
        "Address 2": "address2",
        "Address 3": "address3",
        "Postcode": "postcode",
    }

    selected = [
        "Org Ref",
        "UPRN",
        "Address 1",
        "Address 2",
        "Address 3",
        "Postcode",
        self.landlord_total_floor_area_m2,
        self.landlord_construction_age_band,
        self.landlord_property_type,
        self.landlord_built_form,
        self.landlord_wall_construction,
        self.landlord_wall_efficiency,
        self.landlord_roof_construction,
        self.landlord_roof_efficiency,
        self.landlord_has_sloping_ceiling,
        self.landlord_floor_construction,
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_multi_glaze_proportion,
        self.landlord_glazed_type,
        self.landlord_glazed_area,
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    ]

    self.data = self.data[selected].rename(columns=output_names)
|
||||
|
||||
def extract_values(self):
    """
    Replace value-carrying objects in the mapped columns with their ``.value``.

    For each listed landlord column, any cell that has a ``value`` attribute is
    replaced by that attribute; every other cell is left as it is.
    """

    def _unwrap(cell):
        return cell.value if hasattr(cell, "value") else cell

    value_columns = (
        self.landlord_construction_age_band,
        self.landlord_property_type,
        self.landlord_built_form,
        self.landlord_wall_construction,
        self.landlord_wall_efficiency,
        self.landlord_roof_construction,
        self.landlord_roof_efficiency,
        self.landlord_floor_construction,
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    )
    for column in value_columns:
        self.data[column] = self.data[column].progress_apply(_unwrap)
|
||||
|
||||
def transform(self):
    """
    Run every mapping step, then the final formatting steps, in a fixed order.

    The order matters: the wall, roof and floor steps read the age-band column
    written by the first step, and the formatting steps expect all landlord_*
    columns to exist.
    """
    pipeline = (
        # ------------ mapping ------------
        self.map_construction_age_band,
        self.map_property_type,
        self.map_built_form,
        self.map_wall_construction,
        self.map_roof_construction,
        self.map_floor_construction,
        self.map_glazing,
        self.map_heating,  # heating, fuel, controls & hot water
        self.map_floor_area,
        # ------------ formatting ------------
        self.select_columns,
        self.extract_values,
    )
    for step in pipeline:
        step()
|
||||
|
|
|
|||
6
backend/onboarders/requirements.txt
Normal file
6
backend/onboarders/requirements.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
boto3
|
||||
numpy==2.1.2
|
||||
pandas==2.2.3
|
||||
tqdm==4.66.5
|
||||
pydantic==2.9.2
|
||||
openpyxl==3.1.2
|
||||
97
backend/onboarders/tests/test_floor_remapping.py
Normal file
97
backend/onboarders/tests/test_floor_remapping.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
import pytest
|
||||
|
||||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
from datatypes.epc.floor import EpcFloorDescriptions
|
||||
|
||||
from backend.onboarders.mappings.parity.as_built_floor_classifiers import (
|
||||
unknown_floor_as_built,
|
||||
unknown_floor_retrofitted,
|
||||
map_solid_floor_as_built,
|
||||
map_suspended_floor_as_built,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band,expected",
    [
        # Before 1900 / 1900–1929 → suspended, no insulation
        (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_no_insulation_assumed),

        # 1930–1995 → solid, no insulation
        (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1967_to_1975, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed),

        # 1996–2002 → solid, limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed),

        # 2003+ → solid, insulated
        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed),
    ],
)
def test_unknown_floor_as_built(age_band, expected):
    """unknown_floor_as_built returns the expected as-built description for each age band."""
    assert unknown_floor_as_built(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band,expected",
    [
        # Pre-1930 → suspended, insulated
        (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_insulated),
        (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_insulated),

        # 1930+ → solid, insulated
        (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_insulated),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_insulated),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated),
    ],
)
def test_unknown_floor_retrofitted(age_band, expected):
    """unknown_floor_retrofitted returns an insulated description whose floor type depends on age band."""
    assert unknown_floor_retrofitted(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band,expected",
    [
        # 1983–1995 → no insulation
        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed),

        # 1996–2002 → limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed),

        # 2003+ → insulated
        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed),
    ],
)
def test_solid_floor_as_built(age_band, expected):
    """map_solid_floor_as_built: only age bands from 1983 onwards are exercised here."""
    assert map_solid_floor_as_built(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band,expected",
    [
        # 1983–1995 → no insulation
        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.suspended_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.suspended_no_insulation_assumed),

        # 1996–2002 → limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.suspended_limited_insulation_assumed),

        # 2003+ → insulated
        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.suspended_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.suspended_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.suspended_insulated_assumed),
    ],
)
def test_suspended_floor_as_built(age_band, expected):
    """map_suspended_floor_as_built: only age bands from 1983 onwards are exercised here."""
    assert map_suspended_floor_as_built(age_band) == expected
|
||||
173
backend/onboarders/tests/test_roof_remapping.py
Normal file
173
backend/onboarders/tests/test_roof_remapping.py
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
import pytest
|
||||
|
||||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
from datatypes.epc.roof import EpcRoofDescriptions
|
||||
from datatypes.epc.efficiency import EpcEfficiency
|
||||
|
||||
from backend.onboarders.mappings.parity.as_built_roof_classifiers import (
|
||||
map_flat_roof,
|
||||
map_sloping_ceiling_roof,
|
||||
)
|
||||
from backend.onboarders.mappings.parity.roof import resolve_roof_efficiency
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# As-built roof description classification
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band, expected",
    [
        # Up to 1966 → no insulation
        (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.flat_no_insulation),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcRoofDescriptions.flat_no_insulation),
        # 1967–1982 → limited insulation
        (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.flat_limited_insulation),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.flat_limited_insulation),
        # 1983+ → insulated
        (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.flat_insulated),
        (EpcConstructionAgeBand.from_2007_to_2011, EpcRoofDescriptions.flat_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.flat_insulated),
    ],
)
def test_classify_flat_roof(age_band, expected):
    """map_flat_roof returns the expected flat-roof description for sampled age bands."""
    assert map_flat_roof(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band, expected",
    [
        # Up to 1975 → no insulation
        (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.sloping_pitched_no_insulation),
        (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.sloping_pitched_no_insulation),
        # 1976–1982 → limited insulation
        (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.sloping_pitched_limited_insulation),
        # 1983+ → insulated
        (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.sloping_pitched_insulated),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcRoofDescriptions.sloping_pitched_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.sloping_pitched_insulated),
    ],
)
def test_classify_sloping_ceiling_roof(age_band, expected):
    """map_sloping_ceiling_roof returns the expected sloping-ceiling description for sampled age bands."""
    assert map_sloping_ceiling_roof(age_band) == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Roof efficiency — fixed & age-band driven
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    "description, age_band, expected",
    [
        # Flat roof, no insulation
        (EpcRoofDescriptions.flat_no_insulation, EpcConstructionAgeBand.before_1900, EpcEfficiency.VERY_POOR),

        # Flat roof, limited insulation (age-band driven)
        (EpcRoofDescriptions.flat_limited_insulation, EpcConstructionAgeBand.from_1976_to_1982, EpcEfficiency.POOR),
        (
            EpcRoofDescriptions.flat_limited_insulation, EpcConstructionAgeBand.from_1967_to_1975,
            EpcEfficiency.VERY_POOR),

        # Flat roof, insulated (age-band driven)
        (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_1983_to_1990, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD),

        # Pitched, insulated assumed (loft)
        (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_1996_to_2002, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_2007_to_2011,
         EpcEfficiency.VERY_GOOD),
    ],
)
def test_roof_efficiency_age_band_only(description, age_band, expected):
    """resolve_roof_efficiency with no insulation thickness: the rating follows description and age band."""
    assert resolve_roof_efficiency(
        description=description,
        age_band=age_band,
        insulation_thickness=None,
    ) == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Roof efficiency — insulation thickness driven
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    "description, thickness, expected",
    [
        # Loft insulation
        (EpcRoofDescriptions.loft_12mm_insulation, 12, EpcEfficiency.VERY_POOR),
        (EpcRoofDescriptions.loft_25mm_insulation, 25, EpcEfficiency.POOR),
        (EpcRoofDescriptions.loft_75mm_insulation, 75, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.loft_150mm_insulation, 150, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.loft_300mm_insulation, 300, EpcEfficiency.VERY_GOOD),

        # Flat insulated — thickness overrides age band
        (EpcRoofDescriptions.flat_insulated, 50, EpcEfficiency.POOR),
        (EpcRoofDescriptions.flat_insulated, 100, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.flat_insulated, 200, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.flat_insulated, 300, EpcEfficiency.VERY_GOOD),

        # Sloping ceiling
        (EpcRoofDescriptions.sloping_pitched_insulated, 75, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.sloping_pitched_insulated, 150, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.sloping_pitched_insulated, 350, EpcEfficiency.VERY_GOOD),
    ],
)
def test_roof_efficiency_thickness_based(description, thickness, expected):
    """resolve_roof_efficiency with a thickness: the rating follows the thickness, not the fixed age band."""
    assert resolve_roof_efficiency(
        description=description,
        age_band=EpcConstructionAgeBand.before_1900,  # should be ignored
        insulation_thickness=thickness,
    ) == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Thatched roofs
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    "description, age_band, expected",
    [
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.before_1900, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD),
    ],
)
def test_thatched_efficiency_age_band(description, age_band, expected):
    """Plain thatched roofs with no thickness given: the rating is driven by the age band."""
    assert resolve_roof_efficiency(
        description=description,
        age_band=age_band,
        insulation_thickness=None,
    ) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "thickness, expected",
    [
        (12, EpcEfficiency.AVERAGE),
        (50, EpcEfficiency.GOOD),
        (150, EpcEfficiency.GOOD),
        (200, EpcEfficiency.VERY_GOOD),
    ],
)
def test_thatched_efficiency_thickness(thickness, expected):
    """Thatched roofs with additional insulation: the rating is driven by the thickness."""
    assert resolve_roof_efficiency(
        description=EpcRoofDescriptions.thatched_with_additional_insulation,
        age_band=EpcConstructionAgeBand.before_1900,
        insulation_thickness=thickness,
    ) == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Unknown / holding descriptions
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    "description",
    [
        EpcRoofDescriptions.flat_as_built_unknown,
        EpcRoofDescriptions.loft_as_built_unknown,
        EpcRoofDescriptions.thatched_as_built_unknown,
        EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    ],
)
def test_unknown_roof_descriptions_return_na(description):
    """The "*_as_built_unknown" holding descriptions resolve to EpcEfficiency.NA when nothing else is known."""
    assert resolve_roof_efficiency(
        description=description,
        age_band=None,
        insulation_thickness=None,
    ) == EpcEfficiency.NA
|
||||
161
backend/onboarders/tests/test_wall_remapping.py
Normal file
161
backend/onboarders/tests/test_wall_remapping.py
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
import pytest
|
||||
|
||||
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
|
||||
from datatypes.epc.walls import EpcWallDescriptions
|
||||
from datatypes.epc.efficiency import EpcEfficiency
|
||||
|
||||
from backend.onboarders.mappings.parity.walls import resolve_wall_efficiency
|
||||
from backend.onboarders.mappings.parity.as_built_wall_classifiers import (
|
||||
map_cavity_wall_insulation,
|
||||
map_solid_wall_insulation,
|
||||
map_timber_frame_wall_insulation,
|
||||
map_system_build_wall_insulation,
|
||||
map_granite_wall_insulation,
|
||||
map_sandstone_wall_insulation,
|
||||
map_cob_wall_insulation,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# As-built wall description classification
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cavity_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.cavity_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cavity_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cavity_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcWallDescriptions.cavity_insulated_assumed),
    ],
)
def test_map_cavity_wall_insulation(age_band, expected):
    """map_cavity_wall_insulation returns the expected as-built cavity description for sampled age bands."""
    assert map_cavity_wall_insulation(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.solid_brick_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.solid_brick_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1996_to_2002, EpcWallDescriptions.solid_brick_insulated_assumed),
    ],
)
def test_map_solid_wall_insulation(age_band, expected):
    """map_solid_wall_insulation returns the expected as-built solid-brick description for sampled age bands."""
    assert map_solid_wall_insulation(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.timber_frame_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.timber_frame_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.timber_frame_insulated_assumed),
    ],
)
def test_map_timber_frame_wall_insulation(age_band, expected):
    """map_timber_frame_wall_insulation returns the expected as-built timber-frame description."""
    assert map_timber_frame_wall_insulation(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.system_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.system_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_2003_to_2006, EpcWallDescriptions.system_insulated_assumed),
    ],
)
def test_map_system_wall_insulation(age_band, expected):
    """map_system_build_wall_insulation returns the expected as-built system-build description."""
    assert map_system_build_wall_insulation(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.granite_whinstone_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.granite_whinstone_partial_insulated_assumed),
        # NOTE(review): this member is spelled "whinestone" while the two above use
        # "whinstone" — presumably it mirrors the enum's actual member name; confirm.
        (EpcConstructionAgeBand.from_2012_to_2022, EpcWallDescriptions.granite_whinestone_insulated_assumed),
    ],
)
def test_map_granite_wall_insulation(age_band, expected):
    """map_granite_wall_insulation returns the expected as-built granite/whinstone description."""
    assert map_granite_wall_insulation(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.sandstone_limestone_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_2007_to_2011, EpcWallDescriptions.sandstone_limestone_insulated_assumed),
    ],
)
def test_map_sandstone_wall_insulation(age_band, expected):
    """map_sandstone_wall_insulation returns the expected as-built sandstone/limestone description."""
    assert map_sandstone_wall_insulation(age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cob_as_built_average),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cob_as_built_average),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cob_as_built_good),
    ],
)
def test_map_cob_wall_insulation(age_band, expected):
    """map_cob_wall_insulation returns the expected as-built cob description for sampled age bands."""
    assert map_cob_wall_insulation(age_band) == expected
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Wall efficiency resolution
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@pytest.mark.parametrize(
    "description, age_band, expected",
    [
        # Fixed efficiencies (no age band supplied)
        (EpcWallDescriptions.cavity_no_insulation_assumed, None, EpcEfficiency.POOR),
        (EpcWallDescriptions.cavity_partial_insulated_assumed, None, EpcEfficiency.AVERAGE),
        (EpcWallDescriptions.cavity_insulated_assumed, None, EpcEfficiency.GOOD),

        # Function-based efficiencies (result varies with age band)
        (
            EpcWallDescriptions.cavity_filled_cavity,
            EpcConstructionAgeBand.from_2023_onwards,
            EpcEfficiency.VERY_GOOD,
        ),
        (
            EpcWallDescriptions.cavity_filled_cavity,
            EpcConstructionAgeBand.from_1991_to_1995,
            EpcEfficiency.GOOD,
        ),
        (
            EpcWallDescriptions.solid_brick_internal_insulation,
            EpcConstructionAgeBand.from_2003_to_2006,
            EpcEfficiency.VERY_GOOD,
        ),
        (
            EpcWallDescriptions.solid_brick_internal_insulation,
            EpcConstructionAgeBand.from_1950_to_1966,
            EpcEfficiency.GOOD,
        ),
    ],
)
def test_resolve_wall_efficiency(description, age_band, expected):
    """resolve_wall_efficiency returns the expected rating for fixed and age-band-dependent descriptions."""
    assert resolve_wall_efficiency(description, age_band) == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "description",
    [
        EpcWallDescriptions.cavity_as_built_unknown,
        EpcWallDescriptions.solid_brick_as_built_unknown,
        EpcWallDescriptions.system_as_built_unknown,
        EpcWallDescriptions.timber_frame_as_built_unknown,
        EpcWallDescriptions.granite_as_built_unknown,
        EpcWallDescriptions.sandstone_as_built_unknown,
        EpcWallDescriptions.cob_as_built_unknown,
    ],
)
def test_unknown_wall_descriptions_return_na(description):
    """The "*_as_built_unknown" holding descriptions resolve to EpcEfficiency.NA when no age band is given."""
    assert resolve_wall_efficiency(description, None) == EpcEfficiency.NA
|
||||
9
backend/postcode_splitter/handler/Dockerfile
Normal file
9
backend/postcode_splitter/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# Container image for the postcode_splitter Lambda, built on the public ECR
# Lambda Python 3.10 base image.
FROM public.ecr.aws/lambda/python:3.10

# Set working directory (Lambda task root)
WORKDIR /var/task

# NOTE(review): this file has no COPY or dependency-install step, so as written
# the image does not contain the module that CMD points at — presumably a
# placeholder until the handler code is packaged; confirm before deploying.

# -----------------------------
# Lambda handler
# -----------------------------
# Handler reference in "<module>.<function>" form.
CMD ["main.handler"]
|
||||
0
backend/postcode_splitter/handler/requirements.txt
Normal file
0
backend/postcode_splitter/handler/requirements.txt
Normal file
127
backend/postcode_splitter/main.py
Normal file
127
backend/postcode_splitter/main.py
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
from backend.address2UPRN.main import (
|
||||
resolve_uprns_for_postcode_group,
|
||||
get_epc_data_with_postcode,
|
||||
)
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def sanitise_postcode(postcode: str) -> str | None:
|
||||
"""
|
||||
Normalise postcode for grouping.
|
||||
|
||||
- Uppercase
|
||||
- Remove all whitespace
|
||||
"""
|
||||
if pd.isna(postcode):
|
||||
return None
|
||||
|
||||
return postcode.upper().replace(" ", "")
|
||||
|
||||
|
||||
def is_valid_postcode(postcode_clean: str) -> bool:
    """
    Validate postcode using postcodes.io.

    Expects a sanitised postcode (e.g. E84SQ).

    Args:
        postcode_clean: Postcode with whitespace removed and upper-cased.

    Returns:
        True if the service reports the postcode as valid. False if the postcode
        is empty, the service reports it invalid, or the request fails for any
        reason (so a network failure is indistinguishable from an invalid postcode).
    """
    # Local import keeps the block self-contained; urllib is standard library.
    from urllib.parse import quote

    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
    if not postcode_clean:
        return False

    # The value originates from a spreadsheet, so percent-encode it: characters
    # such as "/" or "?" must not alter the request path or query.
    url = POSTCODES_IO_VALIDATE_URL.format(postcode=quote(str(postcode_clean), safe=""))

    try:
        resp = requests.get(url, timeout=5)
        resp.raise_for_status()
        # Coerce to honour the declared bool return type.
        return bool(resp.json().get("result", False))
    except requests.RequestException:
        # Network issues, rate limits, etc.
        return False
|
||||
|
||||
|
||||
def main():
    """
    Resolve UPRNs for the first 500 rows of the "Sustainability" sheet of hackney.xlsx.

    Steps:
      1. Sanitise each postcode and validate every distinct postcode once.
      2. For each valid postcode, fetch EPC data and resolve UPRNs for that group
         of rows; groups with no EPC results or that raise are kept, with a
         ``status`` (and ``error``) column recording what happened.
      3. Concatenate all groups and build two review views of the match columns.

    Nothing is returned or written; the review views exist only as locals.
    """
    df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability")
    # NOTE(review): the 500-row cap looks like a development limit — confirm before a full run.
    df = df.head(500)

    # Sanitise postcodes
    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)

    # Validate each distinct, non-null postcode once to save API calls.
    unique_postcodes = df["postcode_clean"].dropna().unique()
    postcode_validity = {
        pc: is_valid_postcode(pc)
        for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
    }

    # Map validity back onto the dataframe. Rows with a missing postcode have no
    # entry in postcode_validity; treat them as invalid so the column stays a clean
    # boolean mask (NaN in a mask makes boolean indexing raise).
    df["postcode_valid"] = (
        df["postcode_clean"]
        .map(lambda pc: bool(postcode_validity.get(pc, False)))
        .astype(bool)
    )

    results = []

    for postcode, group_df in tqdm(
        df[df["postcode_valid"]].groupby("postcode_clean"),
        desc="Resolving UPRNs by postcode",
    ):
        try:
            epc_df = get_epc_data_with_postcode(postcode)

            if epc_df.empty:
                tmp = group_df.copy()
                tmp["found_uprn"] = None
                tmp["status"] = "no_epc_results"
                results.append(tmp)
                continue

            resolved = resolve_uprns_for_postcode_group(
                group_df=group_df,
                epc_df=epc_df,
            )

            results.append(resolved)

        except Exception as e:
            # Best-effort: keep the rows and record the failure instead of aborting the run.
            tmp = group_df.copy()
            tmp["found_uprn"] = None
            tmp["status"] = "exception"
            tmp["error"] = str(e)
            results.append(tmp)

    if not results:
        # No valid postcodes at all: pd.concat would raise on an empty list.
        return

    final_df = pd.concat(results, ignore_index=True)

    review_columns = [
        "best_match_lexiscore",
        "Address 1",
        "best_match_address",
        "Postcode",
        "UPRN",
        "best_match_uprn",
    ]
    # reindex (rather than plain selection) tolerates match columns being absent,
    # e.g. when every group ended as "no_epc_results" or "exception".
    # NOTE(review): both views are unused locals — presumably inspected in a
    # debugger; confirm whether they should be returned or written out.
    review_all = final_df.reindex(columns=review_columns)
    review_matched = review_all[review_all["best_match_lexiscore"] > 0]
|
||||
|
||||
|
||||
def handler(event, context):
    """
    Lambda entry point: logs a greeting and returns a fixed 200 response.

    ``event`` and ``context`` are accepted but not read.
    """
    print("hello Postcode splitter world")
    response = {"statusCode": 200, "body": "hello world"}
    return response
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
11
conftest.py
11
conftest.py
|
|
@ -1,5 +1,11 @@
|
|||
import os
|
||||
from backend.app.config import get_settings
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
# Load .env in conftest.py directory for local development
|
||||
load_dotenv()
|
||||
|
||||
DEFAULT_ENV = {
|
||||
"API_KEY": "test",
|
||||
|
|
@ -8,7 +14,10 @@ DEFAULT_ENV = {
|
|||
"DATA_BUCKET": "test",
|
||||
"PLAN_TRIGGER_BUCKET": "test",
|
||||
"ENGINE_SQS_URL": "test",
|
||||
"EPC_AUTH_TOKEN": "test", # overridden in GitHub Actions
|
||||
"EPC_AUTH_TOKEN": os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
"test",
|
||||
), # overridden in GitHub Actions
|
||||
"GOOGLE_SOLAR_API_KEY": "test",
|
||||
"DB_HOST": "localhost",
|
||||
"DB_USERNAME": "test",
|
||||
|
|
|
|||
0
datatypes/epc/__init__.py
Normal file
0
datatypes/epc/__init__.py
Normal file
45
datatypes/epc/construction_age_band.py
Normal file
45
datatypes/epc/construction_age_band.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
import re
|
||||
from enum import Enum
|
||||
from typing import List
|
||||
|
||||
|
||||
class EpcConstructionAgeBand(Enum):
    """EPC construction age bands for England and Wales.

    Each member's value is the band's label string.
    """

    # Members are intentionally left unannotated: an enum member is an
    # instance of the enum class, not a plain ``str``, so a ``: str``
    # annotation is incorrect and is reported by type checkers.
    before_1900 = 'England and Wales: before 1900'
    from_1900_to_1929 = 'England and Wales: 1900-1929'
    from_1930_to_1949 = 'England and Wales: 1930-1949'
    from_1950_to_1966 = 'England and Wales: 1950-1966'
    from_1967_to_1975 = 'England and Wales: 1967-1975'
    from_1976_to_1982 = 'England and Wales: 1976-1982'
    from_1983_to_1990 = 'England and Wales: 1983-1990'
    from_1991_to_1995 = 'England and Wales: 1991-1995'
    from_1996_to_2002 = 'England and Wales: 1996-2002'
    from_2003_to_2006 = 'England and Wales: 2003-2006'
    from_2007_to_2011 = 'England and Wales: 2007-2011'
    from_2012_onwards = 'England and Wales: 2012-onwards'
    from_2012_to_2022 = 'England and Wales: 2012-2022'
    from_2023_onwards = 'England and Wales: 2023 onwards'

    def start_year(self) -> int:
        """Return the first year covered by this age band.

        Returns:
            ``0`` for a "before ..." band (it has no lower bound); otherwise
            the first four-digit number found in the band label.

        Raises:
            ValueError: if the label contains neither "before" nor a
                four-digit year.
        """
        label = self.value.lower()

        # Open-ended "before X" bands sort ahead of every dated band.
        if 'before' in label:
            return 0

        match = re.search(r'(\d{4})', label)
        if not match:
            raise ValueError(f"Cannot determine start year from '{self.value}'")

        return int(match.group(1))

    @classmethod
    def from_year_onwards(cls, year: int) -> List["EpcConstructionAgeBand"]:
        """Return every band whose start year is >= ``year``.

        Bands are returned in definition order. A band that merely contains
        ``year`` (e.g. 1900-1929 for ``year=1905``) is not included, because
        only the band's start year is compared.
        """
        return [band for band in cls if band.start_year() >= year]
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Reference in a new issue