mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fixed merge conflicts with main
This commit is contained in:
commit
7bb7972549
84 changed files with 3370 additions and 1119 deletions
39
.devcontainer/asset_list/Dockerfile
Normal file
39
.devcontainer/asset_list/Dockerfile
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
# Dev-container image for the asset-list tooling: Python 3.11 on Debian
# bullseye with libpostal compiled from source.
FROM python:3.11.10-bullseye

# Login name of the non-root development user created in step 3.
ARG USER=vscode
# Build-time only (ARG, not ENV): keeps apt from prompting during the build
# without persisting the setting into running containers.
ARG DEBIAN_FRONTEND=noninteractive

# 1) Toolchain + utilities for building libpostal
RUN apt-get update && apt-get install -y --no-install-recommends \
        autoconf \
        automake \
        build-essential \
        ca-certificates \
        curl \
        git \
        jq \
        libtool \
        pkg-config \
        sudo \
        vim \
    && rm -rf /var/lib/apt/lists/*

# 2) Build and install libpostal from source
# Clone, build and clean-up share a single layer so the source tree never
# persists in the image. `cd` is used (rather than WORKDIR) so the working
# directory seen by later instructions is unchanged.
# NOTE(review): the clone follows the tip of the default branch; pin a commit
# or tag if reproducible builds matter.
RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
    && cd /tmp/libpostal \
    && ./bootstrap.sh \
    && ./configure --datadir=/usr/local/share/libpostal \
    && make -j"$(nproc)" \
    && make install \
    && ldconfig \
    && rm -rf /tmp/libpostal

# 3) Create the user and grant sudo privileges
# /bin/bash resolves on both merged-/usr and split-/usr Debian images.
RUN useradd -m -s /bin/bash "${USER}" \
    && echo "${USER} ALL=(ALL) NOPASSWD: ALL" > "/etc/sudoers.d/${USER}" \
    && chmod 0440 "/etc/sudoers.d/${USER}"

# 4) Python deps - needed to run the asset list code
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# COPY (not ADD): this is a plain local file, no extraction or download needed.
# NOTE(review): the source path is relative to the build context; confirm it
# points at the intended requirements file for this image.
COPY asset_list/requirements.txt requirements.txt
RUN pip install -r requirements.txt

# 5) Workdir
WORKDIR /workspaces/model

# 6) Make Python find your package
# Add project root to PYTHONPATH for all processes. The base image defines no
# PYTHONPATH, so nothing is appended: a trailing ":" would put an empty entry
# (the current directory) on sys.path.
ENV PYTHONPATH=/workspaces/model
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"name": "Basic Python",
|
||||
"name": "SAL ENV",
|
||||
"dockerComposeFile": "docker-compose.yml",
|
||||
"service": "model",
|
||||
"service": "model-sal",
|
||||
"remoteUser": "vscode",
|
||||
"workspaceFolder": "/workspaces/model",
|
||||
"postStartCommand": "bash .devcontainer/post-install.sh",
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
model:
|
||||
model-sal:
|
||||
user: "${UID}:${GID}"
|
||||
build:
|
||||
context: ..
|
||||
dockerfile: .devcontainer/Dockerfile
|
||||
context: ../..
|
||||
dockerfile: .devcontainer/asset_list/Dockerfile
|
||||
command: sleep infinity
|
||||
volumes:
|
||||
- ..:/workspaces/model
|
||||
- ../../:/workspaces/model
|
||||
networks:
|
||||
- model-net
|
||||
|
||||
24
.devcontainer/asset_list/requirements.txt
Normal file
24
.devcontainer/asset_list/requirements.txt
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
fastapi==0.115.2
|
||||
sqlalchemy==2.0.36
|
||||
psycopg2-binary==2.9.10
|
||||
python-jose==3.3.0
|
||||
cryptography==43.0.3
|
||||
mangum==0.19.0
|
||||
# AWS
|
||||
boto3==1.35.44
|
||||
# Data
|
||||
openpyxl==3.1.2
|
||||
# Basic
|
||||
pytz
|
||||
uvicorn[standard]
|
||||
# Testing
|
||||
pytest==9.0.2
|
||||
pytest-cov==7.0.0
|
||||
ipykernel>=6.25,<7
|
||||
pydantic-settings<2
|
||||
pyyaml>=6.0.1
|
||||
pydantic>=1.10.7,<2
|
||||
sqlmodel
|
||||
# Formatting
|
||||
black==26.1.0
|
||||
python-dotenv
|
||||
|
|
@ -34,7 +34,7 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
|
|||
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
ADD backend/engine/requirements.txt requirements1.txt
|
||||
ADD backend/app/requirements/requirements.txt requirements2.txt
|
||||
ADD .devcontainer/requirements.txt requirements3.txt
|
||||
ADD .devcontainer/backend/requirements.txt requirements3.txt
|
||||
RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
40
.devcontainer/backend/devcontainer.json
Normal file
40
.devcontainer/backend/devcontainer.json
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
{
|
||||
"name": "Backend Model Env",
|
||||
"dockerComposeFile": "docker-compose.yml",
|
||||
"service": "model-backend",
|
||||
"remoteUser": "vscode",
|
||||
"workspaceFolder": "/workspaces/model",
|
||||
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
|
||||
"mounts": [
|
||||
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"ms-python.python",
|
||||
"ms-toolsai.jupyter",
|
||||
"mechatroner.rainbow-csv",
|
||||
"ms-toolsai.datawrangler",
|
||||
"lindacong.vscode-book-reader",
|
||||
"4ops.terraform",
|
||||
"fabiospampinato.vscode-todo-plus",
|
||||
"jgclark.vscode-todo-highlight",
|
||||
"corentinartaud.pdfpreview",
|
||||
"ms-python.vscode-python-envs",
|
||||
"ms-python.black-formatter",
|
||||
"waderyan.gitblame"
|
||||
],
|
||||
"settings": {
|
||||
"files.defaultWorkspace": "/workspaces/model",
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||
"editor.formatOnSave": true
|
||||
},
|
||||
"python.formatting.provider": "none"
|
||||
}
|
||||
}
|
||||
},
|
||||
"containerEnv": {
|
||||
"PYTHONFLAGS": "-Xfrozen_modules=off"
|
||||
}
|
||||
}
|
||||
28
.devcontainer/backend/docker-compose.yml
Normal file
28
.devcontainer/backend/docker-compose.yml
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
version: '3.8'
|
||||
|
||||
services:
|
||||
model-backend:
|
||||
user: "${UID}:${GID}"
|
||||
build:
|
||||
context: ../..
|
||||
dockerfile: .devcontainer/backend/Dockerfile
|
||||
command: sleep infinity
|
||||
volumes:
|
||||
- ../../:/workspaces/model
|
||||
|
||||
|
||||
db:
|
||||
image: postgres:17.4
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- 5432:5432
|
||||
environment:
|
||||
- POSTGRES_DB=tech_team_local_db
|
||||
- POSTGRES_USER=postgres
|
||||
- POSTGRES_PASSWORD=makingwarmerhomes
|
||||
volumes:
|
||||
- postgres-data-two:/var/lib/postgresql/data
|
||||
|
||||
|
||||
volumes:
|
||||
postgres-data-two:
|
||||
14
.devcontainer/backend/post-install.sh
Normal file
14
.devcontainer/backend/post-install.sh
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
#!/usr/bin/env bash
# Dev-container post-start hook: writes an IPython startup script so that
# kernels load /workspaces/model/backend/.env into their environment.
set -euo pipefail

# Startup directory of the default IPython profile.
mkdir -p ~/.ipython/profile_default/startup

# The quoted 'EOF' delimiter stops the shell from expanding anything inside
# the embedded Python source.
cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
from dotenv import load_dotenv
import os

# Adjust path as needed
env_path = "/workspaces/model/backend/.env"
if os.path.exists(env_path):
    load_dotenv(env_path)
    print("✔ Loaded .env into Jupyter kernel")
else:
    print("⚠ No .env file found to load")
EOF
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
# fastapi
|
||||
|
||||
fastapi==0.115.2
|
||||
sqlalchemy==2.0.36
|
||||
pydantic-settings==2.6.0
|
||||
78
.github/workflows/_build_image.yml
vendored
Normal file
78
.github/workflows/_build_image.yml
vendored
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
name: Build Docker image
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
ecr_repo:
|
||||
required: true
|
||||
type: string
|
||||
dockerfile_path:
|
||||
required: true
|
||||
type: string
|
||||
build_context:
|
||||
required: false
|
||||
default: "."
|
||||
type: string
|
||||
|
||||
outputs:
|
||||
image_digest:
|
||||
description: "Pushed image digest"
|
||||
value: ${{ jobs.build.outputs.image_digest }}
|
||||
ecr_repo_url:
|
||||
description: "ECR repository URL"
|
||||
value: ${{ jobs.build.outputs.ecr_repo_url }}
|
||||
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID:
|
||||
required: true
|
||||
AWS_SECRET_ACCESS_KEY:
|
||||
required: true
|
||||
AWS_REGION:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
outputs:
|
||||
image_digest: ${{ steps.digest.outputs.image_digest }}
|
||||
ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ secrets.AWS_REGION }}
|
||||
|
||||
- uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Resolve ECR repo URL
|
||||
id: repo
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
|
||||
|
||||
ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${{ inputs.ecr_repo }}"
|
||||
|
||||
echo "Resolved ECR repo URL (local var):"
|
||||
echo "$ECR_REPO_URL"
|
||||
|
||||
echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Build & push image
|
||||
run: |
|
||||
IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}"
|
||||
docker build -f ${{ inputs.dockerfile_path }} -t $IMAGE_URI ${{ inputs.build_context }}
|
||||
docker push $IMAGE_URI
|
||||
|
||||
- name: Resolve image digest
|
||||
id: digest
|
||||
run: |
|
||||
DIGEST=$(aws ecr describe-images \
|
||||
--repository-name ${{ inputs.ecr_repo }} \
|
||||
--image-ids imageTag=${GITHUB_SHA} \
|
||||
--query 'imageDetails[0].imageDigest' \
|
||||
--output text)
|
||||
echo "image_digest=$DIGEST" >> "$GITHUB_OUTPUT"
|
||||
91
.github/workflows/_deploy_lambda.yml
vendored
Normal file
91
.github/workflows/_deploy_lambda.yml
vendored
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
name: Deploy Lambda (Terraform)
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
inputs:
|
||||
lambda_name:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
lambda_path:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
stage:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
ecr_repo:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
image_digest:
|
||||
required: true
|
||||
type: string
|
||||
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID:
|
||||
required: true
|
||||
AWS_SECRET_ACCESS_KEY:
|
||||
required: true
|
||||
AWS_REGION:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Debug inputs
|
||||
run: |
|
||||
echo "lambda_name=${{ inputs.lambda_name }}"
|
||||
echo "lambda_path=${{ inputs.lambda_path }}"
|
||||
echo "stage=${{ inputs.stage }}"
|
||||
echo "ecr_repo=${{ inputs.ecr_repo }}"
|
||||
echo "image_digest=${{ inputs.image_digest }}"
|
||||
|
||||
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: ${{ secrets.AWS_REGION }}
|
||||
|
||||
- uses: hashicorp/setup-terraform@v3
|
||||
|
||||
- uses: aws-actions/amazon-ecr-login@v2
|
||||
|
||||
- name: Resolve ECR repo URL
|
||||
id: repo
|
||||
env:
|
||||
AWS_REGION: ${{ secrets.AWS_REGION }}
|
||||
run: |
|
||||
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
|
||||
ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${{ inputs.ecr_repo }}"
|
||||
echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Terraform Init
|
||||
working-directory: ${{ inputs.lambda_path }}
|
||||
run: terraform init -reconfigure
|
||||
|
||||
- name: Terraform Workspace
|
||||
working-directory: ${{ inputs.lambda_path }}
|
||||
run: |
|
||||
terraform workspace select ${{ inputs.stage }} \
|
||||
|| terraform workspace new ${{ inputs.stage }}
|
||||
|
||||
- name: Terraform Plan
|
||||
working-directory: ${{ inputs.lambda_path }}
|
||||
run: |
|
||||
terraform plan \
|
||||
-var="stage=${{ inputs.stage }}" \
|
||||
-var="lambda_name=${{ inputs.lambda_name }}" \
|
||||
-var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
|
||||
-var="image_digest=${{ inputs.image_digest }}" \
|
||||
-out=lambdaplan
|
||||
|
||||
- name: Terraform Apply
|
||||
working-directory: ${{ inputs.lambda_path }}
|
||||
run: terraform apply -auto-approve lambdaplan
|
||||
128
.github/workflows/deploy_terraform.yml
vendored
128
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -1,80 +1,98 @@
|
|||
name: Deploy terraform stack
|
||||
name: Deploy infrastructure
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- dev
|
||||
- prod
|
||||
- "**"
|
||||
|
||||
jobs:
|
||||
deploy:
|
||||
determine_stage:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
stage: ${{ steps.set-stage.outputs.stage }}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Setup AWS credentials file
|
||||
- name: Determine stage from branch
|
||||
id: set-stage
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p ~/.aws
|
||||
echo "[DevAdmin]" > ~/.aws/credentials
|
||||
echo "aws_access_key_id = ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
|
||||
echo "aws_secret_access_key = ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
|
||||
echo "[ProdAdmin]" >> ~/.aws/credentials
|
||||
echo "aws_access_key_id = ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
|
||||
echo "aws_secret_access_key = ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
|
||||
BRANCH="${GITHUB_REF_NAME}"
|
||||
|
||||
- name: Setup AWS config file
|
||||
run: |
|
||||
echo "[profile DevAdmin]" > ~/.aws/config
|
||||
echo "region = eu-west-2" >> ~/.aws/config
|
||||
echo "[profile ProdAdmin]" >> ~/.aws/config
|
||||
echo "region = eu-west-2" >> ~/.aws/config
|
||||
if [[ "$BRANCH" == "prod" ]]; then
|
||||
echo "stage=prod" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Setup Terraform
|
||||
uses: hashicorp/setup-terraform@v1
|
||||
with:
|
||||
terraform_version: 1.5.2
|
||||
elif [[ "$BRANCH" == "dev" ]]; then
|
||||
echo "stage=dev" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Configure AWS credentials (DevAdmin)
|
||||
uses: aws-actions/configure-aws-credentials@v1
|
||||
else
|
||||
echo "stage=dev" >> "$GITHUB_OUTPUT"
|
||||
fi
|
||||
|
||||
# ============================================================
|
||||
# 1️⃣ Shared Terraform (infra)
|
||||
# ============================================================
|
||||
shared_terraform:
|
||||
needs: determine_stage
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
STAGE: ${{ needs.determine_stage.outputs.stage }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: aws-actions/configure-aws-credentials@v4
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: eu-west-2
|
||||
env:
|
||||
AWS_PROFILE: "DevAdmin"
|
||||
aws-region: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
- uses: hashicorp/setup-terraform@v3
|
||||
|
||||
- name: Terraform Init
|
||||
run: cd infrastructure/terraform && terraform init
|
||||
working-directory: infrastructure/terraform/shared
|
||||
run: terraform init -reconfigure
|
||||
|
||||
- name: Terraform Workspace
|
||||
run: |
|
||||
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
|
||||
cd infrastructure/terraform
|
||||
terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
|
||||
working-directory: infrastructure/terraform/shared
|
||||
run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}
|
||||
|
||||
- name: Terraform Plan
|
||||
run: |
|
||||
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
|
||||
cd infrastructure/terraform && terraform plan -var-file=${BRANCH_NAME}.tfvars
|
||||
working-directory: infrastructure/terraform/shared
|
||||
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
|
||||
|
||||
- name: Deploy to Dev
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
|
||||
env:
|
||||
name: dev
|
||||
- name: Terraform Apply
|
||||
if: env.STAGE == 'prod'
|
||||
working-directory: infrastructure/terraform/shared
|
||||
run: terraform apply -auto-approve tfplan
|
||||
|
||||
- name: Configure AWS credentials (ProdAdmin)
|
||||
uses: aws-actions/configure-aws-credentials@v1
|
||||
# ============================================================
|
||||
# 2️⃣ Build Address 2 UPRN image and Push
|
||||
# ============================================================
|
||||
address2uprn_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}
|
||||
aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
|
||||
aws-region: eu-west-2
|
||||
env:
|
||||
AWS_PROFILE: "ProdAdmin"
|
||||
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/address2UPRN/Dockerfile
|
||||
build_context: backend/address2UPRN
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
- name: Deploy to Prod
|
||||
if: github.ref == 'refs/heads/prod'
|
||||
run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
|
||||
env:
|
||||
name: prod
|
||||
# ============================================================
|
||||
# 3️⃣ Deploy Address 2 UPRN Lambda
|
||||
# ============================================================
|
||||
address2uprn_lambda:
|
||||
needs: [address2uprn_image, determine_stage]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: address2uprn
|
||||
lambda_path: infrastructure/terraform/lambda/address2UPRN
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
|
|
|||
6
.github/workflows/unit_tests.yml
vendored
6
.github/workflows/unit_tests.yml
vendored
|
|
@ -2,6 +2,12 @@ name: Run unit tests
|
|||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- "**"
|
||||
push:
|
||||
branches:
|
||||
- "**"
|
||||
|
||||
|
||||
jobs:
|
||||
test:
|
||||
|
|
|
|||
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
|
|
@ -9,9 +9,12 @@
|
|||
"path": "/bin/bash"
|
||||
}
|
||||
},
|
||||
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
|
||||
|
||||
// Hot reload setting that needs to be in user settings
|
||||
// "jupyter.runStartupCommands": [
|
||||
|
|
|
|||
|
|
@ -34,7 +34,8 @@ from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
|
|||
logger = setup_logger()
|
||||
|
||||
# OpenAI API Key (set this in your environment variables for security)
|
||||
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
# Read only from the environment; never fall back to a credential in source.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
|
||||
class DataRemapper:
|
||||
|
|
@ -1160,12 +1161,16 @@ class AssetList:
|
|||
axis=1
|
||||
)
|
||||
|
||||
col = self.EPC_API_DATA_NAMES["roof-description"]
|
||||
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply(
|
||||
lambda x: RoofAttributes(description=x[self.EPC_API_DATA_NAMES["roof-description"]]).process()[
|
||||
lambda x: RoofAttributes(description=x[col]).process()[
|
||||
"insulation_thickness"] if not pd.isnull(
|
||||
x[self.EPC_API_DATA_NAMES["roof-description"]]) else None,
|
||||
x[col]) else None,
|
||||
axis=1
|
||||
)
|
||||
|
||||
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = (
|
||||
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace("+", "")
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# OpenAI API Key (set this in your environment variables for security)
|
||||
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
# Read only from the environment; never fall back to a credential in source.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
class DataRemapper:
|
||||
|
|
|
|||
0
asset_list/__init__.py
Normal file
0
asset_list/__init__.py
Normal file
|
|
@ -14,22 +14,32 @@ from dotenv import load_dotenv
|
|||
from backend.SearchEpc import SearchEpc
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
EPC_AUTH_TOKEN = os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
)
|
||||
|
||||
|
||||
def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
|
||||
def extract_address1(
|
||||
asset_list, full_address_col, postcode_col, method="first_two_words"
|
||||
):
|
||||
if method == "first_two_words":
|
||||
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
|
||||
asset_list["address1_extracted"] = (
|
||||
asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
|
||||
)
|
||||
return asset_list
|
||||
|
||||
if method == "first_word":
|
||||
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
|
||||
asset_list["address1_extracted"] = (
|
||||
asset_list[full_address_col].str.split(" ").str[0]
|
||||
)
|
||||
return asset_list
|
||||
|
||||
if method == "house_number_extraction":
|
||||
asset_list["address1_extracted"] = asset_list.apply(
|
||||
lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]),
|
||||
axis=1
|
||||
lambda x: SearchEpc.get_house_number(
|
||||
address=x[full_address_col], postcode=x[postcode_col]
|
||||
),
|
||||
axis=1,
|
||||
)
|
||||
return asset_list
|
||||
|
||||
|
|
@ -59,21 +69,20 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
# Fairhive
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Fairhive"
|
||||
data_filename = "Fairhive Asset list.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'POSTCODE'
|
||||
address1_column = "ADDRESS"
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
|
||||
data_filename = "Domna SHF Wave 3 (3).xlsx"
|
||||
sheet_name = "Domna Wave 3"
|
||||
postcode_column = "Postcode"
|
||||
address1_column = "Address 1"
|
||||
address1_method = None
|
||||
fulladdress_column = 'ADDRESS'
|
||||
address_cols_to_concat = []
|
||||
fulladdress_column = None
|
||||
address_cols_to_concat = ["Address 1"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "PROPERTY TYPE"
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_year_built = "Construction Years"
|
||||
landlord_os_uprn = "UPRN"
|
||||
landlord_property_type = "Type"
|
||||
landlord_built_form = "Attachment"
|
||||
landlord_wall_construction = "Wall type"
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
|
|
@ -93,93 +102,28 @@ def app():
|
|||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
# Hyde
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Minor Works"
|
||||
data_filename = "Hyde Group - Domna Minor Works Programme List.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Postcode'
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = 'Address'
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Age"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Property Type"
|
||||
landlord_wall_construction = "Walls"
|
||||
landlord_roof_construction = "Roofs"
|
||||
landlord_heating_system = "Heating"
|
||||
landlord_existing_pv = "Renewables"
|
||||
landlord_property_id = "Organisation Reference"
|
||||
landlord_sap = "SAP (10)"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_id_colnames = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/20260129 SAL"
|
||||
data_filename = "NCHA ASSET LIST 1.xlsx"
|
||||
sheet_name = "NCHA ASSET LIST"
|
||||
postcode_column = 'POSTCODE'
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = 'ADDRESS'
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "PROPERTY TYPE"
|
||||
landlord_built_form = "BUILD FORM"
|
||||
landlord_wall_construction = "wall combined"
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_id_colnames = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
# Peabody data for cleaning
|
||||
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||
"Project/data_validation")
|
||||
data_folder = (
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||
"Project/data_validation"
|
||||
)
|
||||
data_filename = "to_standardise_uprns.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Postcode'
|
||||
address1_column = "Address 1"
|
||||
address1_method = None
|
||||
fulladdress_column = None
|
||||
address_cols_to_concat = ["Address 1", "Address 2", "Address 3"]
|
||||
postcode_column = "Postcode"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = "Address"
|
||||
address_cols_to_concat = None
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Type"
|
||||
landlord_built_form = "Attachment"
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Org Ref"
|
||||
landlord_property_id = "LLUPRN"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
@ -195,40 +139,6 @@ def app():
|
|||
asset_list_header = 0
|
||||
landlord_block_reference = None
|
||||
|
||||
# Lambeth:
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
|
||||
# data_filename = "lambeth_sw2_leigham court estate.xlsx"
|
||||
# sheet_name = "Sheet1"
|
||||
# postcode_column = 'Postcode'
|
||||
# address1_column = "Address"
|
||||
# address1_method = None
|
||||
# fulladdress_column = None
|
||||
# address_cols_to_concat = ["Address"]
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = None
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = None
|
||||
# landlord_built_form = None
|
||||
# landlord_wall_construction = None
|
||||
# landlord_roof_construction = None
|
||||
# landlord_heating_system = None
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "row_id"
|
||||
# landlord_sap = None
|
||||
# outcomes_filename = None
|
||||
# outcomes_sheetname = None
|
||||
# outcomes_postcode = None
|
||||
# outcomes_houseno = None
|
||||
# outcomes_id = None
|
||||
# outcomes_address = None
|
||||
# master_filepaths = []
|
||||
# master_id_colnames = []
|
||||
# master_to_asset_list_filepath = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = None
|
||||
# asset_list_header = 0
|
||||
# landlord_block_reference = None
|
||||
|
||||
# Maps addresses to uprn in problematic cases
|
||||
manual_uprn_map = {}
|
||||
|
||||
|
|
@ -253,49 +163,62 @@ def app():
|
|||
landlord_existing_pv=landlord_existing_pv,
|
||||
landlord_sap=landlord_sap,
|
||||
landlord_block_reference=landlord_block_reference,
|
||||
phase=phase
|
||||
phase=phase,
|
||||
)
|
||||
asset_list.init_standardise()
|
||||
|
||||
# We produce the new maps, which can be saved for future use
|
||||
new_property_type_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_property_type] if
|
||||
asset_list.landlord_property_type else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_property_type]
|
||||
if asset_list.landlord_property_type
|
||||
else {}
|
||||
).items()
|
||||
if k not in PROPERTY_MAPPING
|
||||
}
|
||||
new_built_form_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_built_form] if
|
||||
asset_list.landlord_built_form else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_built_form]
|
||||
if asset_list.landlord_built_form
|
||||
else {}
|
||||
).items()
|
||||
if k not in BUILT_FORM_MAPPINGS
|
||||
}
|
||||
new_wall_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
|
||||
asset_list.landlord_wall_construction else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_wall_construction]
|
||||
if asset_list.landlord_wall_construction
|
||||
else {}
|
||||
).items()
|
||||
if k not in WALL_CONSTRUCTION_MAPPINGS
|
||||
}
|
||||
new_heating_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_heating_system] if
|
||||
asset_list.landlord_heating_system else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_heating_system]
|
||||
if asset_list.landlord_heating_system
|
||||
else {}
|
||||
).items()
|
||||
if k not in HEATING_MAPPINGS
|
||||
}
|
||||
new_existing_pv_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_existing_pv]
|
||||
if asset_list.landlord_existing_pv
|
||||
else {}
|
||||
).items()
|
||||
if k not in EXISTING_PV_MAPPINGS
|
||||
}
|
||||
new_roof_construction_map = {
|
||||
k: v for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_roof_construction] if
|
||||
asset_list.landlord_roof_construction else {}
|
||||
k: v
|
||||
for k, v in (
|
||||
asset_list.variable_mappings[asset_list.landlord_roof_construction]
|
||||
if asset_list.landlord_roof_construction
|
||||
else {}
|
||||
).items()
|
||||
if k not in ROOF_CONSTRUCTION_MAPPINGS
|
||||
}
|
||||
|
|
@ -309,7 +232,7 @@ def app():
|
|||
outcomes_address=outcomes_address,
|
||||
outcomes_postcode=outcomes_postcode,
|
||||
outcomes_houseno=outcomes_houseno,
|
||||
outcomes_id=outcomes_id
|
||||
outcomes_id=outcomes_id,
|
||||
)
|
||||
|
||||
asset_list.flag_survey_master(
|
||||
|
|
@ -343,7 +266,9 @@ def app():
|
|||
skip = max(chunk_indexes)
|
||||
|
||||
if any(x in folder_contents for x in downloaded_files):
|
||||
skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
|
||||
skip = max(
|
||||
[i for i in chunk_indexes if filename.format(i=i) in folder_contents]
|
||||
)
|
||||
|
||||
for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
|
||||
print(f"Processing chunk {i} to {i + chunk_size}")
|
||||
|
|
@ -362,7 +287,7 @@ def app():
|
|||
built_form_column=AssetList.STANDARD_BUILT_FORM,
|
||||
manual_uprn_map=manual_uprn_map,
|
||||
epc_api_only=epc_api_only,
|
||||
epc_auth_token=EPC_AUTH_TOKEN
|
||||
epc_auth_token=EPC_AUTH_TOKEN,
|
||||
)
|
||||
|
||||
# We now retrieve any failed properties
|
||||
|
|
@ -385,7 +310,9 @@ def app():
|
|||
|
||||
# Append the failed data to the main data
|
||||
# Store the chunk locally as a csv
|
||||
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
|
||||
pd.DataFrame(epc_data_chunk).to_csv(
|
||||
os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False
|
||||
)
|
||||
# Store the errors and no-data locally
|
||||
with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
|
||||
json.dump(errors_chunk, f)
|
||||
|
|
@ -416,7 +343,9 @@ def app():
|
|||
|
||||
unique_recommendations = set()
|
||||
for _, row in recommendations_df.iterrows():
|
||||
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
|
||||
unique_recommendations.update(
|
||||
[rec["improvement-summary-text"] for rec in row["recommendations"]]
|
||||
)
|
||||
|
||||
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
|
||||
transformed_data = []
|
||||
|
|
@ -436,20 +365,24 @@ def app():
|
|||
transformed_df = pd.DataFrame(transformed_data)
|
||||
for col in [
|
||||
"Floor insulation (solid floor)",
|
||||
"Floor insulation", "Floor insulation (suspended floor)"
|
||||
"Floor insulation",
|
||||
"Floor insulation (suspended floor)",
|
||||
]:
|
||||
if col not in transformed_df.columns:
|
||||
transformed_df[col] = False
|
||||
transformed_df = transformed_df[
|
||||
[
|
||||
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
|
||||
"Floor insulation", "Floor insulation (suspended floor)"
|
||||
asset_list.DOMNA_PROPERTY_ID,
|
||||
"Floor insulation (solid floor)",
|
||||
"Floor insulation",
|
||||
"Floor insulation (suspended floor)",
|
||||
]
|
||||
]
|
||||
|
||||
transformed_df["epc_has_floor_recommendation"] = (
|
||||
transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
|
||||
transformed_df["Floor insulation (suspended floor)"]
|
||||
transformed_df["Floor insulation (solid floor)"]
|
||||
| transformed_df["Floor insulation"]
|
||||
| transformed_df["Floor insulation (suspended floor)"]
|
||||
)
|
||||
|
||||
# Get the find my epc data
|
||||
|
|
@ -462,21 +395,20 @@ def app():
|
|||
find_my_epc_data.append(
|
||||
{
|
||||
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
|
||||
**x["find_my_epc_data"]
|
||||
**x["find_my_epc_data"],
|
||||
}
|
||||
)
|
||||
else:
|
||||
find_my_epc_data.append(
|
||||
{
|
||||
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
|
||||
}
|
||||
{asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]}
|
||||
)
|
||||
|
||||
find_my_epc_data = pd.DataFrame(find_my_epc_data)
|
||||
|
||||
find_my_epc_data = find_my_epc_data.merge(
|
||||
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
|
||||
how="left", on=asset_list.DOMNA_PROPERTY_ID
|
||||
how="left",
|
||||
on=asset_list.DOMNA_PROPERTY_ID,
|
||||
)
|
||||
|
||||
# We check if we get the solar pv column:
|
||||
|
|
@ -486,27 +418,33 @@ def app():
|
|||
# Retrieve just the data we need
|
||||
epc_df = epc_df[
|
||||
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
|
||||
].rename(
|
||||
columns=asset_list.EPC_API_DATA_NAMES
|
||||
)
|
||||
].rename(columns=asset_list.EPC_API_DATA_NAMES)
|
||||
|
||||
# Look for columns not in the find my EPC data, which will have happened if we didn't
|
||||
# retrieve it in the first place
|
||||
missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
|
||||
missed_find_epc_cols = [
|
||||
c
|
||||
for c in list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
||||
if c not in find_my_epc_data.columns
|
||||
]
|
||||
if missed_find_epc_cols:
|
||||
for c in missed_find_epc_cols:
|
||||
find_my_epc_data[c] = None
|
||||
|
||||
epc_df = epc_df.merge(
|
||||
find_my_epc_data[
|
||||
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
||||
]
|
||||
.rename(columns=asset_list.FIND_EPC_DATA_NAMES),
|
||||
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
|
||||
+ list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
||||
].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
|
||||
how="left",
|
||||
on=asset_list.DOMNA_PROPERTY_ID
|
||||
on=asset_list.DOMNA_PROPERTY_ID,
|
||||
)
|
||||
|
||||
asset_list.merge_data(epc_df)
|
||||
# asset_list.standardised_asset_list = asset_list.standardised_asset_list[
|
||||
# asset_list.standardised_asset_list["domna_full_address"]
|
||||
# != "120 Airdrie Crescent, Burnley, Lancashire"
|
||||
# ]
|
||||
asset_list.extract_attributes()
|
||||
asset_list.identify_worktypes()
|
||||
|
||||
|
|
@ -516,7 +454,10 @@ def app():
|
|||
asset_list.get_work_figures()
|
||||
|
||||
# Store as an excel
|
||||
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
|
||||
filename = (
|
||||
os.path.join(data_folder, ".".join(data_filename.split(".")[:-1]))
|
||||
+ " - Standardised.xlsx"
|
||||
)
|
||||
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
|
||||
|
||||
# Determine inspections priority
|
||||
|
|
@ -540,26 +481,42 @@ def app():
|
|||
# )
|
||||
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
||||
asset_list.standardised_asset_list.to_excel(
|
||||
writer, sheet_name="Standardised Asset List", index=False
|
||||
)
|
||||
if asset_list.block_analysis_df is not None:
|
||||
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
|
||||
asset_list.block_analysis_df.to_excel(
|
||||
writer, sheet_name="Block Analysis", index=False
|
||||
)
|
||||
# If we have outcomes, we add a tab with the outcomes
|
||||
if not asset_list.outcomes_for_output.empty:
|
||||
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
|
||||
asset_list.outcomes_for_output.to_excel(
|
||||
writer, sheet_name="Outcomes", index=False
|
||||
)
|
||||
|
||||
if not asset_list.unmatched_submissions.empty:
|
||||
asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
|
||||
asset_list.unmatched_submissions.to_excel(
|
||||
writer, sheet_name="Unmatched Submissions", index=False
|
||||
)
|
||||
|
||||
if not asset_list.outcomes_no_match.empty:
|
||||
asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
|
||||
asset_list.outcomes_no_match.to_excel(
|
||||
writer, sheet_name="Unmatched Outcomes", index=False
|
||||
)
|
||||
|
||||
if not asset_list.ecosurv_no_match.empty:
|
||||
asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
|
||||
asset_list.ecosurv_no_match.to_excel(
|
||||
writer, sheet_name="Unmatched Ecosurv", index=False
|
||||
)
|
||||
|
||||
if not asset_list.geographical_areas.empty:
|
||||
asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
|
||||
asset_list.geographical_areas.to_excel(
|
||||
writer, sheet_name="Geographical Areas", index=False
|
||||
)
|
||||
|
||||
# Store dupes
|
||||
if asset_list.duplicated_addresses is not None:
|
||||
if not asset_list.duplicated_addresses.empty:
|
||||
asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False)
|
||||
asset_list.duplicated_addresses.to_excel(
|
||||
writer, sheet_name="Duplicate Properties", index=False
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
postal
|
||||
pandas
|
||||
usaddress
|
||||
pydantic-settings==2.6.0
|
||||
epc-api-python==1.0.2
|
||||
thefuzz
|
||||
boto3
|
||||
|
|
@ -10,6 +9,5 @@ openai>=1.3.5
|
|||
tiktoken
|
||||
msgpack
|
||||
beautifulsoup4
|
||||
pydantic>=1.10.7
|
||||
typing-extensions>=4.5.0
|
||||
requests>=2.28.2
|
||||
22
backend/.env.local
Normal file
22
backend/.env.local
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
DB_HOST=db
|
||||
DB_PORT=5432
|
||||
DB_NAME=tech_team_local_db
|
||||
DB_USERNAME=postgres
|
||||
DB_PASSWORD=makingwarmerhomes
|
||||
|
||||
|
||||
#not used
|
||||
GOOGLE_SOLAR_API_KEY="test"
|
||||
SAP_PREDICTIONS_BUCKET="test"
|
||||
CARBON_PREDICTIONS_BUCKET="test"
|
||||
HEAT_PREDICTIONS_BUCKET="test"
|
||||
HEATING_KWH_PREDICTIONS_BUCKET="test"
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET="test"
|
||||
API_KEY="test"
|
||||
ENVIRONMENT="test"
|
||||
SECRET_KEY="test"
|
||||
PLAN_TRIGGER_BUCKET="test"
|
||||
DATA_BUCKET="test"
|
||||
EPC_AUTH_TOKEN="test"
|
||||
ENGINE_SQS_URL="test"
|
||||
ENERGY_ASSESSMENTS_BUCKET="test"
|
||||
7
backend/address2UPRN/Dockerfile
Normal file
7
backend/address2UPRN/Dockerfile
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Container image for the address2UPRN Lambda, built on the AWS Lambda
# Python 3.10 base image.
FROM public.ecr.aws/lambda/python:3.10

# Copy function code
# NOTE(review): main.py imports pandas, tqdm, epc_api and utils.logger, and
# nothing in this file installs or copies them — confirm they are provided
# some other way, otherwise importing main.py will fail at start-up.
COPY main.py .

# Set the handler (the `handler` function defined in main.py)
CMD ["main.handler"]
|
||||
20
backend/address2UPRN/README.md
Normal file
20
backend/address2UPRN/README.md
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
We have a list of addresses as input.
|
||||
|
||||
It'll come in batches that share the same postcode, and from there we want to convert each address into a UPRN.
|
||||
|
||||
If this lambda/function can do that, we'll be speeding ahead.
|
||||
|
||||
|
||||
Energy Performance Information: https://epc.opendatacommunities.org/
|
||||
|
||||
guidance page: https://epc.opendatacommunities.org/docs/guidance#field_domestic_LMK_KEY
|
||||
|
||||
Example of Khalim's earlier code, which he wrote some tests for: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/tests/test_search_epc.py#L11
|
||||
|
||||
|
||||
Example of EPC search: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/SearchEpc.py#L118
|
||||
|
||||
|
||||
|
||||
Khalim has made a python package to help scrape data: https://github.com/KhalimCK/epc-api-python
|
||||
|
||||
0
backend/address2UPRN/__init__.py
Normal file
0
backend/address2UPRN/__init__.py
Normal file
567
backend/address2UPRN/main.py
Normal file
567
backend/address2UPRN/main.py
Normal file
|
|
@ -0,0 +1,567 @@
|
|||
from epc_api.client import EpcClient
|
||||
import os
|
||||
from urllib.parse import urlencode
|
||||
import pandas as pd
|
||||
from difflib import SequenceMatcher
|
||||
from tqdm import tqdm
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
import re
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
)
|
||||
|
||||
if EPC_AUTH_TOKEN is None:
|
||||
raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
|
||||
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Set
|
||||
|
||||
|
||||
def levenshtein(a: str, b: str) -> float:
|
||||
"""
|
||||
Address similarity score in [0, 1].
|
||||
|
||||
Strategy:
|
||||
- Normalise
|
||||
- Strongly penalise mismatched house/flat numbers
|
||||
- Combine token overlap + character similarity
|
||||
"""
|
||||
|
||||
def extract_number_sequence(s: str) -> list[str]:
|
||||
return re.findall(r"\d+[a-z]?", s)
|
||||
|
||||
def extract_numbers(s: str) -> Set[str]:
|
||||
return set(extract_number_sequence(s))
|
||||
|
||||
def tokenise(s: str) -> Set[str]:
|
||||
return set(s.split())
|
||||
|
||||
def extract_building_number(s: str) -> str | None:
|
||||
"""
|
||||
Extract the main building number (NOT flat/unit).
|
||||
Assumes formats like:
|
||||
- '42 moreton road'
|
||||
- 'flat 3 42 moreton road'
|
||||
"""
|
||||
tokens = s.split()
|
||||
|
||||
# remove flat/unit context
|
||||
cleaned = []
|
||||
skip_next = False
|
||||
for t in tokens:
|
||||
if t in ("flat", "apt", "apartment", "unit"):
|
||||
skip_next = True
|
||||
continue
|
||||
if skip_next:
|
||||
skip_next = False
|
||||
continue
|
||||
cleaned.append(t)
|
||||
|
||||
# first remaining number is building number
|
||||
for t in cleaned:
|
||||
if re.fullmatch(r"\d+[a-z]?", t):
|
||||
return t
|
||||
|
||||
return None
|
||||
|
||||
a_norm = normalise_address(a)
|
||||
b_norm = normalise_address(b)
|
||||
|
||||
# --- hard signal: numbers ---
|
||||
nums_a = extract_numbers(a_norm)
|
||||
nums_b = extract_numbers(b_norm)
|
||||
|
||||
if nums_a and not nums_b:
|
||||
return 0.0
|
||||
|
||||
# No shared numbers at all → impossible match
|
||||
if nums_a and nums_b and nums_a.isdisjoint(nums_b):
|
||||
return 0.0
|
||||
|
||||
# 🔒 HARD GUARD: building number must match
|
||||
bld_a = extract_building_number(a_norm)
|
||||
bld_b = extract_building_number(b_norm)
|
||||
|
||||
if bld_a and bld_b and bld_a != bld_b:
|
||||
return 0.0
|
||||
|
||||
# --- order-sensitive flat/building guard ---
|
||||
seq_a = extract_number_sequence(a_norm)
|
||||
seq_b = extract_number_sequence(b_norm)
|
||||
|
||||
has_flat_token_user = any(
|
||||
tok in a_norm for tok in ("flat", "apt", "apartment", "unit")
|
||||
)
|
||||
has_flat_token_epc = "flat" in b_norm
|
||||
|
||||
if (
|
||||
len(seq_a) == 2
|
||||
and len(seq_b) >= 2
|
||||
and has_flat_token_epc
|
||||
and not has_flat_token_user
|
||||
and seq_a != seq_b[:2]
|
||||
):
|
||||
return 0.0
|
||||
|
||||
# --- token similarity (order-independent) ---
|
||||
toks_a = tokenise(a_norm)
|
||||
toks_b = tokenise(b_norm)
|
||||
|
||||
if not toks_a or not toks_b:
|
||||
token_score = 0.0
|
||||
else:
|
||||
token_score = len(toks_a & toks_b) / len(toks_a | toks_b)
|
||||
|
||||
# --- character similarity (soft signal) ---
|
||||
char_score = SequenceMatcher(None, a_norm, b_norm).ratio()
|
||||
|
||||
# --- weighted blend ---
|
||||
return round(
|
||||
0.65 * token_score + 0.35 * char_score,
|
||||
4,
|
||||
)
|
||||
|
||||
|
||||
def normalise_address(s: str) -> str:
    """
    Canonical UK-focused address normalisation.

    - Lowercases
    - Splits a trailing letter off a number ("28a" -> "28 a")
    - Removes punctuation (keeps / for flats)
    - Normalises whitespace
    - Applies synonym compression at token level ("rd" -> "road",
      "apt" -> "flat", "no" dropped)

    Args:
        s: Raw address. ``None``, NaN and other falsy values yield "";
           other non-string values (e.g. a bare house number read from a
           spreadsheet as an int) are converted with ``str()``.

    Returns:
        The normalised address as a single space-separated string.
    """

    if s is None:
        return ""

    # NaN is the only float that differs from itself; a missing DataFrame
    # cell arrives here as NaN and is truthy, so it needs its own check.
    if isinstance(s, float) and s != s:
        return ""

    if not s:
        return ""

    if not isinstance(s, str):
        s = str(s)

    ADDRESS_SYNONYMS = {
        # street types
        # (the dotted keys cannot match: step 2 strips "." before tokenising)
        "rd": "road",
        "rd.": "road",
        "st": "street",
        "st.": "street",
        "ave": "avenue",
        "ave.": "avenue",
        "ln": "lane",
        "ln.": "lane",
        "cres": "crescent",
        "ct": "court",
        "dr": "drive",
        # flats / units
        "apt": "flat",
        "apartment": "flat",
        "unit": "flat",
        "ste": "suite",
        # numbering noise
        "no": "",
        "no.": "",
    }
    # 1. lowercase
    s = s.lower()

    # 1.5 split digit-letter suffixes
    s = re.sub(r"(\d+)([a-z])\b", r"\1 \2", s)

    # 2. remove punctuation except /
    s = re.sub(r"[^\w\s/]", " ", s)

    # 3. normalise whitespace
    s = re.sub(r"\s+", " ", s).strip()

    # 4. tokenise + synonym normalisation (empty replacements are dropped)
    tokens = []
    for tok in s.split():
        replacement = ADDRESS_SYNONYMS.get(tok, tok)
        if replacement:
            tokens.append(replacement)

    return " ".join(tokens)
|
||||
|
||||
|
||||
def score_addresses(
    df: pd.DataFrame,
    user_address: str,
    column: str = "address",
) -> pd.Series:
    """
    Score every address in ``df[column]`` against ``user_address``.

    Returns a Series (same index as ``df``) of ``levenshtein`` scores.
    Raises ValueError when ``column`` is not present in ``df``.
    """
    if column in df.columns:

        def _score(candidate):
            # user address first, candidate second
            return levenshtein(user_address, candidate)

        return df[column].apply(_score)

    raise ValueError(f"Missing column: {column}")
|
||||
|
||||
|
||||
def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
    """
    Recursively fetch EPC data by postcode.
    If results hit the size limit, retry with double size up to max_attempts.

    Args:
        postcode: Postcode to search for (sent as the ``postcode`` param).
        size: Page size requested from the API; a falsy value sends no
            ``size`` parameter and disables the truncation check.
        attempt: Current attempt number (used by the recursion).
        max_attempts: Maximum number of attempts before giving up on
            fetching a complete result set.

    Returns:
        pd.DataFrame built from the response's ``rows`` and ``column-names``.
    """
    client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Build the URL with "/" explicitly: os.path.join is for filesystem paths
    # and would insert a backslash on Windows.
    url = f"{client.domestic.host.rstrip('/')}/search"

    if size:
        url += "?" + urlencode({"size": size})

    search_resp = client.domestic.call(
        url=url,
        method="get",
        params={"postcode": postcode},
    )

    results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])

    row_count = len(results_df)

    # If we hit the size limit, there *may* be more results.
    # Guard on `size` so that size=0/None with an empty result is not treated
    # as "hit the limit" (doubling 0 can never fetch more rows).
    if size and row_count == size:
        print(
            f"⚠️ Warning: hit size limit ({size}) for postcode '{postcode}'. "
            f"Attempt {attempt}/{max_attempts}."
        )

        if attempt < max_attempts:
            print(f"🔁 Retrying with size={size * 2}")
            return get_epc_data_with_postcode(
                postcode=postcode,
                size=size * 2,
                attempt=attempt + 1,
                max_attempts=max_attempts,
            )
        else:
            print(
                "🚨 Max attempts reached. Results may be truncated. "
                "(Please do a manual review by the tech team.)"
            )

    return results_df
|
||||
|
||||
|
||||
def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
    """
    Check that ``df[column]`` agrees on exactly one UPRN and that it is ``uprn``.

    Null values are ignored; the remaining values are compared as stripped
    strings. Returns False when the column is missing, when no non-null
    values remain, or when more than one distinct value is present.
    """
    if column not in df.columns:
        return False

    # Distinct non-null UPRNs, normalised to stripped strings.
    distinct = set(df[column].dropna().astype(str).str.strip())

    # True only for a single distinct value equal to the requested one
    # (an empty set or a set with several values can never equal it).
    return distinct == {str(uprn)}
|
||||
|
||||
|
||||
def get_uprn_candidates(
    df: pd.DataFrame,
    user_address: str,
    address_column: str = "address",
    uprn_column: str = "uprn",
) -> pd.DataFrame:
    """
    Score and rank EPC rows against a user-supplied address.

    Works on a copy of ``df`` and adds two columns:
    - ``lexiscore``: ``levenshtein`` score of each address against the
      normalised user address
    - ``lexirank``: dense rank of ``lexiscore`` (1 = best)

    The UPRN column is converted to strings with any trailing ".0" removed.
    The result is sorted best-first. No UPRN is chosen here.

    Raises ValueError if either required column is missing.
    """
    # Address column is validated first, then the UPRN column.
    for required in (address_column, uprn_column):
        if required not in df.columns:
            raise ValueError(f"Missing column: {required}")

    normalised_query = normalise_address(user_address)

    ranked = df.copy()
    ranked["lexiscore"] = ranked[address_column].apply(
        lambda candidate: levenshtein(normalised_query, candidate)
    )

    # UPRNs as text, without the ".0" left behind by float columns
    uprn_text = ranked[uprn_column].astype(str)
    ranked[uprn_column] = uprn_text.str.replace(r"\.0$", "", regex=True)

    # Dense ranking: ties share a rank, highest score gets rank 1
    dense_rank = ranked["lexiscore"].rank(method="dense", ascending=False)
    ranked["lexirank"] = dense_rank.astype(int)

    return ranked.sort_values(
        ["lexirank", "lexiscore"],
        ascending=[True, False],
    )
|
||||
|
||||
|
||||
def get_uprn(user_inputed_address: str, postcode: str):
    """
    Resolve a free-text address within a postcode to a single UPRN.

    Args:
        user_inputed_address: Address text as supplied by the user.
        postcode: Postcode used to fetch the EPC candidates.

    Returns:
        The UPRN (str) of the best-matching EPC record, or None when:
        - the postcode search returns no EPC rows,
        - the best score is not positive (no sensible match),
        - the top-ranked rows disagree on the UPRN (ambiguous), or
        - the matching EPC record carries no UPRN.
    """
    df = get_epc_data_with_postcode(postcode=postcode)

    if df.empty:
        return None

    scored_df = get_uprn_candidates(
        df,
        user_address=user_inputed_address,
    )

    # Best score (candidates are sorted best-first)
    best_score = scored_df.iloc[0]["lexiscore"]

    if best_score <= 0:
        return None

    # All rank-1 rows (possible draw)
    top_rank_df = scored_df[scored_df["lexirank"] == 1]
    best_row = top_rank_df.iloc[0]

    # If rank-1 rows do not agree on a single UPRN -> ambiguous
    if not df_has_single_uprn(top_rank_df, uprn=best_row["uprn"]):
        return None

    address = top_rank_df["address"].values[0]
    lexiscore = float(top_rank_df["lexiscore"].values[0])

    logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")

    # Safe to return the agreed UPRN
    found_uprn = best_row["uprn"]

    # get_uprn_candidates stringifies the UPRN column, so a missing value can
    # arrive as "", "nan", "None" or "<NA>" — none of these is a real UPRN.
    if str(found_uprn).strip().lower() in ("", "nan", "none", "<na>"):
        return None

    return found_uprn
|
||||
|
||||
|
||||
def resolve_uprns_for_postcode_group(
    group_df: pd.DataFrame,
    epc_df: pd.DataFrame,
    address_col: str = "Address 1",
) -> pd.DataFrame:
    """
    Resolve a UPRN for every row of a same-postcode group.

    Args:
        group_df: rows sharing the same postcode
        epc_df: EPC search results for that postcode
        address_col: column of ``group_df`` holding the address text

    Returns:
        ``group_df`` (index reset) with these columns appended:
        found_uprn, best_match_uprn, best_match_address,
        best_match_lexiscore and status, where status is one of
        "no_epc_candidates", "zero_score", "ambiguous" or "matched".
    """

    def _outcome(found, match_uprn, match_address, score, status):
        # One diagnostics record per input row; key order fixes column order.
        return {
            "found_uprn": found,
            "best_match_uprn": match_uprn,
            "best_match_address": match_address,
            "best_match_lexiscore": score,
            "status": status,
        }

    outcomes = []

    for _, record in group_df.iterrows():
        query = str(record[address_col]).strip()
        candidates = get_uprn_candidates(epc_df, user_address=query)

        # Nothing to compare against
        if candidates.empty:
            outcomes.append(_outcome(None, None, None, None, "no_epc_candidates"))
            continue

        # Candidates arrive sorted best-first
        top_score = candidates.iloc[0]["lexiscore"]
        if top_score <= 0:
            outcomes.append(_outcome(None, None, None, top_score, "zero_score"))
            continue

        leaders = candidates[candidates["lexirank"] == 1]
        first = leaders.iloc[0]

        if df_has_single_uprn(leaders, first["uprn"]):
            agreed = str(first["uprn"])
            outcomes.append(
                _outcome(agreed, agreed, first["address"], top_score, "matched")
            )
        else:
            # Tied leaders disagree on the UPRN: report the first, pick none
            outcomes.append(
                _outcome(None, first["uprn"], first["address"], top_score, "ambiguous")
            )

    diagnostics = pd.DataFrame(outcomes)
    return pd.concat([group_df.reset_index(drop=True), diagnostics], axis=1)
|
||||
|
||||
|
||||
def test(a, b):
|
||||
assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
|
||||
|
||||
|
||||
def run_all_test():
    """
    Ad-hoc live checks against the EPC API (requires network access and a
    valid EPC_AUTH_TOKEN). Raises AssertionError on the first failing check.

    ``get_uprn`` signals "no usable match" with None, so the negative cases
    below compare against None.
    """
    # Basic usage with different post codes styles
    test(get_epc_data_with_postcode("b93 8sy").shape[0], 63)
    test(get_epc_data_with_postcode("B938sy").shape[0], 63)
    test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)

    test(get_uprn("68", "b93 8sy"), "100070989938")
    test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938")
    test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633")
    test(get_uprn("28 A", "se6 4tf"), "100023278633")
    test(get_uprn("28A", "se6 4tf"), "100023278633")
    test(get_uprn("6 Aitken Close", "E8 4SQ"), None)

    # unique case
    test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("5 , 1 Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198")
    test(get_uprn("1, 5 Semley Gate", "e9 5nh"), None)
    test(
        get_uprn("1 Semley Gate", "e9 5nh"), "10008238188"
    )  # this one return "flat 1, in 1 semley gate"

    # Oswald Street: no UPRN is expected to be resolved for these addresses
    test(get_uprn("48 Oswald Street", "E5 0BT"), None)
    test(get_uprn("42 Oswald Street", "E5 0BT"), None)
    test(get_uprn("46 Oswald Street", "E5 0BT"), None)

    # Smoke checks only: the ranked candidates are computed but not asserted
    get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
    get_uprn_candidates(
        get_epc_data_with_postcode("Cr2 7dl"),
        "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
    )
|
||||
|
||||
|
||||
# Audit script: for every row of the input spreadsheet, resolve a UPRN from
# the EPC data and record each row where the result differs from the UPRN
# already present in the sheet. The failures are written to an Excel file.
if __name__ == "__main__":
    INPUT_FILE = "hackney.xlsx"

    # Column names expected in the input spreadsheet
    ADDRESS_COL = "Address 1"
    POSTCODE_COL = "Postcode"
    UPRN_COL = "UPRN"

    df = pd.read_excel(INPUT_FILE)

    # One dict per row that errored, had no EPC data, or did not match
    failures = []

    for _, row in tqdm(
        df.iterrows(),
        total=len(df),
        desc="Auditing UPRNs",
    ):
        input_address = str(row[ADDRESS_COL]).strip()
        postcode = str(row[POSTCODE_COL]).strip()

        # Expected UPRN as a digit string (None when the cell is empty)
        expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL]))

        try:
            epc_df = get_epc_data_with_postcode(postcode)

            if epc_df.empty:
                failures.append(
                    {
                        **row.to_dict(),
                        "found_uprn": None,
                        "best_match_uprn": None,
                        "best_match_address": None,
                        "best_match_lexiscore": None,
                        "status": "no_epc_results",
                    }
                )
                continue

            scored_df = get_uprn_candidates(
                epc_df,
                user_address=input_address,
            )

            # Top-ranked candidate, kept for diagnostics in the failure report
            best_row = scored_df.iloc[0]

            best_match_uprn = str(best_row["uprn"])
            best_match_address = best_row["address"]
            best_match_lexiscore = round(float(best_row["lexiscore"]), 4)

            # NOTE: get_uprn fetches the EPC data for the postcode again
            found_uprn = get_uprn(input_address, postcode)

        except Exception as e:
            # Record the error against the row and carry on with the next one
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": None,
                    "best_match_uprn": None,
                    "best_match_address": None,
                    "best_match_lexiscore": None,
                    "status": "exception",
                    "error": str(e),
                }
            )
            continue

        # Any falsy result from get_uprn counts as "nothing found"
        found_uprn_norm = None if not found_uprn else str(found_uprn)

        if found_uprn_norm != expected_uprn:
            failures.append(
                {
                    **row.to_dict(),
                    "found_uprn": found_uprn_norm,
                    "best_match_uprn": best_match_uprn,
                    "best_match_address": best_match_address,
                    "best_match_lexiscore": best_match_lexiscore,
                    "status": ("no_match" if found_uprn_norm is None else "mismatch"),
                }
            )

    failures_df = pd.DataFrame(failures)

    print("===================================")
    print(f"Total rows : {len(df)}")
    print(f"Failures : {len(failures_df)}")
    print("===================================")

    failures_df.to_excel(
        "hackney_uprn_failures.xlsx",
        index=False,
    )
|
||||
|
||||
|
||||
def handler(event, context):
    """
    Lambda entry point (currently a placeholder).

    Ignores ``event`` and ``context``, prints a greeting and returns it in a
    200 response.
    """
    greeting = "hello world"
    print(greeting)
    response = {"statusCode": 200, "body": greeting}
    return response
|
||||
|
||||
|
||||
# TO do function dispatcher,
|
||||
|
||||
# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)
|
||||
# fix that
|
||||
# Look again at flat 1
|
||||
# pandas reader the separate postcode_splitter
|
||||
# dump into s3
|
||||
17
backend/address2UPRN/script.py
Normal file
17
backend/address2UPRN/script.py
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
import pandas as pd
|
||||
|
||||
|
||||
# Failures produced by the UPRN audit; its address column is "Address 1"
junte_df = pd.read_excel("hackney_uprn_failures.xlsx")

# Reference sheet; its address column is "domna_address_1"
khalim_df = pd.read_excel("khalim_standard.xlsx")

# Left join: keep every audit failure, attach the reference row with the
# same address text where there is one
combined_df = pd.merge(
    junte_df,
    khalim_df,
    how="left",
    left_on="Address 1",
    right_on='domna_address_1',
)

# Keep only the rows for which the merged data has an "epc_os_uprn" value
has_epc_uprn = combined_df["epc_os_uprn"].notna()
result = combined_df[has_epc_uprn]
|
||||
|
||||
40
backend/address2UPRN/tests/test_csv.py
Normal file
40
backend/address2UPRN/tests/test_csv.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# tests/test_address_to_uprn_csv.py
|
||||
|
||||
import csv
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
from backend.address2UPRN.main import get_uprn
|
||||
|
||||
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
|
||||
|
||||
|
||||
def load_test_cases():
    """
    Build one ``pytest.param`` per row of the CSV fixture at ``FIXTURE_PATH``.

    Each param carries (user input, postcode, manually verified UPRN) and an
    id of the form "<user input> [<postcode>]".
    """
    cases = []
    with open(FIXTURE_PATH, newline="", encoding="utf-8") as handle:
        for row in csv.DictReader(handle):
            case = pytest.param(
                row["User Input"],
                row["Postcode"],
                row["Manual UPRN Code"],
                id=f'{row["User Input"]} [{row["Postcode"]}]',
            )
            cases.append(case)
    return cases
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "user_input,postcode,expected_uprn",
    load_test_cases(),
)
def test_uprn_resolution_matches_manual(
    user_input: str,
    postcode: str,
    expected_uprn: str,
):
    """
    ``get_uprn`` must reproduce the manually verified UPRN of each fixture row.

    The fixture stores every expected value as text, so a falsy result from
    ``get_uprn`` (no match) is compared through its string form.
    """
    uprn = get_uprn(user_input, postcode)
    if uprn:
        assert uprn == expected_uprn
    else:
        assert str(uprn) == expected_uprn
|
||||
366
backend/address2UPRN/tests/test_data.csv
Normal file
366
backend/address2UPRN/tests/test_data.csv
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
User Input,Postcode,Manual UPRN Code
|
||||
47 The Fairway,OX16 0RR,100120771697
|
||||
11 REGENT COURT,SL1 3LG,100081041562
|
||||
3/137a Windmill Road,TW8 9NH,100021516998
|
||||
Flat 33,SW18 4BE,100023328943
|
||||
FLAT 1 Brendon Grove,N2 8JE,200013412
|
||||
Flat 15,KT8 2NE,100062123759
|
||||
FLAT 5 Stonehill Road,W4 3AH,100021589829
|
||||
10 Douglas Court,SL7 1UQ,100081278099
|
||||
1 Windmill Road,HP17 8JA,766034606
|
||||
31 Denewood,HP13 7LH,100081095964
|
||||
"10, Greenways Drive",TW4 5DD,10091597009
|
||||
Flat 10,W4 3AH,"100021589834"
|
||||
Flat 11,TW4 5DD,10091597010
|
||||
Flat 11,W4 3AH,100021589835
|
||||
"12, Greenways Drive",TW4 5DD,10091597011
|
||||
"Flat 12, Forbes House",W4 3AH,100021589836
|
||||
FLAT 1 Goodstone Court,HA1 4FL,10070269053
|
||||
Flat 13,TW4 5DD,10091597012
|
||||
Flat 13,W4 3AH,100021589837
|
||||
Flat 14,TW4 5DD,10091597013
|
||||
Flat 14,W4 3AH,100021589838
|
||||
Flat 15,TW4 5DD,10091597014
|
||||
Flat 15,W4 3AH,100021589839
|
||||
Flat 16,TW4 5DD,"10091597015"
|
||||
Flat 16,W4 3AH,100021589840
|
||||
Flat 17,TW4 5DD,10091597016
|
||||
Flat 17,W4 3AH,100021589841
|
||||
Flat 18,TW4 5DD,10091597017
|
||||
Flat 19,W4 3AH,100021589843
|
||||
Flat 20,W4 3AH,100021589844
|
||||
Flat 21,W4 3AH,100021589845
|
||||
Flat 22,W4 3AH,100021589846
|
||||
FLAT 2 Goodstone Court,HA1 4FL,10070269054
|
||||
Flat 23,W4 3AH,100021589847
|
||||
Flat 24,W4 3AH,100021589848
|
||||
"30c, Bosanquet Close",UB8 3PE,100021475316
|
||||
"30e, Bosanquet Close",UB8 3PE,100021475318
|
||||
FLAT 3 Goodstone Court,HA1 4FL,10070269055
|
||||
FLAT 4 Goodstone Court,HA1 4FL,10070269056
|
||||
FLAT 5 Goodstone Court,HA1 4FL,10070269057
|
||||
FLAT 6 Goodstone Court,HA1 4FL,10070269058
|
||||
FLAT 7 Goodstone Court,HA1 4FL,10070269059
|
||||
FLAT 8 Goodstone Court,HA1 4FL,10070269060
|
||||
FLAT 9 Goodstone Court,HA1 4FL,10070269061
|
||||
FLAT 10 Goodstone Court,HA1 4FL,10070269062
|
||||
FLAT 11 Goodstone Court,HA1 4FL,10070269063
|
||||
FLAT 12 Goodstone Court,HA1 4FL,10070269064
|
||||
FLAT 13 Goodstone Court,HA1 4FL,10070269065
|
||||
FLAT 14 Goodstone Court,HA1 4FL,10070269066
|
||||
FLAT 15 Goodstone Court,HA1 4FL,10070269067
|
||||
FLAT 16 Goodstone Court,HA1 4FL,10070269068
|
||||
FLAT 17 Goodstone Court,HA1 4FL,10070269069
|
||||
FLAT 18 Goodstone Court,HA1 4FL,10070269070
|
||||
FLAT 19 Goodstone Court,HA1 4FL,10070269071
|
||||
FLAT 20 Goodstone Court,HA1 4FL,10070269072
|
||||
FLAT 21 Goodstone Court,HA1 4FL,10070269073
|
||||
FLAT 22 Goodstone Court,HA1 4FL,10070269074
|
||||
FLAT 23 Goodstone Court,HA1 4FL,10070269075
|
||||
FLAT 24 Goodstone Court,HA1 4FL,10070269076
|
||||
FLAT 25 Goodstone Court,HA1 4FL,10070269077
|
||||
FLAT 26 Goodstone Court,HA1 4FL,10070269078
|
||||
FLAT 27 Goodstone Court,HA1 4FL,10070269079
|
||||
FLAT 28 Goodstone Court,HA1 4FL,10070269080
|
||||
FLAT 29 Goodstone Court,HA1 4FL,10070269081
|
||||
FLAT 30 Goodstone Court,HA1 4FL,10070269082
|
||||
FLAT 31 Goodstone Court,HA1 4FL,10070269083
|
||||
FLAT 32 Goodstone Court,HA1 4FL,10070269084
|
||||
FLAT 33 Goodstone Court,HA1 4FL,10070269085
|
||||
FLAT 34 Goodstone Court,HA1 4FL,10070269086
|
||||
FLAT 35 Goodstone Court,HA1 4FL,10070269087
|
||||
FLAT 36 Goodstone Court,HA1 4FL,10070269088
|
||||
FLAT 37 Goodstone Court,HA1 4FL,10070269089
|
||||
FLAT 38 Goodstone Court,HA1 4FL,10070269090
|
||||
FLAT 39 Goodstone Court,HA1 4FL,10070269091
|
||||
FLAT 40 Goodstone Court,HA1 4FL,10070269092
|
||||
FLAT 41 Goodstone Court,HA1 4FL,10070269093
|
||||
FLAT 42 Goodstone Court,HA1 4FL,10070269094
|
||||
FLAT 43 Goodstone Court,HA1 4FL,10070269095
|
||||
"13 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778260
|
||||
"14 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778259
|
||||
"15 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778258
|
||||
"16 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778263
|
||||
"17 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778262
|
||||
"18 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778261
|
||||
"19 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778266
|
||||
"20 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778265
|
||||
"21 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778264
|
||||
90a Murray Road,W5 4DA,12135293
|
||||
"Flat 1, 6 Wolverton Gardens",W5 3LJ,"12119972"
|
||||
"1, Monsted House",UB1 1FG,12189944
|
||||
"10, Monsted House",UB1 1FG,12189953
|
||||
"20, Monsted House",UB1 1FG,12189963
|
||||
"2, Monsted House",UB1 1FG,12189945
|
||||
"3, Monsted House",UB1 1FG,12189946
|
||||
"4, Monsted House",UB1 1FG,12189947
|
||||
"5, Monsted House",UB1 1FG,12189948
|
||||
"6, Monsted House",UB1 1FG,12189949
|
||||
"7, Monsted House",UB1 1FG,12189950
|
||||
"8, Monsted House",UB1 1FG,12189951
|
||||
"9, Monsted House",UB1 1FG,12189952
|
||||
"1 Cullis House, 1, Accolade Avenue",UB1 1FH,12189904
|
||||
"2 Cullis House, 1, Accolade Avenue",UB1 1FH,12189905
|
||||
"3 Cullis House, 1, Accolade Avenue",UB1 1FH,12189906
|
||||
"4 Cullis House, 1, Accolade Avenue",UB1 1FH,12189907
|
||||
"5 Cullis House, 1, Accolade Avenue",UB1 1FH,12189908
|
||||
"6 Cullis House, 1, Accolade Avenue",UB1 1FH,12189909
|
||||
1 Genteel House Samara Drive,UB1 1FJ,12189835
|
||||
2 Genteel House Samara Drive,UB1 1FJ,12189836
|
||||
3 Genteel House Samara Drive,UB1 1FJ,12189837
|
||||
4 Genteel House Samara Drive,UB1 1FJ,12189838
|
||||
5 Genteel House Samara Drive,UB1 1FJ,12189839
|
||||
6 Genteel House Samara Drive,UB1 1FJ,12189840
|
||||
7 Genteel House Samara Drive,UB1 1FJ,12189841
|
||||
8 Genteel House Samara Drive,UB1 1FJ,12189842
|
||||
9 Genteel House Samara Drive,UB1 1FJ,12189843
|
||||
10 Genteel House Samara Drive,UB1 1FJ,12189844
|
||||
1 ASH TREE HOUSE,SE5 0TE,None
|
||||
"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979
|
||||
3 ASH TREE HOUSE,SE5 0TE,None
|
||||
Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981
|
||||
5 ASH TREE HOUSE,SE5 0TE,None
|
||||
Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983
|
||||
Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986
|
||||
8 ASH TREE HOUSE,SE5 0TE,None
|
||||
Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990
|
||||
12 ASH TREE HOUSE,SE5 0TE,None
|
||||
FLAT 1 599 HARROW ROAD,W10 4RA,217113930
|
||||
FLAT 2 599 HARROW ROAD,W10 4RA,217113931
|
||||
FLAT 3 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 4 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 5 599 HARROW ROAD,W10 4RA,217113934
|
||||
FLAT 6 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 7 599 HARROW ROAD,W10 4RA,None
|
||||
FLAT 8 599 HARROW ROAD,W10 4RA,None
|
||||
"Flat 1, Ohio Building",SE13 7RX,10023226256
|
||||
"Flat 2, Ohio Building",SE13 7RX,10023226257
|
||||
"Apartment 1 Block B, 105, Benwell Road",N7 7BW,10012792307
|
||||
"Apartment 2 Block B, 105, Benwell Road",N7 7BW,10012792308
|
||||
"Apartment 3 Block B, 105, Benwell Road",N7 7BW,10012792309
|
||||
"Apartment 4 Block B, 105, Benwell Road",N7 7BW,10012792310
|
||||
"Apartment 5 Block B, 105, Benwell Road",N7 7BW,10012792311
|
||||
"Apartment 6 Block B, 105, Benwell Road",N7 7BW,10012792312
|
||||
"Apartment 7 Block B, 105, Benwell Road",N7 7BW,10012792313
|
||||
"Apartment 8 Block B, 105, Benwell Road",N7 7BW,10012792314
|
||||
"Apartment 9 Block B, 105, Benwell Road",N7 7BW,10012792315
|
||||
"Apartment 10 Block B, 105, Benwell Road",N7 7BW,10012792316
|
||||
"Apartment 11 Block B, 105, Benwell Road",N7 7BW,10012792317
|
||||
"Apartment 12 Block B, 105, Benwell Road",N7 7BW,10012792318
|
||||
"Apartment 13 Block B, 105, Benwell Road",N7 7BW,10012792319
|
||||
"Apartment 1 Block D, 32, Hornsey Road",N7 7AT,10012792366
|
||||
"Apartment 2 Block D, 32, Hornsey Road",N7 7AT,10012792367
|
||||
"Apartment 3 Block D, 32, Hornsey Road",N7 7AT,10012792368
|
||||
"Apartment 4 Block D, 32, Hornsey Road",N7 7AT,10012792369
|
||||
"Apartment 5 Block D, 32, Hornsey Road",N7 7AT,10012792370
|
||||
"Apartment 6 Block D, 32, Hornsey Road",N7 7AT,"10012792371"
|
||||
"Apartment 7 Block D, 32, Hornsey Road",N7 7AT,10012792372
|
||||
"Apartment 8 Block D, 32, Hornsey Road",N7 7AT,10012792373
|
||||
"Apartment 9 Block D, 32, Hornsey Road",N7 7AT,10012792374
|
||||
"Apartment 10 Block D, 32, Hornsey Road",N7 7AT,10012792375
|
||||
"Apartment 11 Block D, 32, Hornsey Road",N7 7AT,10012792376
|
||||
"Apartment 12 Block D, 32, Hornsey Road",N7 7AT,10012792377
|
||||
"Apartment 13 Block D, 32, Hornsey Road",N7 7AT,10012792378
|
||||
"Apartment 14 Block D, 32, Hornsey Road",N7 7AT,10012792379
|
||||
"Apartment 15 Block D, 32, Hornsey Road",N7 7AT,10012792380
|
||||
"Apartment 16 Block D, 32, Hornsey Road",N7 7AT,"10012792381"
|
||||
"Apartment 17Block D, 32, Hornsey Road",N7 7AT,10012792382
|
||||
"Apartment 18 Block D, 32, Hornsey Road",N7 7AT,10012792383
|
||||
24b Honley Road,SE6 2HZ,None
|
||||
FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
|
||||
2 COLLEGE HOUSE,CM7 1JS,100091449870
|
||||
3 COLLEGE HOUSE,CM7 1JS,100091449871
|
||||
1 Anita Street,M4 5DU,None
|
||||
2 Anita Street,M4 5DU,77123061
|
||||
5 Anita Street,M4 5DU,77123081
|
||||
6 Anita Street,M4 5DU,77123082
|
||||
8 Anita Street,M4 5DU,None
|
||||
9 Anita Street,M4 5DU,None
|
||||
10 Anita Street,M4 5DU,77123051
|
||||
12 Anita Street,M4 5DU,77123053
|
||||
19 Anita Street,M4 5DU,None
|
||||
22 Anita Street,M4 5DU,None
|
||||
26 Anita Street,M4 5DU,77123068
|
||||
28 Anita Street,M4 5DU,None
|
||||
30 Anita Street,M4 5DU,None
|
||||
32 Anita Street,M4 5DU,None
|
||||
33 Anita Street,M4 5DU,77123076
|
||||
34 Anita Street,M4 5DU,None
|
||||
35 Anita Street,M4 5DU,77123078
|
||||
36 Anita Street,M4 5DU,77123079
|
||||
23 George Leigh Street,M4 5DR,77123171
|
||||
25 George Leigh Street,M4 5DR,None
|
||||
35 George Leigh Street,M4 5DR,77123177
|
||||
39 George Leigh Street,M4 5DR,77123179
|
||||
41 George Leigh Street,M4 5DR,None
|
||||
43 George Leigh Street,M4 5DR,None
|
||||
49 George Leigh Street,M4 5DR,None
|
||||
51 George Leigh Street,M4 5DR,77123185
|
||||
55 George Leigh Street,M4 5DR,None
|
||||
57 George Leigh Street,M4 5DR,None
|
||||
"1a, Victoria Square",M4 5DX,77211153
|
||||
2a Victoria Square ,M4 5DX,None
|
||||
"4a, Victoria Square",M4 5DX,77211155
|
||||
5a Victoria Square,M4 5DX,77211156
|
||||
6a Victoria Square,M4 5DX,77211157
|
||||
7a Victoria Square,M4 5DX,77211158
|
||||
8a Victoria Square,M4 5DX,77211159
|
||||
9a Victoria Square,M4 5DX,77211160
|
||||
10a Victoria Square,M4 5DX,77211161
|
||||
11a Victoria Square,M4 5DX,77211162
|
||||
12a Victoria Square,M4 5DX,77211163
|
||||
13a Victoria Square,M4 5DX,77211164
|
||||
14a Victoria Square,M4 5DX,77211165
|
||||
15a Victoria Square,M4 5DX,77211166
|
||||
16a Victoria Square,M4 5DX,77211167
|
||||
17a Victoria Square,M4 5DX,77211168
|
||||
18a Victoria Square,M4 5DX,77211169
|
||||
19a Victoria Square,M4 5DX,77211170
|
||||
20a Victoria Square,M4 5DX,77211171
|
||||
21a Victoria Square,M4 5DY,77211172
|
||||
22a Victoria Square,M4 5DY,None
|
||||
23a Victoria Square,M4 5DY,77211174
|
||||
24a Victoria Square,M4 5DY,77211175
|
||||
25a Victoria Square,M4 5DY,77211176
|
||||
26a Victoria Square,M4 5DY,77211177
|
||||
27a Victoria Square,M4 5DY,77211178
|
||||
28a Victoria Square,M4 5DY,None
|
||||
29a Victoria Square,M4 5DY,77211180
|
||||
30a Victoria Square,M4 5DY,77211181
|
||||
31a Victoria Square,M4 5DY,77211182
|
||||
32a Victoria Square,M4 5DY,77211183
|
||||
33a Victoria Square,M4 5DY,77211184
|
||||
34a Victoria Square,M4 5DY,77211185
|
||||
35a Victoria Square,M4 5DY,None
|
||||
36a Victoria Square,M4 5DY,77211187
|
||||
37a Victoria Square,M4 5DY,77211188
|
||||
38a Victoria Square,M4 5DY,77211189
|
||||
39a Victoria Square,M4 5DY,77211190
|
||||
40a Victoria Square,M4 5DY,None
|
||||
41a Victoria Square,M4 5DY,77211192
|
||||
42a Victoria Square,M4 5DY,77211193
|
||||
43a Victoria Square,M4 5DY,77211194
|
||||
44a Victoria Square,M4 5DY,77211195
|
||||
45a Victoria Square,M4 5DY,77211196
|
||||
46a Victoria Square,M4 5DY,77211197
|
||||
47a Victoria Square,M4 5DY,77211198
|
||||
48a Victoria Square,M4 5DY,77211199
|
||||
49a Victoria Square,M4 5DY,77211200
|
||||
50a Victoria Square,M4 5DY,77211201
|
||||
51a Victoria Square,M4 5DY,77211202
|
||||
52a Victoria Square,M4 5DY,77211203
|
||||
53a Victoria Square,M4 5DY,77211204
|
||||
54a Victoria Square,M4 5DY,77211205
|
||||
55a Victoria Square,M4 5DY,77211206
|
||||
56a Victoria Square,M4 5DZ,77211207
|
||||
57a Victoria Square,M4 5DZ,None
|
||||
58a Victoria Square,M4 5DZ,77211209
|
||||
59a Victoria Square,M4 5DZ,77211210
|
||||
60a Victoria Square,M4 5DZ,77211211
|
||||
61a Victoria Square,M4 5DZ,77211212
|
||||
62a Victoria Square,M4 5DZ,77211213
|
||||
63a Victoria Square,M4 5DZ,None
|
||||
64a Victoria Square,M4 5DZ,77211215
|
||||
65a Victoria Square,M4 5DZ,77211216
|
||||
66a Victoria Square,M4 5DZ,None
|
||||
67a Victoria Square,M4 5DZ,None
|
||||
68a Victoria Square,M4 5DZ,77211219
|
||||
69a Victoria Square,M4 5DZ,77211220
|
||||
70a Victoria Square,M4 5DZ,77211221
|
||||
71a Victoria Square,M4 5DZ,77211222
|
||||
72a Victoria Square,M4 5DZ,77211223
|
||||
73a Victoria Square,M4 5DZ,77211224
|
||||
74a Victoria Square,M4 5DZ,None
|
||||
75a Victoria Square,M4 5DZ,77211226
|
||||
76a Victoria Square,M4 5DZ,77211227
|
||||
77a Victoria Square,M4 5DZ,None
|
||||
78a Victoria Square,M4 5DZ,77211229
|
||||
79a Victoria Square,M4 5DZ,77211230
|
||||
80a Victoria Square,M4 5DZ,77211231
|
||||
81a Victoria Square,M4 5DZ,77211232
|
||||
82 Victoria Square,M4 5DZ,None
|
||||
83a Victoria Square,M4 5DZ,77211234
|
||||
84a Victoria Square,M4 5DZ,None
|
||||
85a Victoria Square,M4 5DZ,77211236
|
||||
86a Victoria Square,M4 5DZ,77211237
|
||||
87a Victoria Square,M4 5DZ,77211238
|
||||
88a Victoria Square,M4 5DZ,None
|
||||
89a Victoria Square,M4 5DZ,77211240
|
||||
90a Victoria Square,M4 5DZ,77211241
|
||||
91a Victoria Square,M4 5DZ,77211242
|
||||
92a Victoria Square,M4 5DZ,77211243
|
||||
93a Victoria Square,M4 5EA,77211244
|
||||
94a Victoria Square,M4 5EA,None
|
||||
95a Victoria Square,M4 5EA,77211246
|
||||
96a Victoria Square,M4 5EA,77211247
|
||||
97a Victoria Square,M4 5EA,77211248
|
||||
98a Victoria Square,M4 5EA,77211249
|
||||
99a Victoria Square,M4 5EA,77211250
|
||||
100a Victoria Square,M4 5EA,77211251
|
||||
101a Victoria Square,M4 5EA,None
|
||||
102a Victoria Square,M4 5EA,None
|
||||
103a Victoria Square,M4 5EA,77211254
|
||||
104a Victoria Square,M4 5EA,77211255
|
||||
105a Victoria Square,M4 5EA,None
|
||||
106a Victoria Square,M4 5EA,77211257
|
||||
107a Victoria Square,M4 5EA,77211258
|
||||
108a Victoria Square,M4 5EA,77211259
|
||||
109a Victoria Square,M4 5EA,77211260
|
||||
110a Victoria Square,M4 5EA,77211261
|
||||
111a Victoria Square,M4 5EA,77211262
|
||||
112a Victoria Square,M4 5EA,None
|
||||
113a Victoria Square,M4 5EA,77211264
|
||||
114a Victoria Square,M4 5EA,77211265
|
||||
115a Victoria Square,M4 5EA,77211266
|
||||
116a Victoria Square,M4 5EA,77211267
|
||||
117a Victoria Square,M4 5EA,None
|
||||
118a Victoria Square,M4 5EA,None
|
||||
119a Victoria Square,M4 5EA,77211270
|
||||
120a Victoria Square,M4 5EA,77211271
|
||||
121a Victoria Square,M4 5EA,77211272
|
||||
122a Victoria Square,M4 5EA,77211273
|
||||
123a Victoria Square,M4 5EA,77211274
|
||||
124a Victoria Square,M4 5EA,None
|
||||
125a Victoria Square,M4 5EA,77211276
|
||||
126a Victoria Square,M4 5EA,77211277
|
||||
127a Victoria Square,M4 5EA,77211278
|
||||
128a Victoria Square,M4 5EA,77211279
|
||||
129a Victoria Square,M4 5EA,77211280
|
||||
130a Victoria Square,M4 5FA,77211281
|
||||
131a Victoria Square,M4 5FA,77211282
|
||||
132a Victoria Square,M4 5FA,77211283
|
||||
133a Victoria Square,M4 5FA,None
|
||||
134a Victoria Square,M4 5FA,77211285
|
||||
135a Victoria Square,M4 5FA,77211286
|
||||
136a Victoria Square,M4 5FA,77211287
|
||||
137a Victoria Square,M4 5FA,77211288
|
||||
138a Victoria Square,M4 5FA,77211289
|
||||
139a Victoria Square,M4 5FA,77211290
|
||||
140a Victoria Square,M4 5FA,77211291
|
||||
141a Victoria Square,M4 5FA,77211292
|
||||
142a Victoria Square,M4 5FA,77211293
|
||||
143a Victoria Square,M4 5FA,77211294
|
||||
144a Victoria Square,M4 5FA,77211295
|
||||
145a Victoria Square,M4 5FA,None
|
||||
146a Victoria Square,M4 5FA,77211297
|
||||
147a Victoria Square,M4 5FA,77211298
|
||||
148a Victoria Square,M4 5FA,77211299
|
||||
149a Victoria Square,M4 5FA,77211300
|
||||
150a Victoria Square,M4 5FA,77211301
|
||||
151a Victoria Square,M4 5FA,None
|
||||
152a Victoria Square,M4 5FA,77211303
|
||||
153a Victoria Square,M4 5FA,None
|
||||
154a Victoria Square,M4 5FA,77211305
|
||||
155a Victoria Square,M4 5FA,None
|
||||
156a Victoria Square,M4 5FA,77211307
|
||||
157a Victoria Square,M4 5FA,77211308
|
||||
158a Victoria Square,M4 5FA,77211309
|
||||
159a Victoria Square,M4 5FA,None
|
||||
160a Victoria Square,M4 5FA,77211311
|
||||
161a Victoria Square,M4 5FA,None
|
||||
162a Victoria Square,M4 5FA,None
|
||||
163a Victoria Square,M4 5FA,77211314
|
||||
164a Victoria Square,M4 5FA,77211315
|
||||
165a Victoria Square,M4 5FA,77211316
|
||||
166a Victoria Square,M4 5FA,None
|
||||
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
|
||||
|
|
|
@ -42,7 +42,7 @@ class Settings(BaseSettings):
|
|||
AWS_DEFAULT_REGION: Optional[str] = None
|
||||
|
||||
class Config:
|
||||
env_file = "backend/.env"
|
||||
env_file = "backend/.env.local"
|
||||
|
||||
|
||||
@lru_cache()
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@ from contextlib import contextmanager
|
|||
from backend.app.config import get_settings
|
||||
from sqlmodel import Session
|
||||
|
||||
connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
|
||||
connection_string = (
|
||||
"postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
|
||||
)
|
||||
db_string = connection_string.format(
|
||||
drivername="psycopg2", # You'll need to use psycopg2 driver for PostgreSQL
|
||||
username=get_settings().DB_USERNAME,
|
||||
|
|
@ -28,7 +30,9 @@ db_engine = create_engine(
|
|||
|
||||
def get_db_session():
|
||||
if db_engine is None:
|
||||
raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.")
|
||||
raise RuntimeError(
|
||||
"Database is not configured. Set DATABASE_URL in environment variables."
|
||||
)
|
||||
return Session(db_engine)
|
||||
|
||||
|
||||
|
|
|
|||
12
backend/app/db/functions/condition_functions.py
Normal file
12
backend/app/db/functions/condition_functions.py
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
from typing import List
|
||||
from sqlalchemy import insert, delete
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from backend.app.db.connection import db_session, db_read_session
|
||||
from backend.app.db.models.condition import PropertyConditionSurveyModel
|
||||
|
||||
|
||||
def bulk_insert_property_surveys(
    session: Session, surveys: List[PropertyConditionSurveyModel]
) -> None:
    """Placeholder for bulk-inserting property condition surveys.

    Args:
        session: Database session the insert would run on (currently unused).
        surveys: Survey models to insert (currently unused).

    Raises:
        NotImplementedError: Always; the function has no implementation yet.
    """
    raise NotImplementedError()
|
||||
97
backend/app/db/models/condition.py
Normal file
97
backend/app/db/models/condition.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
from sqlalchemy import (
|
||||
BigInteger,
|
||||
Column,
|
||||
Date,
|
||||
ForeignKey,
|
||||
Integer,
|
||||
String,
|
||||
Enum as SqlEnum,
|
||||
)
|
||||
from sqlalchemy.orm import declarative_base, relationship
|
||||
|
||||
from backend.condition.domain.aspect_type import AspectType
|
||||
from backend.condition.domain.element_type import ElementType
|
||||
|
||||
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()

# Database enum type "element_type". ``values_callable`` makes the stored
# labels the members' ``.value`` attributes.
ElementTypeDb = SqlEnum(
    ElementType,
    values_callable=lambda enum_cls: [member.value for member in enum_cls],
    native_enum=True,
    name="element_type",
)

# Database enum type "aspect_type", configured the same way as the
# element enum above.
AspectTypeDb = SqlEnum(
    AspectType,
    values_callable=lambda enum_cls: [member.value for member in enum_cls],
    native_enum=True,
    name="aspect_type",
)
|
||||
|
||||
|
||||
class PropertyConditionSurveyModel(Base):
    """ORM mapping for the ``property_condition_survey`` table.

    One row per survey, identified by an auto-incrementing ``id`` and carrying
    the property's ``uprn``, the survey ``date`` and its ``source``. The child
    ``ElementModel`` rows are reachable through ``elements``.
    """

    __tablename__ = "property_condition_survey"

    # Surrogate primary key.
    id = Column(BigInteger, autoincrement=True, primary_key=True)

    # Required survey attributes.
    uprn = Column(BigInteger, nullable=False)
    date = Column(Date, nullable=False)
    source = Column(String, nullable=False)

    # Child elements; paired with ElementModel.survey. The cascade setting
    # propagates session operations from the survey to its elements.
    elements = relationship(
        "ElementModel", cascade="all, delete-orphan", back_populates="survey"
    )
|
||||
|
||||
|
||||
class ElementModel(Base):
    """ORM mapping for the ``element`` table.

    Each row belongs to exactly one survey (``survey_id`` is a required
    foreign key to ``property_condition_survey.id``) and owns a collection of
    ``AspectConditionModel`` rows via ``aspect_conditions``.
    """

    __tablename__ = "element"  # TODO: rename to survey_element?

    # Surrogate primary key.
    id = Column(BigInteger, autoincrement=True, primary_key=True)

    # Owning survey.
    survey_id = Column(
        BigInteger, ForeignKey("property_condition_survey.id"), nullable=False
    )

    element_type = Column(ElementTypeDb, nullable=False)
    element_instance = Column(BigInteger, nullable=False)

    # Parent side; paired with PropertyConditionSurveyModel.elements.
    survey = relationship("PropertyConditionSurveyModel", back_populates="elements")

    # Child aspect conditions; paired with AspectConditionModel.element.
    aspect_conditions = relationship(
        "AspectConditionModel", cascade="all, delete-orphan", back_populates="element"
    )
|
||||
|
||||
|
||||
class AspectConditionModel(Base):
    """ORM mapping for the ``aspect_condition`` table.

    Each row belongs to exactly one element (``element_id`` is a required
    foreign key to ``element.id``). ``aspect_type`` and ``aspect_instance``
    are required; the remaining detail columns are nullable.
    """

    __tablename__ = "aspect_condition"  # TODO: rename to survey_aspect?

    # Surrogate primary key.
    id = Column(BigInteger, autoincrement=True, primary_key=True)

    # Owning element.
    element_id = Column(BigInteger, ForeignKey("element.id"), nullable=False)

    aspect_type = Column(AspectTypeDb, nullable=False)
    aspect_instance = Column(BigInteger, nullable=False)

    # Optional detail columns (no nullable=False, so NULL is permitted).
    value = Column(String)
    quantity = Column(Integer)
    install_date = Column(Date)
    renewal_year = Column(Integer)
    comments = Column(String)

    # Parent side; paired with ElementModel.aspect_conditions.
    element = relationship("ElementModel", back_populates="aspect_conditions")
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
# fastapi
|
||||
fastapi==0.115.2
|
||||
sqlalchemy==2.0.36
|
||||
|
|
@ -13,4 +14,3 @@ openpyxl==3.1.2
|
|||
# Basic
|
||||
pytz
|
||||
sqlmodel
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ The processor currently supports file formats provided by **Peabody** and **LBWF
|
|||
|
||||
The `local_runner` script allows the processor to be executed in a local environment.
|
||||
|
||||
1. Copy a sample input file into the `sample_data/` directory.
|
||||
1. Copy sample input file(s) into the `sample_data/` directory. If working with Peabody data, you'll need the Landlord Reference / UPRN lookup file as well.
|
||||
2. Update `local_runner.py` as required, specifically the definitions of:
|
||||
- `lbwf_path`
|
||||
- `peabody_path`
|
||||
|
|
|
|||
|
|
@ -21,6 +21,8 @@ def main() -> None:
|
|||
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
|
||||
)
|
||||
filepaths = [lbwf_path, peabody_path]
|
||||
# filepaths = [lbwf_path]
|
||||
# filepaths = [peabody_path]
|
||||
|
||||
for fp in filepaths:
|
||||
with fp.open("rb") as f:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
|
||||
from typing import BinaryIO, Any, Dict, Iterator, List, Optional, Tuple
|
||||
from openpyxl import Workbook, load_workbook
|
||||
from collections import defaultdict
|
||||
|
||||
|
|
@ -15,7 +15,11 @@ logger = setup_logger()
|
|||
|
||||
class LbwfParser(Parser):
|
||||
|
||||
def parse(self, file_stream: BinaryIO) -> Any:
|
||||
def parse(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
|
||||
) -> Any:
|
||||
wb: Workbook = load_workbook(file_stream)
|
||||
address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(
|
||||
wb
|
||||
|
|
|
|||
|
|
@ -1,8 +1,13 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import BinaryIO, Any
|
||||
from typing import BinaryIO, Any, Dict, Optional
|
||||
|
||||
|
||||
class Parser(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, file_stream: BinaryIO) -> Any:
|
||||
def parse(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
|
||||
) -> Any:
|
||||
pass
|
||||
|
|
@ -1,26 +1,55 @@
|
|||
from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict
|
||||
import csv
|
||||
from pathlib import Path
|
||||
from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
|
||||
from openpyxl import Workbook, load_workbook
|
||||
from collections import defaultdict
|
||||
|
||||
from backend.condition.parsing.parser import Parser
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
|
||||
PeabodyAssetCondition,
|
||||
)
|
||||
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class PeabodyParser(Parser):
|
||||
def parse(self, file_stream: BinaryIO) -> Any:
|
||||
def parse(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
|
||||
) -> Any:
|
||||
wb: Workbook = load_workbook(file_stream)
|
||||
address_to_uprn_map: Dict[str, int] = PeabodyParser._generate_address_to_uprn_dict(wb)
|
||||
|
||||
assets = self._parse_assets(wb)
|
||||
|
||||
return self._group_assets_into_properties(
|
||||
assets=assets,
|
||||
address_to_uprn_map=address_to_uprn_map,
|
||||
if location_ref_to_uprn_map is None:
|
||||
location_ref_to_uprn_map: Dict[str, int] = (
|
||||
PeabodyParser._build_location_ref_to_uprn_map()
|
||||
)
|
||||
|
||||
assets = PeabodyParser._parse_assets(wb)
|
||||
|
||||
return PeabodyParser._group_assets_into_properties(
|
||||
assets=assets,
|
||||
location_ref_to_uprn_map=location_ref_to_uprn_map,
|
||||
)
|
||||
|
||||
@staticmethod
def _build_location_ref_to_uprn_map() -> Dict[str, int]:
    """Load the Location Reference -> UPRN lookup from the bundled CSV.

    The file is resolved relative to this module:
    ``<parents[1]>/sample_data/peabody/PeabodyPropertymatched_Dec25_propref_UPRN.csv``.
    It must have a ``reference`` column (used as the key) and an
    ``out_uprn`` column (converted to ``int``).

    Rows whose ``out_uprn`` cell is blank are skipped rather than aborting
    the whole load. The grouping step already logs a warning and skips any
    location reference with no UPRN in the map, so those properties are
    still reported.

    Returns:
        Mapping of location reference to UPRN.

    Raises:
        FileNotFoundError: If the lookup file is missing.
        KeyError: If a required column is absent from the file.
        ValueError: If a non-blank ``out_uprn`` is not a valid integer.
    """
    location_ref_to_uprn_filepath: Path = (
        Path(__file__).resolve().parents[1]
        / "sample_data"
        / "peabody"
        / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
    )
    location_ref_to_uprn_map: Dict[str, int] = {}

    with location_ref_to_uprn_filepath.open(newline="") as f:
        reader: Any = csv.DictReader(f)
        for row in reader:
            # DictReader yields None for cells missing from a short row;
            # treat that the same as an empty cell.
            raw_uprn = (row["out_uprn"] or "").strip()
            if not raw_uprn:
                continue
            location_ref_to_uprn_map[row["reference"]] = int(raw_uprn)

    return location_ref_to_uprn_map
|
||||
|
||||
@staticmethod
|
||||
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
|
||||
|
|
@ -33,7 +62,9 @@ class PeabodyParser(Parser):
|
|||
assets: List[PeabodyAssetCondition] = []
|
||||
for row in asset_rows:
|
||||
try:
|
||||
asset = PeabodyParser._map_row_to_asset_record(row, asset_header_indexes)
|
||||
asset = PeabodyParser._map_row_to_asset_record(
|
||||
row, asset_header_indexes
|
||||
)
|
||||
if not asset.is_block_level:
|
||||
# Block-level condition surveys are out of scope for now
|
||||
# until we have a wider think on how to handle block
|
||||
|
|
@ -48,24 +79,26 @@ class PeabodyParser(Parser):
|
|||
@staticmethod
|
||||
def _group_assets_into_properties(
|
||||
assets: List[PeabodyAssetCondition],
|
||||
address_to_uprn_map: Dict[str, int],
|
||||
location_ref_to_uprn_map: Dict[str, int],
|
||||
) -> List[PeabodyProperty]:
|
||||
assets_by_address: DefaultDict[str, List[PeabodyAssetCondition]] = defaultdict(list)
|
||||
assets_by_location_reference: DefaultDict[str, List[PeabodyAssetCondition]] = (
|
||||
defaultdict(list)
|
||||
)
|
||||
|
||||
for asset in assets:
|
||||
if asset.full_address is None:
|
||||
if asset.lo_reference is None:
|
||||
continue
|
||||
|
||||
address = asset.full_address.strip()
|
||||
assets_by_address[address].append(asset)
|
||||
assets_by_location_reference[asset.lo_reference].append(asset)
|
||||
|
||||
properties: List[PeabodyProperty] = []
|
||||
|
||||
for address, grouped_assets in assets_by_address.items():
|
||||
uprn = address_to_uprn_map.get(address)
|
||||
for location_ref, grouped_assets in assets_by_location_reference.items():
|
||||
|
||||
uprn = location_ref_to_uprn_map.get(location_ref)
|
||||
|
||||
if uprn is None:
|
||||
logger.warning(f"No UPRN found for address: {address}")
|
||||
logger.warning(f"No UPRN found for Location Reference: {location_ref}")
|
||||
continue
|
||||
|
||||
properties.append(
|
||||
|
|
@ -77,7 +110,6 @@ class PeabodyParser(Parser):
|
|||
|
||||
return properties
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _map_row_to_asset_record(
|
||||
row: Any | Tuple[object | None, ...],
|
||||
|
|
@ -102,39 +134,9 @@ class PeabodyParser(Parser):
|
|||
condition_survey_date=row[header_indexes["condition_survey_date"]],
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]:
|
||||
sheet = wb["Survey Records - D & Lower"]
|
||||
rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)
|
||||
|
||||
headers = next(rows)
|
||||
header_indexes: Dict[str, int] = PeabodyParser._get_column_indexes_by_name(headers)
|
||||
|
||||
address_idx = header_indexes["full_address"]
|
||||
|
||||
|
||||
address_to_uprn: Dict[str, int] = {}
|
||||
# Generate random UPRNs for now
|
||||
next_uprn = 1 # TODO: get real UPRNs
|
||||
|
||||
for row in rows:
|
||||
address = row[address_idx]
|
||||
|
||||
if address is None:
|
||||
continue
|
||||
|
||||
address = address.strip()
|
||||
|
||||
if address not in address_to_uprn:
|
||||
address_to_uprn[address] = next_uprn
|
||||
next_uprn += 1
|
||||
|
||||
return address_to_uprn
|
||||
|
||||
|
||||
@staticmethod
|
||||
def _get_column_indexes_by_name(
|
||||
headers: Tuple[object | None, ...]
|
||||
headers: Tuple[object | None, ...],
|
||||
) -> Dict[str, int]:
|
||||
index: Dict[str, int] = {}
|
||||
|
||||
|
|
|
|||
86
backend/condition/persistence/condition_postgres.py
Normal file
86
backend/condition/persistence/condition_postgres.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import time
|
||||
from typing import List, Optional
|
||||
from sqlmodel import Session
|
||||
|
||||
from utils.logger import setup_logger
|
||||
from backend.app.db.models.condition import (
|
||||
AspectConditionModel,
|
||||
ElementModel,
|
||||
PropertyConditionSurveyModel,
|
||||
)
|
||||
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
|
||||
from backend.app.db.connection import db_session
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class ConditionPostgres:
|
||||
|
||||
def bulk_insert_surveys(
|
||||
self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100
|
||||
) -> None:
|
||||
logger.info(
|
||||
f"Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
|
||||
)
|
||||
survey_models: List[PropertyConditionSurveyModel] = [
|
||||
ConditionPostgres.map_survey_to_model(s) for s in surveys
|
||||
]
|
||||
total: int = len(survey_models)
|
||||
logger.info(
|
||||
f"Finished mapping {total} surveys. Writing to database in batches of {batch_size}..."
|
||||
)
|
||||
|
||||
with db_session() as session:
|
||||
for start in range(0, total, batch_size):
|
||||
end = min(start + batch_size, total)
|
||||
batch = survey_models[start:end]
|
||||
|
||||
t0: float = time.perf_counter()
|
||||
ConditionPostgres._insert_surveys_batch(batch, session)
|
||||
elapsed: float = time.perf_counter() - t0
|
||||
|
||||
logger.info(
|
||||
f"Inserted batch {start} - {end} ({len(batch)} surveys) in {elapsed} seconds",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def map_survey_to_model(
|
||||
survey: PropertyConditionSurvey,
|
||||
) -> PropertyConditionSurveyModel:
|
||||
survey_model = PropertyConditionSurveyModel(
|
||||
uprn=survey.uprn,
|
||||
date=survey.date,
|
||||
source=survey.source,
|
||||
elements=[],
|
||||
)
|
||||
|
||||
for element in survey.elements:
|
||||
element_model = ElementModel(
|
||||
element_type=element.element_type,
|
||||
element_instance=element.element_instance,
|
||||
aspect_conditions=[],
|
||||
)
|
||||
|
||||
for aspect in element.aspect_conditions:
|
||||
aspect_model = AspectConditionModel(
|
||||
aspect_type=aspect.aspect_type,
|
||||
aspect_instance=aspect.aspect_instance,
|
||||
value=aspect.value,
|
||||
quantity=aspect.quantity,
|
||||
install_date=aspect.install_date,
|
||||
renewal_year=aspect.renewal_year,
|
||||
comments=aspect.comments,
|
||||
)
|
||||
|
||||
element_model.aspect_conditions.append(aspect_model)
|
||||
|
||||
survey_model.elements.append(element_model)
|
||||
|
||||
return survey_model
|
||||
|
||||
@staticmethod
def _insert_surveys_batch(
    surveys: List[PropertyConditionSurveyModel],
    session: Session,
) -> None:
    """Stage one batch of survey models on the session and commit it.

    Args:
        surveys: The survey models that make up this batch.
        session: The open session the batch is added to and committed on.
    """
    # Each call issues its own commit, so a batch is committed as a unit.
    session.add_all(surveys)
    session.commit()
|
||||
|
|
@ -1,25 +1,33 @@
|
|||
from typing import Any, BinaryIO, List
|
||||
from datetime import datetime
|
||||
|
||||
from utils.logger import setup_logger
|
||||
from backend.condition.domain.mapping.mapper import Mapper
|
||||
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
|
||||
from backend.condition.parsing.parser import Parser
|
||||
from utils.logger import setup_logger
|
||||
from backend.condition.persistence.condition_postgres import ConditionPostgres
|
||||
from backend.condition.file_type import FileType, detect_file_type
|
||||
from backend.condition.parsing.factory import select_parser, select_mapper
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def process_file(file_stream: BinaryIO, source_key: str) -> None:
|
||||
print(f"[processor] Received file: {source_key}")
|
||||
logger.info(f"[processor] Received file: {source_key}")
|
||||
|
||||
# Instantiation
|
||||
file_type: FileType = detect_file_type(source_key)
|
||||
parser: Parser = select_parser(file_type)
|
||||
mapper: Mapper = select_mapper(file_type)
|
||||
persistence = ConditionPostgres()
|
||||
|
||||
# Orchestration
|
||||
raw_properties: List[Any] = parser.parse(file_stream)
|
||||
|
||||
logger.info(
|
||||
f"[processor] Finished loading customer survey data for {len(raw_properties)} properties. Mapping..."
|
||||
)
|
||||
|
||||
survey_year = datetime.now().year # TODO: get this from filepath or elsewhere
|
||||
|
||||
property_condition_surveys: List[PropertyConditionSurvey] = []
|
||||
|
|
@ -29,4 +37,10 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None:
|
|||
mapper.map_asset_conditions_for_property(p, survey_year)
|
||||
)
|
||||
|
||||
print("done") # temp
|
||||
logger.info(
|
||||
f"[processor] Finished mapping {len(property_condition_surveys)} properties. Writing to database..."
|
||||
)
|
||||
|
||||
persistence.bulk_insert_surveys(property_condition_surveys)
|
||||
|
||||
logger.info(f"[processor] Finished loading surveys to database")
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
from backend.app.db.models.condition import PropertyConditionSurveyModel
|
||||
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
|
||||
|
||||
|
||||
|
|
@ -72,3 +73,41 @@ class CustomAsserts:
|
|||
f"{actual_aspect.comments} != {expected_aspect.comments}"
|
||||
)
|
||||
return True
|
||||
|
||||
def assert_property_condition_survey_model_matches_expected(
    actual_model: PropertyConditionSurveyModel,
    expected: dict,
) -> None:
    """Assert that a survey model matches a plain-dict description of it.

    Args:
        actual_model: The model under test. It is read through the attributes
            ``uprn``, ``date``, ``source`` and ``elements``; each element
            through ``element_type``, ``element_instance`` and
            ``aspect_conditions``.
        expected: Dict with the keys ``"uprn"``, ``"date"``, ``"source"`` and
            ``"elements"``. Each element dict has ``"element_type"``,
            ``"element_instance"`` and ``"aspects"``; each aspect dict maps an
            attribute name to the value that attribute must have.

    Raises:
        AssertionError: On the first mismatch. Element and aspect failures are
            prefixed with their position, e.g. ``Element[0].Aspect[1]``.

    Note:
        Only the keys present in an expected aspect dict are compared, so a
        key left out of the dict is not checked at all.
    """
    assert actual_model.uprn == expected["uprn"], "UPRN differs"
    assert actual_model.date == expected["date"], "Date differs"
    assert actual_model.source == expected["source"], "Source differs"

    # Lengths are checked before zipping because zip() would otherwise
    # silently ignore surplus entries on either side.
    assert len(actual_model.elements) == len(expected["elements"]), (
        f"Expected {len(expected['elements'])} elements, "
        f"got {len(actual_model.elements)}"
    )

    for i, (actual_element, expected_element) in enumerate(
        zip(actual_model.elements, expected["elements"])
    ):
        assert (
            actual_element.element_type == expected_element["element_type"]
        ), f"Element[{i}].element_type differs"
        assert (
            actual_element.element_instance == expected_element["element_instance"]
        ), f"Element[{i}].element_instance differs"

        assert len(actual_element.aspect_conditions) == len(
            expected_element["aspects"]
        ), f"Element[{i}] aspect count differs"

        for j, (actual_aspect, expected_aspect) in enumerate(
            zip(actual_element.aspect_conditions, expected_element["aspects"])
        ):
            prefix = f"Element[{i}].Aspect[{j}]"

            for key, value in expected_aspect.items():
                # Report a missing attribute as a located assertion failure
                # instead of a bare AttributeError.
                assert hasattr(
                    actual_aspect, key
                ), f"{prefix}.{key} is missing on the actual model"

                actual_value = getattr(actual_aspect, key)
                assert actual_value == value, (
                    f"{prefix}.{key} differs: "
                    f"{actual_value} != {value}"
                )
|
||||
|
|
|
|||
|
|
@ -1,19 +1,23 @@
|
|||
import pytest
|
||||
from typing import Any
|
||||
from typing import Any, Dict
|
||||
from io import BytesIO
|
||||
from openpyxl import Workbook
|
||||
from datetime import datetime
|
||||
|
||||
from backend.condition.parsing.peabody_parser import PeabodyParser
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
|
||||
PeabodyAssetCondition,
|
||||
)
|
||||
from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def peabody_assets_xlsx_bytes() -> BytesIO:
|
||||
wb = Workbook()
|
||||
survey_records_d_and_lower = wb.active
|
||||
survey_records_d_and_lower.title = "Survey Records - D & Lower"
|
||||
survey_records_d_and_lower.append([
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"Lo_Reference",
|
||||
"full_address",
|
||||
"location_type_code",
|
||||
|
|
@ -30,8 +34,10 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
|
|||
"cloned",
|
||||
"lo_type_code",
|
||||
"condition_survey_date",
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000RAND",
|
||||
"1 RANDOM HOUSE LONDON",
|
||||
3,
|
||||
|
|
@ -47,9 +53,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
|
|||
330,
|
||||
"N",
|
||||
3,
|
||||
datetime(2025,12,4,9,17,0)
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
datetime(2025, 12, 4, 9, 17, 0),
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000BLOCK",
|
||||
"1100 BLOCK",
|
||||
3,
|
||||
|
|
@ -65,9 +73,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
|
|||
330,
|
||||
"N",
|
||||
3,
|
||||
datetime(2025,12,4,9,17,0)
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
datetime(2025, 12, 4, 9, 17, 0),
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000FAKE",
|
||||
"3 FAKE CLOSE LONDON",
|
||||
3,
|
||||
|
|
@ -83,9 +93,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
|
|||
1500.7,
|
||||
"N",
|
||||
3,
|
||||
datetime(2025,7,5,0,0,0)
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
datetime(2025, 7, 5, 0, 0, 0),
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000MIS",
|
||||
"99 MISC ROAD LONDON",
|
||||
3,
|
||||
|
|
@ -101,9 +113,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
|
|||
None,
|
||||
"N",
|
||||
3,
|
||||
None
|
||||
])
|
||||
survey_records_d_and_lower.append([
|
||||
None,
|
||||
]
|
||||
)
|
||||
survey_records_d_and_lower.append(
|
||||
[
|
||||
"B000MIS",
|
||||
"99 MISC ROAD LONDON",
|
||||
3,
|
||||
|
|
@ -119,9 +133,9 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
|
|||
3531,
|
||||
"N",
|
||||
3,
|
||||
None
|
||||
])
|
||||
|
||||
None,
|
||||
]
|
||||
)
|
||||
|
||||
stream = BytesIO()
|
||||
wb.save(stream)
|
||||
|
|
@ -129,18 +143,32 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
|
|||
|
||||
return stream
|
||||
|
||||
def test_peabody_parser_parses_conditions(peabody_assets_xlsx_bytes):
|
||||
|
||||
@pytest.fixture
def location_ref_to_uprn_map() -> Dict[str, int]:
    """Return a location-reference to UPRN lookup with UPRNs numbered from 1."""
    location_refs = ("B000RAND", "B000BLOCK", "B000FAKE", "B000MIS")
    return {ref: uprn for uprn, ref in enumerate(location_refs, start=1)}
|
||||
|
||||
|
||||
def test_peabody_parser_parses_conditions(
|
||||
peabody_assets_xlsx_bytes, location_ref_to_uprn_map
|
||||
):
|
||||
# arrange
|
||||
parser = PeabodyParser()
|
||||
|
||||
# act
|
||||
result: Any = parser.parse(peabody_assets_xlsx_bytes)
|
||||
result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map)
|
||||
|
||||
# assert
|
||||
assert len(result) == 3
|
||||
|
||||
assert all(isinstance(item, PeabodyProperty) for item in result)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def asset_condition_factory():
|
||||
def _factory(full_address: str) -> PeabodyAssetCondition:
|
||||
|
|
@ -165,6 +193,7 @@ def asset_condition_factory():
|
|||
|
||||
return _factory
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"full_address, expected_block_level",
|
||||
[
|
||||
|
|
@ -175,7 +204,7 @@ def asset_condition_factory():
|
|||
("81A-B GORE ROAD LONDON", True),
|
||||
("73 & 74 HARVEST COURT ST. ALBANS", True),
|
||||
("25 HAVERSHAM COURT GREENFORD", False),
|
||||
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False)
|
||||
("FLAT 10 SPARROW COURT SOUTHMERE DRIVE LONDON SE2 9ES", False),
|
||||
],
|
||||
)
|
||||
def test_peabody_asset_is_block_level(
|
||||
|
|
|
|||
164
backend/condition/tests/persistence/test_condition_postgres.py
Normal file
164
backend/condition/tests/persistence/test_condition_postgres.py
Normal file
|
|
@ -0,0 +1,164 @@
|
|||
import pytest
|
||||
from datetime import date
|
||||
|
||||
from backend.condition.persistence.condition_postgres import ConditionPostgres
|
||||
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
|
||||
from backend.condition.domain.element import Element
|
||||
from backend.condition.domain.element_type import ElementType
|
||||
from backend.condition.domain.aspect_condition import AspectCondition
|
||||
from backend.condition.domain.aspect_type import AspectType
|
||||
from backend.app.db.models.condition import PropertyConditionSurveyModel
|
||||
from backend.condition.tests.custom_asserts import CustomAsserts
|
||||
|
||||
|
||||
def test_map_survey_to_model() -> None:
    """Map a three-element survey and check every copied field on the model.

    The expected dict lists every aspect field explicitly, because the
    assertion helper only compares the keys that are present in it.
    """
    # arrange
    survey = PropertyConditionSurvey(
        uprn=1,
        elements=[
            Element(
                element_type=ElementType.EXTERNAL_WINDOWS,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.MATERIAL,
                        aspect_instance=1,
                        value="UPVC Double Glazed",
                        quantity=8,
                        install_date=None,
                        renewal_year=2036,
                        comments=None,
                    ),
                ],
            ),
            Element(
                element_type=ElementType.EXTERNAL_DECORATION,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.CONDITION,
                        aspect_instance=1,
                        value="Normal",
                        quantity=1,
                        install_date=None,
                        renewal_year=2029,
                        comments=None,
                    )
                ],
            ),
            Element(
                element_type=ElementType.EXTERNAL_WALL,
                element_instance=1,
                aspect_conditions=[
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=1,
                        value="Pointed",
                        quantity=65,
                        install_date=None,
                        renewal_year=2045,
                        comments=None,
                    ),
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=1,
                        value="Pointing",
                        quantity=1,
                        install_date=None,
                        renewal_year=2069,
                        comments=None,
                    ),
                    AspectCondition(
                        aspect_type=AspectType.FINISH,
                        aspect_instance=2,
                        value="Tile Hung",
                        quantity=8,
                        install_date=None,
                        renewal_year=2049,
                        comments=None,
                    ),
                ],
            ),
        ],
        date=date(2000, 1, 1),
        source="Peabody",
    )

    expected = {
        "uprn": 1,
        "date": date(2000, 1, 1),
        "source": "Peabody",
        "elements": [
            {
                "element_type": ElementType.EXTERNAL_WINDOWS,
                "element_instance": 1,
                "aspects": [
                    {
                        "aspect_type": AspectType.MATERIAL,
                        "aspect_instance": 1,
                        "value": "UPVC Double Glazed",
                        "quantity": 8,
                        "install_date": None,
                        "renewal_year": 2036,
                        "comments": None,
                    }
                ],
            },
            {
                "element_type": ElementType.EXTERNAL_DECORATION,
                "element_instance": 1,
                "aspects": [
                    {
                        "aspect_type": AspectType.CONDITION,
                        "aspect_instance": 1,
                        "value": "Normal",
                        "quantity": 1,
                        "install_date": None,
                        "renewal_year": 2029,
                        "comments": None,
                    }
                ],
            },
            {
                "element_type": ElementType.EXTERNAL_WALL,
                "element_instance": 1,
                "aspects": [
                    {
                        # Previously omitted, which left this aspect's type
                        # unverified.
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 1,
                        "value": "Pointed",
                        "quantity": 65,
                        "install_date": None,
                        "renewal_year": 2045,
                        "comments": None,
                    },
                    {
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 1,
                        "value": "Pointing",
                        "quantity": 1,
                        "install_date": None,
                        "renewal_year": 2069,
                        "comments": None,
                    },
                    {
                        "aspect_type": AspectType.FINISH,
                        "aspect_instance": 2,
                        "value": "Tile Hung",
                        "quantity": 8,
                        "install_date": None,
                        "renewal_year": 2049,
                        "comments": None,
                    },
                ],
            },
        ],
    }

    # act
    model: PropertyConditionSurveyModel = ConditionPostgres.map_survey_to_model(survey)

    # assert (survey, element and aspect level)
    CustomAsserts.assert_property_condition_survey_model_matches_expected(
        model,
        expected,
    )
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
|
||||
# Pandas and numpy
|
||||
numpy==2.1.2
|
||||
pandas==2.2.3
|
||||
|
|
|
|||
BIN
backend/postcode_splitter/hackney.xlsx
Normal file
BIN
backend/postcode_splitter/hackney.xlsx
Normal file
Binary file not shown.
114
backend/postcode_splitter/main.py
Normal file
114
backend/postcode_splitter/main.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
|
||||
def sanitise_postcode(postcode: str) -> str | None:
|
||||
"""
|
||||
Normalise postcode for grouping.
|
||||
|
||||
- Uppercase
|
||||
- Remove all whitespace
|
||||
"""
|
||||
if pd.isna(postcode):
|
||||
return None
|
||||
|
||||
return postcode.upper().replace(" ", "")
|
||||
|
||||
|
||||
def is_valid_postcode(postcode_clean: str) -> bool:
    """
    Validate postcode using postcodes.io.

    Expects a sanitised postcode (e.g. E84SQ).
    Returns True if valid, False otherwise.

    False is also returned, without making a request, when the input is empty
    or not a string, and when the request fails or the response body is not
    the expected JSON object. A network failure is therefore indistinguishable
    from an invalid postcode for the caller.
    """
    # Imported locally so the block does not depend on a file-level import.
    from urllib.parse import quote

    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"

    if not isinstance(postcode_clean, str) or not postcode_clean:
        return False

    try:
        resp = requests.get(
            # The value comes from spreadsheet data; escape it so characters
            # such as "/" or "?" cannot alter the request path.
            POSTCODES_IO_VALIDATE_URL.format(postcode=quote(postcode_clean, safe="")),
            timeout=5,
        )
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError):
        # Network issues, rate limits, non-JSON bodies, etc.
        return False

    if not isinstance(payload, dict):
        return False
    return bool(payload.get("result", False))
|
||||
|
||||
|
||||
def main():
    """Resolve UPRNs for the first 500 rows of the "Sustainability" sheet.

    Steps:
      1. Read ``hackney.xlsx`` (relative to the current working directory).
      2. Sanitise the ``Postcode`` column into ``postcode_clean``.
      3. Validate each distinct postcode once via ``is_valid_postcode``.
      4. For every valid postcode, fetch EPC data and resolve UPRNs for the
         rows sharing that postcode. Groups without EPC results, or whose
         lookup raised, are kept with ``found_uprn`` set to None and a
         ``status`` describing why.

    Returns:
        The concatenated per-postcode results. The frame is empty when no row
        has a valid postcode.
    """
    # head() returns a slice of the original frame; copy it before adding
    # columns.
    df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability").head(500).copy()

    # Sanitise postcodes
    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)

    # Validate each distinct, non-null postcode once to save API calls.
    unique_postcodes = df["postcode_clean"].dropna().unique()
    postcode_validity = {
        pc: is_valid_postcode(pc)
        for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
    }

    # Map validity back onto the dataframe. Rows with a missing postcode are
    # not in the dict; treat them as invalid so the column stays strictly
    # boolean. A NaN in a boolean mask would make the filter below raise.
    df["postcode_valid"] = (
        df["postcode_clean"]
        .map(lambda pc: bool(postcode_validity.get(pc, False)))
        .astype(bool)
    )

    results = []

    for postcode, group_df in tqdm(
        df[df["postcode_valid"]].groupby("postcode_clean"),
        desc="Resolving UPRNs by postcode",
    ):
        try:
            epc_df = get_epc_data_with_postcode(postcode)

            if epc_df.empty:
                tmp = group_df.copy()
                tmp["found_uprn"] = None
                tmp["status"] = "no_epc_results"
                results.append(tmp)
                continue

            resolved = resolve_uprns_for_postcode_group(
                group_df=group_df,
                epc_df=epc_df,
            )

            results.append(resolved)

        except Exception as e:
            # Deliberate best effort: one failing postcode must not abort the
            # run, so the failure is recorded on the rows instead.
            tmp = group_df.copy()
            tmp["found_uprn"] = None
            tmp["status"] = "exception"
            tmp["error"] = str(e)
            results.append(tmp)

    # pd.concat() raises on an empty list, which happens when no postcode
    # validated.
    if results:
        final_df = pd.concat(results, ignore_index=True)
    else:
        final_df = df.iloc[0:0].copy()

    # Narrow views kept for interactive inspection (debugger / REPL).
    # reindex() is used instead of [] so the selection does not raise KeyError
    # when no group produced the best_match_* columns; presumably those come
    # from resolve_uprns_for_postcode_group, which is not visible here.
    review_columns = [
        "best_match_lexiscore", "Address 1",
        "best_match_address", "Postcode",
        "UPRN", "best_match_uprn",
    ]
    a = final_df.reindex(columns=review_columns)
    b = a[a["best_match_lexiscore"] > 0]  # rows with a positive match score

    return final_df
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
11
conftest.py
11
conftest.py
|
|
@ -1,5 +1,11 @@
|
|||
import os
|
||||
from backend.app.config import get_settings
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
import os
|
||||
|
||||
# Load .env in conftest.py directory for local development
|
||||
load_dotenv()
|
||||
|
||||
DEFAULT_ENV = {
|
||||
"API_KEY": "test",
|
||||
|
|
@ -8,7 +14,10 @@ DEFAULT_ENV = {
|
|||
"DATA_BUCKET": "test",
|
||||
"PLAN_TRIGGER_BUCKET": "test",
|
||||
"ENGINE_SQS_URL": "test",
|
||||
"EPC_AUTH_TOKEN": "test", # overridden in GitHub Actions
|
||||
"EPC_AUTH_TOKEN": os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
"test",
|
||||
), # overridden in GitHub Actions
|
||||
"GOOGLE_SOLAR_API_KEY": "test",
|
||||
"DB_HOST": "localhost",
|
||||
"DB_USERNAME": "test",
|
||||
|
|
|
|||
|
|
@ -1,111 +1,111 @@
|
|||
import pandas as pd
|
||||
|
||||
epc_c_recommendations = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
"solid floor, ashp 3.0 - corrected.xlsx"
|
||||
)
|
||||
epc_b_recommendations = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
|
||||
"solid floor, ashp 3.0 - corrected.xlsx"
|
||||
)
|
||||
# epc_c_recommendations = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
# "solid floor, ashp 3.0 - corrected.xlsx"
|
||||
# )
|
||||
# epc_b_recommendations = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
|
||||
# "solid floor, ashp 3.0 - corrected.xlsx"
|
||||
# )
|
||||
|
||||
epc_c_movers = epc_b_recommendations[
|
||||
epc_b_recommendations["current_epc_rating"] == "Epc.C"
|
||||
]
|
||||
epc_c_movers["property_type"].value_counts()
|
||||
# epc_c_movers = epc_b_recommendations[
|
||||
# epc_b_recommendations["current_epc_rating"] == "Epc.C"
|
||||
# ]
|
||||
# epc_c_movers["property_type"].value_counts()
|
||||
|
||||
house_epc_c_movers = epc_c_movers[
|
||||
epc_c_movers["property_type"] == "House"
|
||||
]
|
||||
house_epc_c_movers_with_solar = house_epc_c_movers[
|
||||
~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
|
||||
]
|
||||
# house_epc_c_movers = epc_c_movers[
|
||||
# epc_c_movers["property_type"] == "House"
|
||||
# ]
|
||||
# house_epc_c_movers_with_solar = house_epc_c_movers[
|
||||
# ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
|
||||
# ]
|
||||
|
||||
house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
|
||||
~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
|
||||
]
|
||||
# house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
|
||||
# ~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
|
||||
# ]
|
||||
|
||||
flat_epc_c_movers = epc_c_movers[
|
||||
epc_c_movers["property_type"] == "Flat"
|
||||
]
|
||||
# flat_epc_c_movers = epc_c_movers[
|
||||
# epc_c_movers["property_type"] == "Flat"
|
||||
# ]
|
||||
|
||||
epc_c_recommendations["sap_points"].mean()
|
||||
epc_c_recommendations["sap_points"].mean()
|
||||
# epc_c_recommendations["sap_points"].mean()
|
||||
# epc_c_recommendations["sap_points"].mean()
|
||||
|
||||
measure_cols = [
|
||||
"air_source_heat_pump",
|
||||
"boiler_upgrade",
|
||||
"cavity_wall_insulation",
|
||||
"double_glazing",
|
||||
"external_wall_insulation",
|
||||
"flat_roof_insulation",
|
||||
"high_heat_retention_storage_heaters",
|
||||
"internal_wall_insulation",
|
||||
"loft_insulation",
|
||||
"low_energy_lighting",
|
||||
"mechanical_ventilation",
|
||||
"room_roof_insulation",
|
||||
"roomstat_programmer_trvs",
|
||||
"sealing_open_fireplace",
|
||||
"secondary_glazing",
|
||||
"secondary_heating",
|
||||
"solar_pv",
|
||||
"solar_pv_with_battery",
|
||||
"suspended_floor_insulation",
|
||||
"time_temperature_zone_control",
|
||||
]
|
||||
# measure_cols = [
|
||||
# "air_source_heat_pump",
|
||||
# "boiler_upgrade",
|
||||
# "cavity_wall_insulation",
|
||||
# "double_glazing",
|
||||
# "external_wall_insulation",
|
||||
# "flat_roof_insulation",
|
||||
# "high_heat_retention_storage_heaters",
|
||||
# "internal_wall_insulation",
|
||||
# "loft_insulation",
|
||||
# "low_energy_lighting",
|
||||
# "mechanical_ventilation",
|
||||
# "room_roof_insulation",
|
||||
# "roomstat_programmer_trvs",
|
||||
# "sealing_open_fireplace",
|
||||
# "secondary_glazing",
|
||||
# "secondary_heating",
|
||||
# "solar_pv",
|
||||
# "solar_pv_with_battery",
|
||||
# "suspended_floor_insulation",
|
||||
# "time_temperature_zone_control",
|
||||
# ]
|
||||
|
||||
epc_c_melted = (
|
||||
epc_c_recommendations
|
||||
.melt(
|
||||
id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
|
||||
value_vars=measure_cols,
|
||||
var_name="measure_type",
|
||||
value_name="value",
|
||||
)
|
||||
.dropna(subset=["value"])
|
||||
)
|
||||
epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
|
||||
epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
|
||||
# epc_c_melted = (
|
||||
# epc_c_recommendations
|
||||
# .melt(
|
||||
# id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
|
||||
# value_vars=measure_cols,
|
||||
# var_name="measure_type",
|
||||
# value_name="value",
|
||||
# )
|
||||
# .dropna(subset=["value"])
|
||||
# )
|
||||
# epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
|
||||
# epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
|
||||
|
||||
epc_b_melted = (
|
||||
epc_b_recommendations
|
||||
.melt(
|
||||
id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
|
||||
value_vars=measure_cols,
|
||||
var_name="measure_type",
|
||||
value_name="value",
|
||||
)
|
||||
.dropna(subset=["value"])
|
||||
)
|
||||
# epc_b_melted = (
|
||||
# epc_b_recommendations
|
||||
# .melt(
|
||||
# id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
|
||||
# value_vars=measure_cols,
|
||||
# var_name="measure_type",
|
||||
# value_name="value",
|
||||
# )
|
||||
# .dropna(subset=["value"])
|
||||
# )
|
||||
|
||||
epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
|
||||
epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
|
||||
# epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
|
||||
# epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
|
||||
|
||||
measures_compared = epc_c_measures.merge(
|
||||
epc_b_measures,
|
||||
left_on="measure_type",
|
||||
right_on="measure_type",
|
||||
suffixes=("_epc_c", "_epc_b"),
|
||||
)
|
||||
# measures_compared = epc_c_measures.merge(
|
||||
# epc_b_measures,
|
||||
# left_on="measure_type",
|
||||
# right_on="measure_type",
|
||||
# suffixes=("_epc_c", "_epc_b"),
|
||||
# )
|
||||
|
||||
epc_c_retrofits = epc_c_recommendations[
|
||||
epc_c_recommendations["total_retrofit_cost"] > 0
|
||||
]
|
||||
# epc_c_retrofits = epc_c_recommendations[
|
||||
# epc_c_recommendations["total_retrofit_cost"] > 0
|
||||
# ]
|
||||
|
||||
epc_b_retrofits = epc_b_recommendations[
|
||||
epc_b_recommendations["total_retrofit_cost"] > 0
|
||||
]
|
||||
# epc_b_retrofits = epc_b_recommendations[
|
||||
# epc_b_recommendations["total_retrofit_cost"] > 0
|
||||
# ]
|
||||
|
||||
epc_c_retrofits["sap_points"].mean()
|
||||
epc_b_retrofits["sap_points"].mean()
|
||||
# epc_c_retrofits["sap_points"].mean()
|
||||
# epc_b_retrofits["sap_points"].mean()
|
||||
|
||||
properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
|
||||
# properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
|
||||
|
||||
properties_in_both["total_retrofit_cost_epc_c"].mean()
|
||||
properties_in_both["sap_points_epc_c"].mean()
|
||||
properties_in_both["total_retrofit_cost_epc_b"].mean()
|
||||
properties_in_both["sap_points_epc_b"].mean()
|
||||
# properties_in_both["total_retrofit_cost_epc_c"].mean()
|
||||
# properties_in_both["sap_points_epc_c"].mean()
|
||||
# properties_in_both["total_retrofit_cost_epc_b"].mean()
|
||||
# properties_in_both["sap_points_epc_b"].mean()
|
||||
|
||||
# Solar PV savings - we need the amount of solar PV bill savings
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
|
@ -114,16 +114,12 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco
|
|||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
||||
from collections import defaultdict
|
||||
|
||||
PORTFOLIO_ID = 435 # Peabody
|
||||
PORTFOLIO_ID = 485 # Peabody
|
||||
SCENARIOS = [
|
||||
908,
|
||||
909,
|
||||
910,
|
||||
970
|
||||
]
|
||||
scenario_names = {
|
||||
908: "EPC C - no solid floor, ashp 3.0",
|
||||
909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
|
||||
910: "EPC B - no solid floor, no EWI, ashp 3.0"
|
||||
970: "EPC C - no solid floor, ashp 3.0",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -236,307 +232,266 @@ recommendations_df = pd.DataFrame(recommendations_data)
|
|||
properties_df = pd.DataFrame(properties_data)
|
||||
plans_df = pd.DataFrame(plans_data)
|
||||
|
||||
s_id = 910
|
||||
ps_w_a_plan = plans_df[plans_df["scenario_id"] == s_id].copy()
|
||||
# Take the newest by scenario id
|
||||
ps_w_a_plan = ps_w_a_plan.sort_values("created_at", ascending=False).drop_duplicates(
|
||||
subset=["property_id"]
|
||||
)
|
||||
z = ps_w_a_plan[
|
||||
ps_w_a_plan["cost_of_works"] > 0
|
||||
].copy()
|
||||
z2 = properties_df[properties_df["property_id"].isin(z["property_id"].values)]
|
||||
# '', 'hot_water_cost_current',
|
||||
# 'lighting_cost_current', 'appliances_cost_current',
|
||||
# 'gas_standing_charge', 'electricity_standing_charge'
|
||||
z2["total_bills"] = z2["heating_cost_current"] + z2["hot_water_cost_current"] + z2["lighting_cost_current"] + z2[
|
||||
"appliances_cost_current"
|
||||
] + z2["gas_standing_charge"] + z2["electricity_standing_charge"]
|
||||
with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
|
||||
recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
|
||||
properties_df.to_excel(writer, sheet_name="properties", index=False)
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
# Loop over property IDs to find a property where the no EWI/IWI approach is more expensive than the EWI approach
|
||||
pids = properties_df["property_id"].unique()
|
||||
for pid in tqdm(pids):
|
||||
# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
|
||||
# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
|
||||
|
||||
if pid in [603272, 550550, 574493]:
|
||||
continue
|
||||
|
||||
# get the plans
|
||||
property_plan = plans_df[plans_df["property_id"] == int(pid)]
|
||||
# Take the newest plan by scenario id
|
||||
property_plan = property_plan.sort_values("created_at", ascending=False).drop_duplicates(
|
||||
subset=["scenario_id"]
|
||||
)
|
||||
a = property_plan[property_plan["scenario_id"] == 909].squeeze() # no EWI/IWI
|
||||
b = property_plan[property_plan["scenario_id"] == 908].squeeze() # EWI
|
||||
if (a["cost_of_works"] > b["cost_of_works"]) and (
|
||||
a["post_epc_rating"].value == "C") and (b["cost_of_works"] > 5000):
|
||||
bah
|
||||
|
||||
solar_pv_recommendations = recommendations_df[
|
||||
recommendations_df["measure_type"] == "solar_pv"
|
||||
]
|
||||
# # Check tenures
|
||||
# initial_asset_data = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
# "- Data Extracts for Domna.xlsx",
|
||||
# sheet_name="Properties"
|
||||
# )
|
||||
# sustainability_data = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
# "- Data Extracts for Domna.xlsx",
|
||||
# sheet_name="Sustainability"
|
||||
# )
|
||||
|
||||
solid_wall_recommendation = recommendations_df[
|
||||
recommendations_df["scenario_id"].isin([908]) &
|
||||
recommendations_df["measure_type"].isin(["internal_wall_insulation"]) &
|
||||
recommendations_df["default"]
|
||||
]
|
||||
average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
|
||||
# Add on scenario names
|
||||
average_savings["scenario_name"] = average_savings["scenario_id"].map(scenario_names)
|
||||
# sustainability_sample = sustainability_data[
|
||||
# sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
|
||||
# ]
|
||||
|
||||
# Check tenures
|
||||
initial_asset_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
"- Data Extracts for Domna.xlsx",
|
||||
sheet_name="Properties"
|
||||
)
|
||||
sustainability_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
"- Data Extracts for Domna.xlsx",
|
||||
sheet_name="Sustainability"
|
||||
)
|
||||
# sustainability_sample = sustainability_sample.merge(
|
||||
# initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
|
||||
# )
|
||||
|
||||
sustainability_sample = sustainability_data[
|
||||
sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
|
||||
]
|
||||
# block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
|
||||
# block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
|
||||
|
||||
sustainability_sample = sustainability_sample.merge(
|
||||
initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
|
||||
)
|
||||
# initial_asset_data.columns
|
||||
# initial_asset_data["LeaseType"].value_counts()
|
||||
|
||||
block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
|
||||
block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
|
||||
# # sustainability_sample["Tenure Group"].value_counts()
|
||||
# # Tenure Group
|
||||
# # General Needs 57787
|
||||
# # Home Ownership 25471
|
||||
# # Care & Supported Housing 4239
|
||||
# # Rental 2677
|
||||
# # Other 188
|
||||
|
||||
initial_asset_data.columns
|
||||
initial_asset_data["LeaseType"].value_counts()
|
||||
# df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
|
||||
# df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
|
||||
|
||||
# sustainability_sample["Tenure Group"].value_counts()
|
||||
# Tenure Group
|
||||
# General Needs 57787
|
||||
# Home Ownership 25471
|
||||
# Care & Supported Housing 4239
|
||||
# Rental 2677
|
||||
# Other 188
|
||||
# tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
|
||||
# tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
|
||||
|
||||
df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
|
||||
df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
|
||||
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
|
||||
|
||||
tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
|
||||
tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
|
||||
# sample_data = initial_asset_data[
|
||||
# ~initial_asset_data["Ownership Type"].isin(
|
||||
# [
|
||||
# # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
|
||||
# # Freeholder
|
||||
# "FREEHOLDER", # 19517 properties
|
||||
# # HOMEBUY / EQUITY LOAN
|
||||
# "Rent to Homebuy", # 1 property
|
||||
# # Leaseholder
|
||||
# "LEASEHOLD 100%", # 8455 properties
|
||||
# "Owned and Managed - 999 year lease", # 2076 properties
|
||||
# "Managed but not Owned-Private Lease", # 159 properties
|
||||
# "Owned and managed LEASEHOLD", # 26 properties
|
||||
# # Outright Sale - can't find anything matching
|
||||
# # SHARED EQUITY
|
||||
# "Shared Ownership", # 4065 properties
|
||||
# "Shared Ownership Owned Not Managed", # 23 properties
|
||||
# # Extra categories which seem sensible to exclude
|
||||
# "NOT MANAGED AND NOT OWNED"
|
||||
# ]
|
||||
# )
|
||||
# ]
|
||||
|
||||
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
|
||||
# sample_data["Ownership Type"].value_counts()
|
||||
|
||||
sample_data = initial_asset_data[
|
||||
~initial_asset_data["Ownership Type"].isin(
|
||||
[
|
||||
# Commercial # Everything is resi - based on the Residential Indicator variable - all are true
|
||||
# Freeholder
|
||||
"FREEHOLDER", # 19517 properties
|
||||
# HOMEBUY / EQUITY LOAN
|
||||
"Rent to Homebuy", # 1 property
|
||||
# Leaseholder
|
||||
"LEASEHOLD 100%", # 8455 properties
|
||||
"Owned and Managed - 999 year lease", # 2076 properties
|
||||
"Managed but not Owned-Private Lease", # 159 properties
|
||||
"Owned and managed LEASEHOLD", # 26 properties
|
||||
# Outright Sale - can't find anything matching
|
||||
# SHARED EQUITY
|
||||
"Shared Ownership", # 4065 properties
|
||||
"Shared Ownership Owned Not Managed", # 23 properties
|
||||
# Extra categories which seem sensible to exclude
|
||||
"NOT MANAGED AND NOT OWNED"
|
||||
]
|
||||
)
|
||||
]
|
||||
# sample_data = initial_asset_data[
|
||||
# initial_asset_data["Ownership Type"].isin(
|
||||
# [
|
||||
# "Owned and Managed",
|
||||
# "Owned and Managed - 999 year lease",
|
||||
# "Owned and managed LEASEHOLD",
|
||||
# "LEASEHOLD 100%",
|
||||
# "DATALOAD DEFAULT"
|
||||
# ]
|
||||
# )
|
||||
# ]
|
||||
# dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
|
||||
# dropped["Ownership Type"].value_counts()
|
||||
|
||||
sample_data["Ownership Type"].value_counts()
|
||||
# for value in [
|
||||
# # Commercial # Everything is resi, so should be fine. No matches
|
||||
# # Freeholder
|
||||
# "FREEHOLDER", # 19517 properties
|
||||
# # HOMEBUY / EQUITY LOAN
|
||||
# "Rent to Homebuy", # 1 property
|
||||
# # Leaseholder
|
||||
# "LEASEHOLD 100%", # 8455 properties
|
||||
# "Owned and Managed - 999 year lease", # 2076 properties
|
||||
# "Managed but not Owned-Private Lease", # 159 properties
|
||||
# "Owned and managed LEASEHOLD", # 26 properties
|
||||
# # Outright Sale - can't find anything matching
|
||||
# # SHARED EQUITY
|
||||
# "Shared Ownership", # 4065 properties
|
||||
# "Shared Ownership Owned Not Managed", # 23 properties
|
||||
# ]:
|
||||
# print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
|
||||
|
||||
sample_data = initial_asset_data[
|
||||
initial_asset_data["Ownership Type"].isin(
|
||||
[
|
||||
"Owned and Managed",
|
||||
"Owned and Managed - 999 year lease",
|
||||
"Owned and managed LEASEHOLD",
|
||||
"LEASEHOLD 100%",
|
||||
"DATALOAD DEFAULT"
|
||||
]
|
||||
)
|
||||
]
|
||||
dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
|
||||
dropped["Ownership Type"].value_counts()
|
||||
# house_types = [
|
||||
# "HOUSE",
|
||||
# "BUNGALOW",
|
||||
# "MAISONETTE",
|
||||
# "DUPLEX",
|
||||
# ]
|
||||
|
||||
for value in [
|
||||
# Commercial # Everything is resi, so should be fine. No matches
|
||||
# Freeholder
|
||||
"FREEHOLDER", # 19517 properties
|
||||
# HOMEBUY / EQUITY LOAN
|
||||
"Rent to Homebuy", # 1 property
|
||||
# Leaseholder
|
||||
"LEASEHOLD 100%", # 8455 properties
|
||||
"Owned and Managed - 999 year lease", # 2076 properties
|
||||
"Managed but not Owned-Private Lease", # 159 properties
|
||||
"Owned and managed LEASEHOLD", # 26 properties
|
||||
# Outright Sale - can't find anything matching
|
||||
# SHARED EQUITY
|
||||
"Shared Ownership", # 4065 properties
|
||||
"Shared Ownership Owned Not Managed", # 23 properties
|
||||
]:
|
||||
print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
|
||||
# guaranteed_control = [
|
||||
# "Owned and Managed",
|
||||
# "Owned and Managed - 999 year lease",
|
||||
# "Owned and managed LEASEHOLD",
|
||||
# "LEASEHOLD 100%",
|
||||
# "DATALOAD DEFAULT",
|
||||
# ]
|
||||
|
||||
house_types = [
|
||||
"HOUSE",
|
||||
"BUNGALOW",
|
||||
"MAISONETTE",
|
||||
"DUPLEX",
|
||||
]
|
||||
# sample_data = initial_asset_data[
|
||||
# (
|
||||
# initial_asset_data["Ownership Type"].isin(guaranteed_control)
|
||||
# )
|
||||
# |
|
||||
# (
|
||||
# (initial_asset_data["Ownership Type"] == "FREEHOLDER")
|
||||
# &
|
||||
# (initial_asset_data["Property Type"].isin(house_types))
|
||||
# )
|
||||
# ]
|
||||
|
||||
guaranteed_control = [
|
||||
"Owned and Managed",
|
||||
"Owned and Managed - 999 year lease",
|
||||
"Owned and managed LEASEHOLD",
|
||||
"LEASEHOLD 100%",
|
||||
"DATALOAD DEFAULT",
|
||||
]
|
||||
# fabric_retrofit_sample = initial_asset_data[
|
||||
# initial_asset_data["Ownership Type"].isin(
|
||||
# [
|
||||
# "Owned and Managed",
|
||||
# "FREEHOLDER",
|
||||
# "DATALOAD DEFAULT",
|
||||
# ]
|
||||
# )
|
||||
# ]
|
||||
|
||||
sample_data = initial_asset_data[
|
||||
(
|
||||
initial_asset_data["Ownership Type"].isin(guaranteed_control)
|
||||
)
|
||||
|
|
||||
(
|
||||
(initial_asset_data["Ownership Type"] == "FREEHOLDER")
|
||||
&
|
||||
(initial_asset_data["Property Type"].isin(house_types))
|
||||
)
|
||||
]
|
||||
# initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||
|
||||
fabric_retrofit_sample = initial_asset_data[
|
||||
initial_asset_data["Ownership Type"].isin(
|
||||
[
|
||||
"Owned and Managed",
|
||||
"FREEHOLDER",
|
||||
"DATALOAD DEFAULT",
|
||||
]
|
||||
)
|
||||
]
|
||||
# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
|
||||
# z = initial_asset_data[
|
||||
# ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
|
||||
# ]
|
||||
|
||||
initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
|
||||
# block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
|
||||
# zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
|
||||
|
||||
initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
|
||||
z = initial_asset_data[
|
||||
~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
|
||||
]
|
||||
# potential_sample = initial_asset_data[
|
||||
# ~pd.isnull(initial_asset_data["BlockCode"])
|
||||
# ]
|
||||
|
||||
block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
|
||||
zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
|
||||
# compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||
# initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
# left_on="Property Type",
|
||||
# right_on="Property Type",
|
||||
# suffixes=("_on_block_codes", "_overall")
|
||||
# )
|
||||
|
||||
potential_sample = initial_asset_data[
|
||||
~pd.isnull(initial_asset_data["BlockCode"])
|
||||
]
|
||||
# # Comparison of smaller sample vs overall
|
||||
# new_asset_data = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||
# "- Peabody "
|
||||
# "- Data Extracts for Domna v2.xlsx",
|
||||
# sheet_name="Properties"
|
||||
# )
|
||||
|
||||
compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||
initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
left_on="Property Type",
|
||||
right_on="Property Type",
|
||||
suffixes=("_on_block_codes", "_overall")
|
||||
)
|
||||
# new_sustainability_data = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||
# "- Peabody "
|
||||
# "- Data Extracts for Domna v2.xlsx",
|
||||
# sheet_name="Sustainability"
|
||||
# )
|
||||
|
||||
# Comparison of smaller sample vs overall
|
||||
new_asset_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||
"- Peabody "
|
||||
"- Data Extracts for Domna v2.xlsx",
|
||||
sheet_name="Properties"
|
||||
)
|
||||
# sap_bands = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
|
||||
# "08012026.xlsx",
|
||||
# )
|
||||
|
||||
new_sustainability_data = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||
"- Peabody "
|
||||
"- Data Extracts for Domna v2.xlsx",
|
||||
sheet_name="Sustainability"
|
||||
)
|
||||
# combined = new_asset_data.merge(
|
||||
# new_sustainability_data,
|
||||
# left_on="UPRN",
|
||||
# right_on="Org Ref",
|
||||
# suffixes=("_asset", "_sustainability")
|
||||
# ).merge(
|
||||
# sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
|
||||
# )
|
||||
# reduced_sample = combined[
|
||||
# ~combined["AH Tenure"].isin(
|
||||
# ["Commercial",
|
||||
# "Freeholder",
|
||||
# "HOMEBUY / EQUITY LOAN",
|
||||
# "Leaseholder",
|
||||
# "Outright Sale",
|
||||
# "SHARED EQUITY",
|
||||
# "Shared Ownership"]
|
||||
# )
|
||||
# ].copy()
|
||||
|
||||
sap_bands = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
|
||||
"08012026.xlsx",
|
||||
)
|
||||
# # property types
|
||||
# property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||
# combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
# left_on="Property Type",
|
||||
# right_on="Property Type",
|
||||
# suffixes=("_reduced_sample", "_overall")
|
||||
# )
|
||||
|
||||
combined = new_asset_data.merge(
|
||||
new_sustainability_data,
|
||||
left_on="UPRN",
|
||||
right_on="Org Ref",
|
||||
suffixes=("_asset", "_sustainability")
|
||||
).merge(
|
||||
sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
|
||||
)
|
||||
reduced_sample = combined[
|
||||
~combined["AH Tenure"].isin(
|
||||
["Commercial",
|
||||
"Freeholder",
|
||||
"HOMEBUY / EQUITY LOAN",
|
||||
"Leaseholder",
|
||||
"Outright Sale",
|
||||
"SHARED EQUITY",
|
||||
"Shared Ownership"]
|
||||
)
|
||||
].copy()
|
||||
# # lodged ratings
|
||||
# lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
|
||||
# normalize=True).to_frame().reset_index().merge(
|
||||
# combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
# left_on="Lodged EPC Band",
|
||||
# right_on="Lodged EPC Band",
|
||||
# suffixes=("_reduced_sample", "_overall")
|
||||
# )
|
||||
|
||||
# property types
|
||||
property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
|
||||
combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
left_on="Property Type",
|
||||
right_on="Property Type",
|
||||
suffixes=("_reduced_sample", "_overall")
|
||||
)
|
||||
# # modelled ratings
|
||||
# modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
|
||||
# normalize=True).to_frame().reset_index().merge(
|
||||
# combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
# left_on="SAP Band",
|
||||
# right_on="SAP Band",
|
||||
# suffixes=("_reduced_sample", "_overall")
|
||||
# )
|
||||
|
||||
# lodged ratings
|
||||
lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
|
||||
normalize=True).to_frame().reset_index().merge(
|
||||
combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
left_on="Lodged EPC Band",
|
||||
right_on="Lodged EPC Band",
|
||||
suffixes=("_reduced_sample", "_overall")
|
||||
)
|
||||
# # Testing measures
|
||||
# m1 = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
# "solid floor, ashp 3.0 - 20250113 final.xlsx"
|
||||
# )
|
||||
# m2 = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
# "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
|
||||
# )
|
||||
|
||||
# modelled ratings
|
||||
modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
|
||||
normalize=True).to_frame().reset_index().merge(
|
||||
combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
|
||||
left_on="SAP Band",
|
||||
right_on="SAP Band",
|
||||
suffixes=("_reduced_sample", "_overall")
|
||||
)
|
||||
# compare = m1.merge(
|
||||
# m2,
|
||||
# left_on="uprn",
|
||||
# right_on="uprn",
|
||||
# suffixes=("_ewi_iwi", "_no_ewi_iwi")
|
||||
# )
|
||||
|
||||
# Testing measures
|
||||
m1 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
"solid floor, ashp 3.0 - 20250113 final.xlsx"
|
||||
)
|
||||
m2 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
|
||||
"solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
|
||||
)
|
||||
# # Which properties get work done under the no EWI/IWI scenario but not under the EWI/IWI scenario
|
||||
# only_no_ewi_iwi = compare[
|
||||
# (compare["total_retrofit_cost_ewi_iwi"] == 0) &
|
||||
# (compare["total_retrofit_cost_no_ewi_iwi"] != 0)
|
||||
# ]
|
||||
|
||||
compare = m1.merge(
|
||||
m2,
|
||||
left_on="uprn",
|
||||
right_on="uprn",
|
||||
suffixes=("_ewi_iwi", "_no_ewi_iwi")
|
||||
)
|
||||
# (m1["total_retrofit_cost"] > 0).sum()
|
||||
# (m2["total_retrofit_cost"] > 0).sum()
|
||||
|
||||
# Which properties get work done under the no EWI/IWI scenario but not under the EWI/IWI scenario
|
||||
only_no_ewi_iwi = compare[
|
||||
(compare["total_retrofit_cost_ewi_iwi"] == 0) &
|
||||
(compare["total_retrofit_cost_no_ewi_iwi"] != 0)
|
||||
]
|
||||
# with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
|
||||
|
||||
(m1["total_retrofit_cost"] > 0).sum()
|
||||
(m2["total_retrofit_cost"] > 0).sum()
|
||||
|
||||
with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
|
||||
|
||||
z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
|
||||
# z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
|
||||
|
|
|
|||
51
infrastructure/terraform/lambda/_template/README.md
Normal file
51
infrastructure/terraform/lambda/_template/README.md
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
## Checklist for adding a new Lambda
|
||||
|
||||
### 1. Create the Lambda scaffold
|
||||
- Copy the template:
|
||||
|
||||
cp -r lambda/_template lambda/<lambda_name>
|
||||
|
||||
---
|
||||
|
||||
### 2. Add infrastructure prerequisites (shared stack)
|
||||
- Add a new ECR repository in:
|
||||
|
||||
infrastructure/terraform/shared/main.tf
|
||||
|
||||
- Apply the shared stack
|
||||
- This requires commenting out `if env.stage == "prod"` in `.github/workflows/deploy_terraform.yml`
|
||||
|
||||
- Verify the ECR repository exists in AWS
|
||||
|
||||
---
|
||||
|
||||
### 3. Add Docker build configuration
|
||||
- Create a `Dockerfile` for the Lambda
|
||||
- Verify the Dockerfile path and build context
|
||||
- Add a new image build job in `deploy_terraform.yml` using `_build_image.yml`
|
||||
|
||||
---
|
||||
|
||||
### 4. Wire the Lambda deploy job (CI)
|
||||
- Add a deploy job using `_deploy_lambda.yml`
|
||||
- Ensure the deploy job depends on the image build job
|
||||
|
||||
---
|
||||
|
||||
### 5. Deploy
|
||||
- Push changes to GitHub
|
||||
- CI will:
|
||||
1. Build and push the Docker image
|
||||
2. Deploy the Lambda
|
||||
3. Verify everything deployed. Good things to check:
|
||||
- ECR with image
|
||||
- SQS
|
||||
- Trigger SQS
|
||||
- CloudWatch logs
|
||||
---
|
||||
### 6. Delete
|
||||
1. Delete this README from the new Lambda directory if you created it with `cp -r`
|
||||
|
||||
---
|
||||
|
||||
## Please feel free to update this document to make it easier for the next person
|
||||
14
infrastructure/terraform/lambda/_template/main.tf
Normal file
14
infrastructure/terraform/lambda/_template/main.tf
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
module "lambda" {
|
||||
source = "../modules/lambda_with_sqs"
|
||||
|
||||
name = REPLACE ME #"address2uprn" for example
|
||||
stage = var.stage
|
||||
|
||||
image_uri = local.image_uri
|
||||
|
||||
|
||||
environment = {
|
||||
STAGE = var.stage
|
||||
LOG_LEVEL = "info"
|
||||
}
|
||||
}
|
||||
16
infrastructure/terraform/lambda/_template/provider.tf
Normal file
16
infrastructure/terraform/lambda/_template/provider.tf
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 4.16"
|
||||
}
|
||||
}
|
||||
|
||||
backend "s3" {
|
||||
bucket = REPLACE_ME
|
||||
key = "terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
27
infrastructure/terraform/lambda/_template/variables.tf
Normal file
27
infrastructure/terraform/lambda/_template/variables.tf
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
variable "lambda_name" {
|
||||
type = string
|
||||
description = "Logical name of the lambda (e.g. address2uprn)"
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
description = "Deployment stage (e.g. dev, prod)"
|
||||
type = string
|
||||
}
|
||||
variable "ecr_repo_url" {
|
||||
type = string
|
||||
description = "ECR repository URL (no tag, no digest)"
|
||||
}
|
||||
|
||||
variable "image_digest" {
|
||||
type = string
|
||||
description = "Image digest (sha256:...)"
|
||||
}
|
||||
|
||||
|
||||
locals {
|
||||
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
|
||||
}
|
||||
|
||||
output "resolved_image_uri" {
|
||||
value = local.image_uri
|
||||
}
|
||||
14
infrastructure/terraform/lambda/address2UPRN/main.tf
Normal file
14
infrastructure/terraform/lambda/address2UPRN/main.tf
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
module "address2uprn" {
|
||||
source = "../modules/lambda_with_sqs"
|
||||
|
||||
name = "address2uprn"
|
||||
stage = var.stage
|
||||
|
||||
image_uri = local.image_uri
|
||||
|
||||
|
||||
environment = {
|
||||
STAGE = var.stage
|
||||
LOG_LEVEL = "info"
|
||||
}
|
||||
}
|
||||
17
infrastructure/terraform/lambda/address2UPRN/provider.tf
Normal file
17
infrastructure/terraform/lambda/address2UPRN/provider.tf
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 4.16"
|
||||
}
|
||||
}
|
||||
|
||||
backend "s3" {
|
||||
bucket = "address2uprn-terraform-state"
|
||||
key = "terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
|
||||
27
infrastructure/terraform/lambda/address2UPRN/variables.tf
Normal file
27
infrastructure/terraform/lambda/address2UPRN/variables.tf
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
variable "lambda_name" {
|
||||
type = string
|
||||
description = "Logical name of the lambda (e.g. address2uprn)"
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
description = "Deployment stage (e.g. dev, prod)"
|
||||
type = string
|
||||
}
|
||||
variable "ecr_repo_url" {
|
||||
type = string
|
||||
description = "ECR repository URL (no tag, no digest)"
|
||||
}
|
||||
|
||||
variable "image_digest" {
|
||||
type = string
|
||||
description = "Image digest (sha256:...)"
|
||||
}
|
||||
|
||||
|
||||
locals {
|
||||
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
|
||||
}
|
||||
|
||||
output "resolved_image_uri" {
|
||||
value = local.image_uri
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
############################################
|
||||
# IAM role
|
||||
############################################
|
||||
module "role" {
|
||||
source = "../../../modules/lambda_execution_role"
|
||||
name = "${var.name}-lambda-${var.stage}"
|
||||
}
|
||||
|
||||
############################################
|
||||
# SQS queue + DLQ
|
||||
############################################
|
||||
module "queue" {
|
||||
source = "../../../modules/sqs_queue"
|
||||
name = "${var.name}-queue-${var.stage}"
|
||||
}
|
||||
|
||||
############################################
|
||||
# Lambda
|
||||
############################################
|
||||
module "lambda" {
|
||||
source = "../../../modules/lambda_service"
|
||||
|
||||
name = "${var.name}-${var.stage}"
|
||||
role_arn = module.role.role_arn
|
||||
image_uri = var.image_uri
|
||||
|
||||
timeout = var.timeout
|
||||
memory_size = var.memory_size
|
||||
|
||||
environment = var.environment
|
||||
}
|
||||
|
||||
############################################
|
||||
# SQS → Lambda trigger
|
||||
############################################
|
||||
module "sqs_trigger" {
|
||||
source = "../../../modules/lambda_sqs_trigger"
|
||||
|
||||
lambda_arn = module.lambda.lambda_arn
|
||||
lambda_role_name = module.role.role_name
|
||||
queue_arn = module.queue.queue_arn
|
||||
|
||||
batch_size = var.batch_size
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
output "lambda_arn" {
|
||||
value = module.lambda.lambda_arn
|
||||
}
|
||||
|
||||
output "queue_arn" {
|
||||
value = module.queue.queue_arn
|
||||
}
|
||||
|
||||
output "queue_url" {
|
||||
value = module.queue.queue_url
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
variable "name" {
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "image_uri" {
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "region" {
|
||||
type = string
|
||||
default = "eu-west-2"
|
||||
}
|
||||
|
||||
variable "timeout" {
|
||||
type = number
|
||||
default = 60
|
||||
}
|
||||
|
||||
variable "memory_size" {
|
||||
type = number
|
||||
default = 1024
|
||||
}
|
||||
|
||||
variable "environment" {
|
||||
type = map(string)
|
||||
default = {}
|
||||
}
|
||||
|
||||
variable "batch_size" {
|
||||
type = number
|
||||
default = 10
|
||||
}
|
||||
30
infrastructure/terraform/modules/container_registry/main.tf
Normal file
30
infrastructure/terraform/modules/container_registry/main.tf
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
resource "aws_ecr_repository" "this" {
|
||||
name = "${var.name}-${var.stage}"
|
||||
|
||||
image_tag_mutability = "MUTABLE"
|
||||
|
||||
image_scanning_configuration {
|
||||
scan_on_push = true
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_ecr_lifecycle_policy" "this" {
|
||||
repository = aws_ecr_repository.this.name
|
||||
|
||||
policy = jsonencode({
|
||||
rules = [
|
||||
{
|
||||
rulePriority = 1
|
||||
description = "Expire old images"
|
||||
selection = {
|
||||
tagStatus = "any"
|
||||
countType = "imageCountMoreThan"
|
||||
countNumber = var.retain_count
|
||||
}
|
||||
action = {
|
||||
type = "expire"
|
||||
}
|
||||
}
|
||||
]
|
||||
})
|
||||
}
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
output "repository_name" {
|
||||
value = aws_ecr_repository.this.name
|
||||
}
|
||||
|
||||
output "repository_url" {
|
||||
value = aws_ecr_repository.this.repository_url
|
||||
}
|
||||
|
||||
output "repository_arn" {
|
||||
value = aws_ecr_repository.this.arn
|
||||
}
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
variable "name" {
|
||||
description = "Base name of the repository (without stage)"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
description = "Deployment stage (e.g. dev, prod)"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "retain_count" {
|
||||
description = "Number of images to retain"
|
||||
type = number
|
||||
default = 10
|
||||
}
|
||||
|
|
@ -1,3 +1,6 @@
|
|||
# This ecr works for things deployed by serverless.
|
||||
# TODO: unify ecr and container_registry into a single module
|
||||
|
||||
resource "aws_ecr_repository" "my_repository" {
|
||||
name = "${var.ecr_name}"
|
||||
image_tag_mutability = "MUTABLE"
|
||||
|
|
|
|||
|
|
@ -2,3 +2,9 @@ output "ecr_repository_name" {
|
|||
description = "Name of the EPR repo in AWS"
|
||||
value = aws_ecr_repository.my_repository.name
|
||||
}
|
||||
|
||||
|
||||
output "ecr_repository_url" {
|
||||
description = "Full ECR repository URL"
|
||||
value = aws_ecr_repository.my_repository.repository_url
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
data "aws_iam_policy_document" "assume" {
|
||||
statement {
|
||||
effect = "Allow"
|
||||
principals {
|
||||
type = "Service"
|
||||
identifiers = ["lambda.amazonaws.com"]
|
||||
}
|
||||
actions = ["sts:AssumeRole"]
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_iam_role" "this" {
|
||||
name = var.name
|
||||
assume_role_policy = data.aws_iam_policy_document.assume.json
|
||||
}
|
||||
|
||||
resource "aws_iam_role_policy_attachment" "basic_logs" {
|
||||
role = aws_iam_role.this.name
|
||||
policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
|
||||
}
|
||||
|
||||
resource "aws_iam_role_policy" "ecr_pull" {
|
||||
role = aws_iam_role.this.name
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [{
|
||||
Effect = "Allow"
|
||||
Action = [
|
||||
"ecr:GetAuthorizationToken",
|
||||
"ecr:BatchGetImage",
|
||||
"ecr:GetDownloadUrlForLayer"
|
||||
]
|
||||
Resource = "*"
|
||||
}]
|
||||
})
|
||||
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
# ARN of aws_iam_role.this.
output "role_arn" {
  value = aws_iam_role.this.arn
}
|
||||
|
||||
# Name of aws_iam_role.this.
output "role_name" {
  value = aws_iam_role.this.name
}
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
# Used verbatim as the name of the IAM role created by this module.
variable "name" {
  type        = string
  description = "IAM role name for the Lambda execution role"
}
|
||||
15
infrastructure/terraform/modules/lambda_service/main.tf
Normal file
15
infrastructure/terraform/modules/lambda_service/main.tf
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# Lambda function deployed from a container image.
resource "aws_lambda_function" "this" {
  function_name = var.name
  role          = var.role_arn

  # Image-based packaging: the code comes from image_uri.
  package_type = "Image"
  image_uri    = var.image_uri

  memory_size = var.memory_size
  timeout     = var.timeout

  # Publish a new version of the function.
  publish = true

  environment {
    variables = var.environment
  }
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# ARN of aws_lambda_function.this.
output "lambda_arn" {
  value = aws_lambda_function.this.arn
}
|
||||
18
infrastructure/terraform/modules/lambda_service/variables.tf
Normal file
18
infrastructure/terraform/modules/lambda_service/variables.tf
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
# Function name passed to aws_lambda_function.this.
variable "name" {
  type = string
}

# ARN of the execution role the function runs as.
variable "role_arn" {
  type = string
}

# Container image URI the function is deployed from.
variable "image_uri" {
  type = string
}
|
||||
|
||||
# Function timeout; 30 unless overridden.
variable "timeout" {
  default = 30
  type    = number
}
|
||||
|
||||
# Function memory size; 512 unless overridden.
variable "memory_size" {
  default = 512
  type    = number
}
|
||||
|
||||
# Environment variables for the function; empty map by default.
variable "environment" {
  default = {}
  type    = map(string)
}
|
||||
23
infrastructure/terraform/modules/lambda_sqs_trigger/main.tf
Normal file
23
infrastructure/terraform/modules/lambda_sqs_trigger/main.tf
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# Connects the SQS queue (var.queue_arn) to the Lambda function
# (var.lambda_arn) so the function is invoked with batches of messages.
resource "aws_lambda_event_source_mapping" "this" {
  event_source_arn = var.queue_arn
  function_name    = var.lambda_arn
  batch_size       = var.batch_size
  enabled          = true

  # The mapping has no attribute reference to the SQS policy, so without an
  # explicit dependency Terraform may create it before the role is allowed
  # to read from the queue, and the create call can then be rejected for
  # missing SQS permissions on the execution role.
  depends_on = [aws_iam_role_policy.allow_sqs]
}
|
||||
|
||||
# Inline policy on the Lambda's role allowing it to receive and delete
# messages and read attributes on the queue given by var.queue_arn only.
resource "aws_iam_role_policy" "allow_sqs" {
  role = var.lambda_role_name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Resource = var.queue_arn
        Action = [
          "sqs:ReceiveMessage",
          "sqs:DeleteMessage",
          "sqs:GetQueueAttributes",
        ]
      },
    ]
  })
}
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
# Function that receives the queue's messages.
variable "lambda_arn" {
  type = string
}

# Name of the role that receives the SQS inline policy.
variable "lambda_role_name" {
  type = string
}

# Queue used as the event source and as the policy's resource.
variable "queue_arn" {
  type = string
}
|
||||
|
||||
# Batch size for the event source mapping; 10 unless overridden.
variable "batch_size" {
  default = 10
  type    = number
}
|
||||
14
infrastructure/terraform/modules/sqs_queue/main.tf
Normal file
14
infrastructure/terraform/modules/sqs_queue/main.tf
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# Dead-letter queue, named after the main queue with a "-dlq" suffix.
resource "aws_sqs_queue" "dlq" {
  name = "${var.name}-dlq"
}
|
||||
|
||||
# Main queue. Messages received more than var.max_receive_count times are
# redirected to the dead-letter queue.
resource "aws_sqs_queue" "this" {
  name = var.name

  # NOTE(review): fixed at 120 seconds; confirm this is long enough for the
  # consuming function's timeout.
  visibility_timeout_seconds = 120

  redrive_policy = jsonencode({
    maxReceiveCount     = var.max_receive_count
    deadLetterTargetArn = aws_sqs_queue.dlq.arn
  })
}
|
||||
7
infrastructure/terraform/modules/sqs_queue/outputs.tf
Normal file
7
infrastructure/terraform/modules/sqs_queue/outputs.tf
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# ARN of the main queue (not the dead-letter queue).
output "queue_arn" {
  value = aws_sqs_queue.this.arn
}
|
||||
|
||||
# URL of the main queue (not the dead-letter queue).
output "queue_url" {
  value = aws_sqs_queue.this.url
}
|
||||
6
infrastructure/terraform/modules/sqs_queue/variables.tf
Normal file
6
infrastructure/terraform/modules/sqs_queue/variables.tf
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
# Queue name; the dead-letter queue reuses it with a "-dlq" suffix.
variable "name" {
  type = string
}
|
||||
|
||||
# maxReceiveCount for the redrive policy; 5 unless overridden.
variable "max_receive_count" {
  default = 5
  type    = number
}
|
||||
30
infrastructure/terraform/modules/tf_state_bucket/main.tf
Normal file
30
infrastructure/terraform/modules/tf_state_bucket/main.tf
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
# The bucket itself; versioning, encryption and the public-access block are
# configured by the separate resources in this module.
resource "aws_s3_bucket" "this" {
  bucket = var.bucket_name
}
|
||||
|
||||
# Turns on object versioning for the bucket.
resource "aws_s3_bucket_versioning" "this" {
  bucket = aws_s3_bucket.this.id

  versioning_configuration {
    status = "Enabled"
  }
}
|
||||
|
||||
# Default server-side encryption for the bucket using AES256.
resource "aws_s3_bucket_server_side_encryption_configuration" "this" {
  bucket = aws_s3_bucket.this.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
  }
}
|
||||
|
||||
# Enables all four public-access-block settings on the bucket.
resource "aws_s3_bucket_public_access_block" "this" {
  bucket = aws_s3_bucket.this.id

  block_public_acls       = true
  ignore_public_acls      = true
  block_public_policy     = true
  restrict_public_buckets = true
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
# Name of the created bucket.
output "bucket_name" {
  value = aws_s3_bucket.this.bucket
}
|
||||
|
||||
# ARN of the created bucket.
output "bucket_arn" {
  value = aws_s3_bucket.this.arn
}
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
# Name given to the S3 bucket created by this module.
variable "bucket_name" {
  type = string
}
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
stage = "dev"
|
||||
profile = "DevAdmin"
|
||||
region = "eu-west-2"
|
||||
|
||||
# Domain
|
||||
|
|
@ -8,7 +8,6 @@ terraform {
|
|||
backend "s3" {
|
||||
bucket = "assessment-model-terraform-state"
|
||||
region = "eu-west-2"
|
||||
profile = "DevAdmin"
|
||||
key = "terraform.tfstate"
|
||||
}
|
||||
|
||||
|
|
@ -16,7 +15,6 @@ terraform {
|
|||
}
|
||||
|
||||
provider "aws" {
|
||||
profile = var.profile
|
||||
region = var.region
|
||||
}
|
||||
|
||||
|
|
@ -91,101 +89,101 @@ resource "aws_db_instance" "default" {
|
|||
|
||||
# Set up the bucket that receives the CSV uploads of EPCs to be retrofitted
|
||||
module "s3_presignable_bucket" {
|
||||
source = "./modules/s3_presignable_bucket"
|
||||
source = "../modules/s3_presignable_bucket"
|
||||
bucketname = "retrofit-plan-inputs-${var.stage}"
|
||||
environment = var.stage
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "s3_due_considerations_bucket" {
|
||||
source = "./modules/s3_presignable_bucket"
|
||||
source = "../modules/s3_presignable_bucket"
|
||||
bucketname = "retrofit-due-considerations-${var.stage}"
|
||||
environment = var.stage
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "s3_eco_spreadseet_bucket" {
|
||||
source = "./modules/s3_presignable_bucket"
|
||||
source = "../modules/s3_presignable_bucket"
|
||||
bucketname = "retrofit-eco-spreadsheet-${var.stage}"
|
||||
environment = var.stage
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "s3" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-datalake-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "model_directory" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-model-directory-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_sap_predictions" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-sap-predictions-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_sap_data" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-data-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_carbon_predictions" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-carbon-predictions-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_heat_predictions" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-heat-predictions-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_lighting_cost_predictions" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-lighting-cost-predictions-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_heating_cost_predictions" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-heating-cost-predictions-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_hot_water_cost_predictions" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-hot-water-cost-predictions-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_heating_kwh_predictions" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-heating-kwh-predictions-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_hotwater_kwh_predictions" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-hotwater-kwh-predictions-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "retrofit_sap_baseline_predictions" {
|
||||
source = "./modules/s3"
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-sap-baseline-predictions-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
// We make this bucket presignable, because we want to generate download links for the frontend
|
||||
module "retrofit_energy_assessments" {
|
||||
source = "./modules/s3_presignable_bucket"
|
||||
source = "../modules/s3_presignable_bucket"
|
||||
bucketname = "retrofit-energy-assessments-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
environment = var.stage
|
||||
|
|
@ -193,7 +191,7 @@ module "retrofit_energy_assessments" {
|
|||
|
||||
# Set up the route53 record for the API
|
||||
module "route53" {
|
||||
source = "./modules/route53"
|
||||
source = "../modules/route53"
|
||||
domain_name = var.domain_name
|
||||
api_url_prefix = var.api_url_prefix
|
||||
providers = {
|
||||
|
|
@ -201,75 +199,76 @@ module "route53" {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
# Create an ECR repository for storage of the lambda's docker images
|
||||
module "ecr" {
|
||||
ecr_name = "fastapi-repository-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
module "lambda_sap_prediction_ecr" {
|
||||
ecr_name = "lambda-sap-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
module "due_considerations_ecr" {
|
||||
ecr_name = "due-considerations-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
module "eco_spreadsheet_ecr" {
|
||||
ecr_name = "eco-spreadsheet-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
module "lambda_carbon_prediction_ecr" {
|
||||
ecr_name = "lambda-carbon-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
module "lambda_heat_prediction_ecr" {
|
||||
ecr_name = "lambda-heat-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
# ECR repos for lighting cost, heating cost and hot water cost models
|
||||
module "lambda_lighting_cost_prediction_ecr" {
|
||||
ecr_name = "lighting-cost-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
module "lambda_heating_cost_prediction_ecr" {
|
||||
ecr_name = "heating-cost-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
module "lambda_hot_water_cost_prediction_ecr" {
|
||||
ecr_name = "hot-water-cost-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
# For heating and hot water kwh models
|
||||
module "lambda_heating_kwh_prediction_ecr" {
|
||||
ecr_name = "heating-kwh-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
module "lambda_hotwater_kwh_prediction_ecr" {
|
||||
ecr_name = "hotwater-kwh-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
# Baselining models
|
||||
module "sap_baseline_ecr" {
|
||||
ecr_name = "sap-baseline-prediction-${var.stage}"
|
||||
source = "./modules/ecr"
|
||||
source = "../modules/ecr"
|
||||
}
|
||||
|
||||
##############################################
|
||||
# CDN - Cloudfront
|
||||
##############################################
|
||||
module "cloudfront_distribution" {
|
||||
source = "./modules/cloudfront"
|
||||
source = "../modules/cloudfront"
|
||||
bucket_name = module.s3.bucket_name
|
||||
bucket_id = module.s3.bucket_id
|
||||
bucket_arn = module.s3.bucket_arn
|
||||
|
|
@ -281,7 +280,7 @@ module "cloudfront_distribution" {
|
|||
# SES - Email sending
|
||||
################################################
|
||||
module "ses" {
|
||||
source = "./modules/ses"
|
||||
source = "../modules/ses"
|
||||
domain_name = "domna.homes"
|
||||
stage = var.stage
|
||||
}
|
||||
|
|
@ -289,3 +288,27 @@ module "ses" {
|
|||
output "ses_dns_records" {
|
||||
value = module.ses.dns_records
|
||||
}
|
||||
|
||||
################################################
|
||||
# Address2UPRN – Terraform state bucket and container registry
|
||||
################################################
|
||||
# Bucket built from the tf_state_bucket module for Address2UPRN.
module "address2uprn_state_bucket" {
  source = "../modules/tf_state_bucket"

  bucket_name = "address2uprn-terraform-state"
}
|
||||
|
||||
# Surfaces the state bucket's name from the module above.
output "address2uprn_state_bucket_name" {
  value = module.address2uprn_state_bucket.bucket_name
}
|
||||
|
||||
# Container registry for Address2UPRN, built from the container_registry
# module with the current stage.
module "address2uprn_registry" {
  source = "../modules/container_registry"

  name  = "address2uprn"
  stage = var.stage
}
|
||||
|
||||
# Surfaces the registry module's repository_url output.
output "address2uprn_repository_url" {
  value = module.address2uprn_registry.repository_url
}
|
||||
|
|
@ -3,11 +3,6 @@ variable stage {
|
|||
type = string
|
||||
}
|
||||
|
||||
variable "profile" {
|
||||
description = "AWS profile to use"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "region" {
|
||||
description = "AWS region"
|
||||
type = string
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
pydantic==2.9.2
|
||||
pydantic>=1.10.7
|
||||
pydantic-settings==2.6.0
|
||||
epc-api-python==1.0.2
|
||||
numpy==2.1.2
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
[pytest]
|
||||
pythonpath = .
|
||||
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/onboarders/tests
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests
|
||||
|
|
|
|||
|
|
@ -7,24 +7,29 @@ import numpy as np
|
|||
from backend.app.utils import sap_to_epc
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from backend.app.db.connection import db_engine, db_read_session
|
||||
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
|
||||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial
|
||||
from backend.app.db.models.recommendations import (
|
||||
Recommendation,
|
||||
Plan,
|
||||
PlanRecommendations,
|
||||
RecommendationMaterials,
|
||||
)
|
||||
from backend.app.db.models.portfolio import (
|
||||
PropertyModel,
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyDetailsSpatial,
|
||||
)
|
||||
from backend.app.db.functions.materials_functions import get_materials
|
||||
from collections import defaultdict
|
||||
from sqlalchemy import func
|
||||
|
||||
# PORTFOLIO_ID = 206
|
||||
# SCENARIOS = [389]
|
||||
PORTFOLIO_ID = 435 # Peabody
|
||||
PORTFOLIO_ID = 502 # Peabody
|
||||
SCENARIOS = [
|
||||
908,
|
||||
909,
|
||||
910,
|
||||
986,
|
||||
]
|
||||
scenario_names = {
|
||||
908: "EPC C - no solid floor, ashp 3.0",
|
||||
909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
|
||||
910: "EPC B - no solid floor, no EWI, ashp 3.0"
|
||||
986: "EPC C",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -35,22 +40,26 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Properties
|
||||
# --------------------
|
||||
properties_query = session.query(
|
||||
PropertyModel,
|
||||
PropertyDetailsEpcModel
|
||||
).join(
|
||||
properties_query = (
|
||||
session.query(PropertyModel, PropertyDetailsEpcModel)
|
||||
.join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id
|
||||
).filter(
|
||||
PropertyModel.portfolio_id == portfolio_id
|
||||
).all()
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id,
|
||||
)
|
||||
.filter(PropertyModel.portfolio_id == portfolio_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
properties_data = [
|
||||
{
|
||||
**{col.name: getattr(p.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns},
|
||||
**{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns},
|
||||
**{
|
||||
col.name: getattr(p.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns
|
||||
},
|
||||
**{
|
||||
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns
|
||||
},
|
||||
}
|
||||
for p in properties_query
|
||||
]
|
||||
|
|
@ -62,13 +71,10 @@ def get_data(portfolio_id, scenario_ids):
|
|||
session.query(
|
||||
Plan.scenario_id,
|
||||
Plan.property_id,
|
||||
func.max(Plan.created_at).label("latest_created_at")
|
||||
func.max(Plan.created_at).label("latest_created_at"),
|
||||
)
|
||||
.filter(Plan.scenario_id.in_(scenario_ids))
|
||||
.group_by(
|
||||
Plan.scenario_id,
|
||||
Plan.property_id
|
||||
)
|
||||
.group_by(Plan.scenario_id, Plan.property_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
|
|
@ -80,9 +86,9 @@ def get_data(portfolio_id, scenario_ids):
|
|||
session.query(Plan)
|
||||
.join(
|
||||
latest_plans_subq,
|
||||
(Plan.scenario_id == latest_plans_subq.c.scenario_id) &
|
||||
(Plan.property_id == latest_plans_subq.c.property_id) &
|
||||
(Plan.created_at == latest_plans_subq.c.latest_created_at)
|
||||
(Plan.scenario_id == latest_plans_subq.c.scenario_id)
|
||||
& (Plan.property_id == latest_plans_subq.c.property_id)
|
||||
& (Plan.created_at == latest_plans_subq.c.latest_created_at),
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
|
@ -107,28 +113,29 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Recommendations (NO materials yet)
|
||||
# --------------------
|
||||
recommendations_query = session.query(
|
||||
Recommendation,
|
||||
Plan.scenario_id,
|
||||
PlanRecommendations.plan_id
|
||||
).join(
|
||||
recommendations_query = (
|
||||
session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
|
||||
.join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id
|
||||
).join(
|
||||
Plan,
|
||||
Plan.id == PlanRecommendations.plan_id
|
||||
).filter(
|
||||
Recommendation.id == PlanRecommendations.recommendation_id,
|
||||
)
|
||||
.join(Plan, Plan.id == PlanRecommendations.plan_id)
|
||||
.filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default.is_(True),
|
||||
Recommendation.already_installed.is_(False)
|
||||
).all()
|
||||
Recommendation.already_installed.is_(False),
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
recommendations_data = [
|
||||
{
|
||||
**{col.name: getattr(r.Recommendation, col.name)
|
||||
for col in Recommendation.__table__.columns},
|
||||
**{
|
||||
col.name: getattr(r.Recommendation, col.name)
|
||||
for col in Recommendation.__table__.columns
|
||||
},
|
||||
"scenario_id": r.scenario_id,
|
||||
"materials": [] # placeholder
|
||||
"materials": [], # placeholder
|
||||
}
|
||||
for r in recommendations_query
|
||||
]
|
||||
|
|
@ -138,23 +145,25 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Recommendation materials (SEPARATE QUERY)
|
||||
# --------------------
|
||||
materials_query = session.query(
|
||||
RecommendationMaterials
|
||||
).filter(
|
||||
RecommendationMaterials.recommendation_id.in_(recommendation_ids)
|
||||
).all()
|
||||
materials_query = (
|
||||
session.query(RecommendationMaterials)
|
||||
.filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
|
||||
.all()
|
||||
)
|
||||
|
||||
# Group materials by recommendation_id
|
||||
materials_by_recommendation = defaultdict(list)
|
||||
|
||||
for m in materials_query:
|
||||
materials_by_recommendation[m.recommendation_id].append({
|
||||
materials_by_recommendation[m.recommendation_id].append(
|
||||
{
|
||||
"material_id": m.material_id,
|
||||
"depth": m.depth,
|
||||
"quantity": m.quantity,
|
||||
"quantity_unit": m.quantity_unit,
|
||||
"estimated_cost": m.estimated_cost,
|
||||
})
|
||||
}
|
||||
)
|
||||
|
||||
# Attach materials safely (no filtering side effects)
|
||||
for r in recommendations_data:
|
||||
|
|
@ -165,7 +174,9 @@ def get_data(portfolio_id, scenario_ids):
|
|||
return properties_data, plans_data, recommendations_data
|
||||
|
||||
|
||||
properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)
|
||||
properties_data, plans_data, recommendations_data = get_data(
|
||||
portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS
|
||||
)
|
||||
|
||||
properties_df = pd.DataFrame(properties_data)
|
||||
plans_df = pd.DataFrame(plans_data)
|
||||
|
|
@ -176,10 +187,8 @@ with db_read_session() as session:
|
|||
|
||||
materials = pd.DataFrame(materials)
|
||||
|
||||
material_lookup = (
|
||||
materials
|
||||
.set_index("id")[["type", "includes_battery"]]
|
||||
.to_dict("index")
|
||||
material_lookup = materials.set_index("id")[["type", "includes_battery"]].to_dict(
|
||||
"index"
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -193,14 +202,14 @@ def has_solar_with_battery(materials_list):
|
|||
return False
|
||||
|
||||
|
||||
recommendations_df["has_solar_with_battery"] = (
|
||||
recommendations_df["materials"].apply(has_solar_with_battery)
|
||||
recommendations_df["has_solar_with_battery"] = recommendations_df["materials"].apply(
|
||||
has_solar_with_battery
|
||||
)
|
||||
|
||||
recommendations_df["measure_type"] = np.where(
|
||||
recommendations_df["has_solar_with_battery"] == True,
|
||||
recommendations_df["measure_type"] + "_with_battery",
|
||||
recommendations_df["measure_type"]
|
||||
recommendations_df["measure_type"],
|
||||
)
|
||||
|
||||
# Adjust material type to indicate if there is a battery included
|
||||
|
|
@ -215,50 +224,67 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3
|
|||
|
||||
for scenario_id in SCENARIOS:
|
||||
# Get recs for this scenario
|
||||
recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
|
||||
["property_id", "measure_type", "estimated_cost", "default"]
|
||||
recommended_measures_df = recommendations_df[
|
||||
recommendations_df["scenario_id"] == scenario_id
|
||||
][["property_id", "measure_type", "estimated_cost", "default"]]
|
||||
recommended_measures_df = recommended_measures_df[
|
||||
recommended_measures_df["default"]
|
||||
]
|
||||
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
|
||||
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
|
||||
|
||||
post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
|
||||
["property_id", "default", "sap_points"]]
|
||||
post_install_sap = recommendations_df[
|
||||
recommendations_df["scenario_id"] == scenario_id
|
||||
][["property_id", "default", "sap_points"]]
|
||||
post_install_sap = post_install_sap[post_install_sap["default"]]
|
||||
# Sum up the sap points by property id
|
||||
post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
|
||||
post_install_sap = (
|
||||
post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
|
||||
)
|
||||
|
||||
# Find dupes by property id and measure type
|
||||
dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False)
|
||||
dupes = recommended_measures_df.duplicated(
|
||||
subset=["property_id", "measure_type"], keep=False
|
||||
)
|
||||
dupe_df = recommended_measures_df[dupes]
|
||||
|
||||
if dupe_df.shape:
|
||||
# Drop dupes - happened due to a funny bug
|
||||
recommended_measures_df = recommended_measures_df.drop_duplicates(
|
||||
subset=["property_id", "measure_type"], keep='first'
|
||||
subset=["property_id", "measure_type"], keep="first"
|
||||
)
|
||||
|
||||
recommendations_measures_pivot = recommended_measures_df.pivot(
|
||||
index='property_id',
|
||||
columns='measure_type',
|
||||
values='estimated_cost'
|
||||
index="property_id", columns="measure_type", values="estimated_cost"
|
||||
)
|
||||
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
|
||||
|
||||
# Total cost is the row sum, excluding the property_id column
|
||||
recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
|
||||
columns=["property_id"]
|
||||
).sum(axis=1)
|
||||
recommendations_measures_pivot["total_retrofit_cost"] = (
|
||||
recommendations_measures_pivot.drop(columns=["property_id"]).sum(axis=1)
|
||||
)
|
||||
|
||||
df = properties_df[
|
||||
df = (
|
||||
properties_df[
|
||||
[
|
||||
"landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
|
||||
"heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
|
||||
"id"
|
||||
"landlord_property_id",
|
||||
"property_id",
|
||||
"uprn",
|
||||
"address",
|
||||
"postcode",
|
||||
"property_type",
|
||||
"walls",
|
||||
"roof",
|
||||
"heating",
|
||||
"windows",
|
||||
"current_epc_rating",
|
||||
"current_sap_points",
|
||||
"total_floor_area",
|
||||
"number_of_rooms",
|
||||
"id",
|
||||
]
|
||||
].merge(
|
||||
recommendations_measures_pivot, how="left", on="property_id"
|
||||
).merge(
|
||||
post_install_sap, how="left", on="property_id"
|
||||
]
|
||||
.merge(recommendations_measures_pivot, how="left", on="property_id")
|
||||
.merge(post_install_sap, how="left", on="property_id")
|
||||
)
|
||||
|
||||
# df = df.drop(columns=["property_id"])
|
||||
|
|
@ -266,20 +292,24 @@ for scenario_id in SCENARIOS:
|
|||
|
||||
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
|
||||
df["predicted_post_works_sap"] = df["predicted_post_works_sap"]
|
||||
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
|
||||
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(
|
||||
lambda x: sap_to_epc(x)
|
||||
)
|
||||
df["uprn"] = df["uprn"].astype(str)
|
||||
|
||||
relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id]
|
||||
df2 = df.merge(
|
||||
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id",
|
||||
suffixes=("", "_plan")
|
||||
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]],
|
||||
how="left",
|
||||
on="property_id",
|
||||
suffixes=("", "_plan"),
|
||||
)
|
||||
print(df2["predicted_post_works_epc"].value_counts())
|
||||
print(df2["post_epc_rating"].value_counts())
|
||||
|
||||
z = df2[
|
||||
(df2["predicted_post_works_epc"] != "D") &
|
||||
(df2["post_epc_rating"].astype(str) == "Epc.D")
|
||||
(df2["predicted_post_works_epc"] != "D")
|
||||
& (df2["post_epc_rating"].astype(str) == "Epc.D")
|
||||
]
|
||||
|
||||
df2["predicted_post_works_epc"].value_counts()
|
||||
|
|
@ -295,183 +325,6 @@ for scenario_id in SCENARIOS:
|
|||
df[df["predicted_post_works_sap"] == ""]
|
||||
|
||||
# Create excel to store to
|
||||
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||
f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx")
|
||||
filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx"
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
df.to_excel(writer, sheet_name="properties", index=False)
|
||||
|
||||
|
||||
# asset_list = pd.DataFrame(asset_list)
|
||||
# asset_list = asset_list.rename(
|
||||
# columns={
|
||||
# "postcode": "domna_postcode"
|
||||
# }
|
||||
# )
|
||||
# if "domna_full_address":
|
||||
# # For Peabody
|
||||
# asset_list["domna_full_address"] = asset_list["domna_address_1"]
|
||||
#
|
||||
# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
|
||||
# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
|
||||
# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
|
||||
# asset_list = asset_list.merge(
|
||||
# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
|
||||
# how="left",
|
||||
# on="uprn"
|
||||
# )
|
||||
|
||||
|
||||
# Get conservation area data from property details spatial. based on the UPRNs
|
||||
def get_conservation_area_data(uprns):
    """
    Fetch the PropertyDetailsSpatial rows whose UPRN is in ``uprns``.

    :param uprns: collection of UPRN values used for the ``IN`` filter.
    :return: DataFrame with one row per matching record and one column per
        PropertyDetailsSpatial table column. The columns are present even
        when no record matches, so callers can always select them.
    """
    column_names = [col.name for col in PropertyDetailsSpatial.__table__.columns]

    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        spatial_rows = (
            session.query(PropertyDetailsSpatial)
            .filter(PropertyDetailsSpatial.uprn.in_(uprns))  # Filter by UPRNs
            .all()
        )

        # Copy the column values out while the session is still open.
        spatial_data = [
            {name: getattr(spatial, name) for name in column_names}
            for spatial in spatial_rows
        ]
    finally:
        # Always release the connection, even if the query raises.
        session.close()

    return pd.DataFrame(spatial_data, columns=column_names)
|
||||
|
||||
|
||||
uprns = asset_list[
|
||||
~pd.isna(asset_list["uprn"]) & (asset_list["uprn"] != "<NA>")
|
||||
]["uprn"].astype(int).unique().tolist()
|
||||
conservation_area_data = get_conservation_area_data(uprns)
|
||||
conservation_area_data["uprn"] = conservation_area_data["uprn"].astype(str)
|
||||
asset_list = asset_list.merge(
|
||||
conservation_area_data[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]],
|
||||
how="left",
|
||||
on="uprn"
|
||||
)
|
||||
|
||||
# For exporting
|
||||
df.to_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - "
|
||||
"with ID.xlsx",
|
||||
index=False
|
||||
)
|
||||
# asset_list.to_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx",
|
||||
# index=False
|
||||
# )
|
||||
|
||||
condition_costs = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
|
||||
sheet_name="Prices - Khalim",
|
||||
header=35
|
||||
)
|
||||
# Remove unnamed columns and reset index
|
||||
condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')]
|
||||
condition_costs = condition_costs.reset_index(drop=True)
|
||||
|
||||
|
||||
# We now estimate condition cost
|
||||
def simulate_condition(asset_list, condition_costs):
    """
    This function is for testing, and will simulate condition cost from 1-10 for each property to see what the
    costing array looks like.

    For every property and every condition level, each rate in the matching ``condition_costs`` row is
    multiplied by the property's ``total_floor_area``, the ``Bathroom`` entry is additionally multiplied by the
    number of bathrooms, and the results are summed into one total.

    :param asset_list: DataFrame with one row per property. Must contain ``total_floor_area`` and a bathroom
        count column (``bathrooms``, or ``n_bathrooms`` when ``bathrooms`` is absent).
    :param condition_costs: DataFrame with a ``Condition`` column holding the levels 1-10, a ``Bathroom``
        column, and any further cost columns.
    :return: copy of ``asset_list`` (index reset to 0..n-1) with the extra columns ``Condition 10`` ...
        ``Condition 1`` holding the simulated totals. One output row per input row.
    :raises IndexError: if ``condition_costs`` has no row for one of the condition levels 1-10.
    """
    condition_levels = list(reversed(range(1, 11)))
    condition_labels = ["Condition " + str(level) for level in condition_levels]

    # The rate row for a condition level does not depend on the property, so look each one up once.
    rates_by_level = {
        level: condition_costs[
            condition_costs["Condition"] == level
        ].drop(columns=["Condition"]).iloc[0]
        for level in condition_levels
    }

    # The scoring loop elsewhere in this script reads "n_bathrooms"; accept either name,
    # preferring "bathrooms" so existing callers behave as before.
    bathroom_column = "bathrooms" if "bathrooms" in asset_list.columns else "n_bathrooms"

    simulated_rows = []
    for _, row in asset_list.iterrows():
        n_bathrooms = row[bathroom_column]

        conditions = {}
        for level in condition_levels:
            # Each cost is scaled by floor area. The multiplication returns a new Series,
            # so the cached rate row is never modified.
            condition_cost = rates_by_level[level] * row["total_floor_area"]
            condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms

            conditions["Condition " + str(level)] = condition_cost.sum()

        simulated_rows.append(conditions)

    # Explicit columns keep the schema when asset_list is empty.
    condition_df = pd.DataFrame(simulated_rows, columns=condition_labels)

    # Attach the totals row-by-row (by position) instead of merging on "uprn": a key merge
    # multiplies rows whenever UPRNs repeat or are missing.
    return pd.concat([asset_list.reset_index(drop=True), condition_df], axis=1)
|
||||
|
||||
|
||||
# asset_list = simulate_condition(asset_list, condition_costs)
|
||||
|
||||
# We calculate the condition cost based on the condition
|
||||
# Rate rows are looked up once per condition score and reused for every property with that score.
condition_rate_cache = {}

for row_label, row in asset_list.iterrows():

    condition = row["condition_score"]
    # Skip properties without a score. Missing values can be None, NaN/NA or a blank string;
    # pd.isna is checked first so that comparing NA with "" is never evaluated.
    if pd.isna(condition) or (isinstance(condition, str) and condition.strip() == ""):
        continue
    condition = int(float(condition))

    if condition not in condition_rate_cache:
        # .iloc[0] raises IndexError when the score has no row in the cost table.
        condition_rate_cache[condition] = condition_costs[
            condition_costs["Condition"] == condition
        ].drop(columns=["Condition"]).iloc[0]

    # Each cost is scaled by floor area. The multiplication returns a new Series,
    # so the cached rate row is never modified.
    condition_cost = condition_rate_cache[condition] * float(row["total_floor_area"])
    n_bathrooms = row["n_bathrooms"]
    condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)

    total_condition_cost = condition_cost.sum()
    # Write to this row by its index label rather than by matching on "uprn": rows that share
    # a UPRN (or the "<NA>" placeholder) would overwrite each other, and rows with a missing
    # UPRN would never match.
    asset_list.loc[row_label, "domna_condition_cost"] = total_condition_cost
|
||||
|
||||
# Store output
|
||||
# Persist the enriched asset list (index column omitted).
asset_list.to_excel(
    excel_writer="/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
    index=False,
)
|
||||
|
||||
# Side-by-side view of the supplied score, the decoration cost range and our estimate.
# NOTE(review): "decoration_sum_min " carries a trailing space - confirm it matches the source column name.
condition_cost_comparison = asset_list.loc[
    :,
    [
        "condition_score",
        "decoration_sum_min ",
        "decoration_sum_max",
        "domna_condition_cost",
    ],
]
|
||||
|
||||
# Testing
|
||||
# Peek at the plans frame (the result is only visible in an interactive session).
plans_df.head()

# Load the reference scenario workbook used for comparison.
example = pd.read_excel(
    io=(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
        "SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx"
    )
)
|
||||
|
||||
# Attach the landlord's own property reference to every plan row.
plans_df2 = plans_df.merge(
    properties_df[["property_id", "landlord_property_id"]],
    how="left",
    left_on="property_id",
    right_on="property_id",
)

# Keep only the plans that belong to scenario 909.
plans_df2 = plans_df2.loc[plans_df2["scenario_id"] == 909]

# Rows whose property_id has already appeared earlier in the frame.
dupes = plans_df2.loc[plans_df2["property_id"].duplicated()]
|
||||
|
||||
# merge on plans
|
||||
# Left-join the plans onto the example workbook. No key is given, so pandas joins on
# every column name the two frames share.
# NOTE(review): the filtered plans_df2 built above is not used here - confirm plans_df is intended.
example = pd.merge(example, plans_df, how="left")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue