Merge branch 'main' of https://github.com/Hestia-Homes/Model into feature/ara-rebaselining

# Conflicts:
#	asset_list/app.py
This commit is contained in:
Khalim Conn-Kowlessar 2026-03-06 09:55:24 +00:00
commit 6d3b6beadc
146 changed files with 10149 additions and 3375 deletions

View file

@ -22,7 +22,9 @@
"jgclark.vscode-todo-highlight",
"corentinartaud.pdfpreview",
"ms-python.vscode-python-envs",
"ms-python.black-formatter"
"ms-python.black-formatter",
"GrapeCity.gc-excelviewer",
"jakobhoeg.vscode-pokemon"
],
"settings": {
"files.defaultWorkspace": "/workspaces/model",

View file

@ -43,4 +43,24 @@ WORKDIR /workspaces/model
# 6) Make Python find your package
# Add project root to PYTHONPATH for all processes
ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}
ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}
# Install terraform
# The image build already runs apt-get/tee unprefixed (i.e. as root), so sudo is
# dropped: it is unnecessary and would fail on a base image that does not ship it.
# wget and lsb-release are used by the steps below, so they are installed here
# rather than relying on the base image providing them.
RUN apt-get update && apt-get install -y gnupg software-properties-common wget lsb-release
# Fetch HashiCorp's signing key and store it de-armored in a dedicated keyring.
RUN wget -O- https://apt.releases.hashicorp.com/gpg | \
    gpg --dearmor | \
    tee /usr/share/keyrings/hashicorp-archive-keyring.gpg > /dev/null
# Register the HashiCorp apt repository, pinned to the keyring above.
RUN echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] \
https://apt.releases.hashicorp.com $(lsb_release -cs) main" | \
    tee /etc/apt/sources.list.d/hashicorp.list
# update + install share one layer so the install never sees a stale package index;
# -y is required because docker build is non-interactive and apt-get would abort
# at the confirmation prompt without it.
RUN apt-get update && apt-get install -y terraform
RUN terraform -install-autocomplete
# Install postgres
# Runs as root like the rest of the build, so sudo is not used.
RUN apt-get update && apt-get install -y wget gnupg2 lsb-release
# Store the PGDG signing key in its own keyring and reference it with signed-by
# (same pattern as the HashiCorp repo); apt-key is deprecated and adds the key
# to the global trust store.
RUN wget -qO- https://www.postgresql.org/media/keys/ACCC4CF8.asc | \
    gpg --dearmor | \
    tee /usr/share/keyrings/postgresql-archive-keyring.gpg > /dev/null
RUN echo "deb [signed-by=/usr/share/keyrings/postgresql-archive-keyring.gpg] http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | \
    tee /etc/apt/sources.list.d/pgdg.list
# update + install in one layer so the freshly added repo index is always used.
RUN apt-get update && apt-get install -y postgresql-14

View file

@ -6,7 +6,8 @@
"workspaceFolder": "/workspaces/model",
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
"mounts": [
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
// "source=${localEnv:HOME},target=/home/vscode,type=bind",
"source=${localEnv:HOME}/.aws,target=/home/vscode/.aws,type=bind,consistency=cached"
],
"customizations": {
"vscode": {
@ -22,7 +23,11 @@
"corentinartaud.pdfpreview",
"ms-python.vscode-python-envs",
"ms-python.black-formatter",
"waderyan.gitblame"
"waderyan.gitblame",
"GrapeCity.gc-excelviewer",
"jakobhoeg.vscode-pokemon",
"github.vscode-github-actions",
"me-dutour-mathieu.vscode-github-actions"
],
"settings": {
"files.defaultWorkspace": "/workspaces/model",
@ -38,3 +43,4 @@
"PYTHONFLAGS": "-Xfrozen_modules=off"
}
}

View file

@ -9,7 +9,7 @@ mangum==0.19.0
# AWS
boto3==1.35.44
# Data
openpyxl==3.1.2
openpyxl==3.1.5
# Basic
pytz
uvicorn[standard]
@ -18,5 +18,9 @@ sqlmodel
pytest==9.0.2
pytest-cov==7.0.0
ipykernel>=6.25,<7
dotenv
psycopg[binary]
pytest-postgresql
# Formatting
black==26.1.0
black==26.1.0
boto3-stubs

View file

@ -38,6 +38,8 @@ on:
required: false
DEV_DB_NAME:
required: false
EPC_AUTH_TOKEN:
required: false
jobs:
build:
@ -47,6 +49,7 @@ jobs:
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }}
outputs:
image_digest: ${{ steps.digest.outputs.image_digest }}
@ -87,14 +90,17 @@ jobs:
temp=$(eval echo "$line")
BUILD_ARGS="$BUILD_ARGS --build-arg $temp"
done <<< "${{ inputs.build_args }}"
docker build \
docker buildx build \
--no-cache \
--platform linux/amd64 \
--provenance=false \
--sbom=false \
--push \
-f ${{ inputs.dockerfile_path }} \
$BUILD_ARGS \
-t $IMAGE_URI \
${{ inputs.build_context }}
docker push $IMAGE_URI
- name: Resolve image digest
id: digest

View file

@ -42,6 +42,22 @@ on:
required: true
AWS_REGION:
required: true
TF_VAR_db_host:
required: false
TF_VAR_db_name:
required: false
TF_VAR_db_port:
required: false
TF_VAR_api_key:
required: false
TF_VAR_secret_key:
required: false
TF_VAR_domain_name:
required: false
TF_VAR_epc_auth_token:
required: false
TF_VAR_google_solar_api_key:
required: false
jobs:
deploy:
@ -90,6 +106,15 @@ jobs:
- name: Terraform Plan
working-directory: ${{ inputs.lambda_path }}
env:
TF_VAR_db_host: ${{ secrets.TF_VAR_db_host }}
TF_VAR_db_name: ${{ secrets.TF_VAR_db_name }}
TF_VAR_db_port: ${{ secrets.TF_VAR_db_port }}
TF_VAR_api_key: ${{ secrets.TF_VAR_api_key }}
TF_VAR_secret_key: ${{ secrets.TF_VAR_secret_key }}
TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }}
TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }}
TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }}
run: |
terraform plan \
-var="stage=${{ inputs.stage }}" \
@ -106,4 +131,18 @@ jobs:
- name: Terraform Destroy
if: inputs.terraform_destroy == 'true' && inputs.terraform_apply != 'true'
working-directory: ${{ inputs.lambda_path }}
run: terraform destroy -auto-approve
env:
TF_VAR_db_host: ${{ secrets.TF_VAR_db_host }}
TF_VAR_db_name: ${{ secrets.TF_VAR_db_name }}
TF_VAR_db_port: ${{ secrets.TF_VAR_db_port }}
TF_VAR_api_key: ${{ secrets.TF_VAR_api_key }}
TF_VAR_secret_key: ${{ secrets.TF_VAR_secret_key }}
TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }}
TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }}
TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }}
run: |
terraform destroy -auto-approve \
-var="stage=${{ inputs.stage }}" \
-var="lambda_name=${{ inputs.lambda_name }}" \
-var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
-var="image_digest=${{ inputs.image_digest }}"

View file

@ -87,7 +87,13 @@ jobs:
- name: Build Docker Image For Engine
run: |
docker build -t fastapi-lambda-image:${{ github.sha }} -f backend/docker/engine.Dockerfile . --load
docker buildx build \
--platform linux/amd64 \
--provenance=false \
--output=type=docker \
-t fastapi-lambda-image:${{ github.sha }} \
-f backend/docker/engine.Dockerfile \
.
- name: Login to ECR
run: |
@ -135,3 +141,4 @@ jobs:
# Deploy to AWS Lambda via Serverless
sls deploy --stage ${{ github.ref_name }} --verbose

View file

@ -3,12 +3,9 @@ name: Deploy infrastructure
on:
push:
branches:
- "**"
paths:
- 'infrastructure/terraform/**'
- '.github/workflows/deploy_terraform.yml'
- '.github/workflows/_build_image.yml'
- '.github/workflows/_deploy_lambda.yml'
- "dev"
- "prod"
workflow_dispatch:
jobs:
determine_stage:
@ -51,6 +48,7 @@ jobs:
runs-on: ubuntu-latest
env:
STAGE: ${{ needs.determine_stage.outputs.stage }}
TERRAFORM_APPLY: ${{ needs.determine_stage.outputs.terraform_apply }}
steps:
- uses: actions/checkout@v4
@ -76,10 +74,10 @@ jobs:
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
- name: Terraform Apply
if: env.STAGE == 'prod'
if: env.TERRAFORM_APPLY == 'true'
working-directory: infrastructure/terraform/shared
run: terraform apply -auto-approve tfplan
# ============================================================
# 2⃣ Build Address 2 UPRN image and Push
# ============================================================
@ -90,10 +88,19 @@ jobs:
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/address2UPRN/handler/Dockerfile
build_context: .
build_args: |
DEV_DB_HOST=$DEV_DB_HOST
DEV_DB_PORT=$DEV_DB_PORT
DEV_DB_NAME=$DEV_DB_NAME
EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
# ============================================================
# 3⃣ Deploy Address 2 UPRN Lambda
@ -140,7 +147,7 @@ jobs:
# 3⃣ Deploy Postcode Splitter Lambda
# ============================================================
postcodeSplitter_lambda:
needs: [postcodeSplitter_image, determine_stage]
needs: [postcodeSplitter_image, determine_stage, address2uprn_lambda]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: postcodeSplitter
@ -192,4 +199,85 @@ jobs:
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# Categorisation image and Push
# ============================================================
categorisation_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/categorisation/handler/Dockerfile
build_context: .
build_args: |
DEV_DB_HOST=$DEV_DB_HOST
DEV_DB_PORT=$DEV_DB_PORT
DEV_DB_NAME=$DEV_DB_NAME
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
# ============================================================
# Deploy Categorisation Lambda
# ============================================================
categorisation_lambda:
needs: [categorisation_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: categorisation
lambda_path: infrastructure/terraform/lambda/categorisation
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.categorisation_image.outputs.image_digest }}
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# Ara Engine image and Push
# ============================================================
ara_engine_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/docker/engine.Dockerfile
build_context: .
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# Deploy Ara Engine Lambda
# ============================================================
ara_engine_lambda:
needs: [ara_engine_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: ara_engine
lambda_path: infrastructure/terraform/lambda/engine
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.ara_engine_image.outputs.image_digest }}
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }}
TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }}
TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }}
TF_VAR_api_key: ${{ secrets.DEV_API_KEY }}
TF_VAR_secret_key: ${{ secrets.DEV_SECRET_KEY }}
TF_VAR_domain_name: ${{ secrets.DEV_DOMAIN_NAME }}
TF_VAR_epc_auth_token: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
TF_VAR_google_solar_api_key: ${{ secrets.DEV_GOOGLE_SOLAR_API_KEY }}

5
.gitignore vendored
View file

@ -279,4 +279,7 @@ cache/
*.png
*.pptx
local_data*
local_data*
# pyright local config
pyrightconfig.json

3
.idea/Model.iml generated
View file

@ -10,4 +10,7 @@
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="py.test" />
</component>
</module>

25
.idea/watcherTasks.xml generated Normal file
View file

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectTasksOptions">
<TaskOptions isEnabled="false">
<option name="arguments" value="$FilePath$" />
<option name="checkSyntaxErrors" value="true" />
<option name="description" />
<option name="exitCodeBehavior" value="ERROR" />
<option name="fileExtension" value="py" />
<option name="immediateSync" value="true" />
<option name="name" value="Pyright" />
<option name="output" value="" />
<option name="outputFilters">
<array />
</option>
<option name="outputFromStdout" value="false" />
<option name="program" value="$USER_HOME$/.nvm/versions/node/v18.15.0/bin/pyright" />
<option name="runOnExternalChanges" value="true" />
<option name="scopeName" value="Project Files" />
<option name="trackOnlyRoot" value="false" />
<option name="workingDir" value="$ProjectFileDir$" />
<envs />
</TaskOptions>
</component>
</project>

10
.vscode/settings.json vendored
View file

@ -9,12 +9,14 @@
"path": "/bin/bash"
}
},
<<<<<<< HEAD
=======
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"],
"python.languageServer": "Pylance",
"python.analysis.typeCheckingMode": "strict",
"python.analysis.autoSearchPaths": true,
"python.analysis.extraPaths": ["./src"]
// Hot reload setting that needs to be in user settings
// "jupyter.runStartupCommands": [

File diff suppressed because it is too large Load diff

View file

@ -13,11 +13,15 @@ from asset_list.utils import get_data
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
load_dotenv(dotenv_path="backend/.env")
load_dotenv(dotenv_path="../backend/.env")
EPC_AUTH_TOKEN = os.getenv(
"EPC_AUTH_TOKEN",
)
OPENAI_API_KEY = os.getenv(
"OPENAI_API_KEY",
)
def extract_address1(
asset_list, full_address_col, postcode_col, method="first_two_words"
@ -69,18 +73,19 @@ def app():
Property UPRN
"""
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals"
data_filename = "For Modelling.xlsx"
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed"
# data_filename = "For Modelling - Final - reviewed.xlsx"
data_filename = "Missed Properties - with address.xlsx"
sheet_name = "Sheet1"
postcode_column = "Postcode"
address1_column = "address1"
address1_method = None
fulladdress_column = "full_address"
fulladdress_column = "address1"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = "UPRN"
landlord_property_type = None
landlord_property_type = "Type"
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
@ -102,43 +107,6 @@ def app():
asset_list_header = 0
landlord_block_reference = None
# Peabody data for cleaning
data_folder = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation"
)
data_filename = "to_standardise_uprns.xlsx"
sheet_name = "Sheet1"
postcode_column = "Postcode"
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = None
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = "UPRN"
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "LLUPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
@ -441,10 +409,6 @@ def app():
)
asset_list.merge_data(epc_df)
# asset_list.standardised_asset_list = asset_list.standardised_asset_list[
# asset_list.standardised_asset_list["domna_full_address"]
# != "120 Airdrie Crescent, Burnley, Lancashire"
# ]
asset_list.extract_attributes()
asset_list.identify_worktypes()
@ -458,27 +422,6 @@ def app():
os.path.join(data_folder, ".".join(data_filename.split(".")[:-1]))
+ " - Standardised.xlsx"
)
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
# Determine inspections priority
# solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][
# "domna_postcode"].unique()
# asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin(
# solar_jobs
# )
# # Same for cav
# cavity_jobs = asset_list.standardised_asset_list[
# ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"])
# ]["domna_postcode"].unique()
# asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin(
# cavity_jobs
# )
# # We prioritise properties that are in solar areas and cavity areas
# import numpy as np
# asset_list.standardised_asset_list["inspection_priority"] = np.where(
# asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"],
# 1, 2
# )
with pd.ExcelWriter(filename) as writer:
asset_list.standardised_asset_list.to_excel(

View file

@ -528,6 +528,107 @@ BUILT_FORM_MAPPINGS = {
'House: Semi Detached: Top Floor': 'semi-detached',
'House: End Terrace: Ground Floor': 'end-terrace',
'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace',
'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace'
'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace',
'2 BED MID TERRACED HOUSE': 'mid-terrace',
'4 BED SEMI DETACHED-PARLOURED': 'semi-detached',
'2 BED END TERRACED HOUSE': 'end-terrace',
'3 BED MID TERRACED HOUSE': 'mid-terrace',
'3 BED SEMI DETACHED HOUSE': 'semi-detached',
'3 BED MID TERRACE - PARLOURED': 'mid-terrace',
'3 BED END TERRACE - PARLOURED': 'end-terrace',
'4 BED+ END TERRACED HOUSE': 'end-terrace',
'3 BED END TERRACED HOUSE': 'end-terrace',
'3 BED SEMI DETACHED-PARLOURED': 'semi-detached',
'4 BED+ END TERRACE - PARLOURED': 'end-terrace',
'2 BED SEMI DETACHED HOUSE': 'semi-detached',
'3 BED DETACHED HOUSE': 'detached',
'2 BED GRD FLR COTT FLT-CNT STR': 'ground floor',
'2 BED 1ST FLOOR WALKUP FLAT': 'mid-floor',
'1 BED GRD FL COTT FLAT-OWN ENT': 'ground floor',
'1 BED 1ST FL WALK UP DECK ACC': 'mid-floor',
'2 BED MAISONETTE UPPER COM ENT': 'mid-floor',
'2 BED GRD FLR COTT FLT OWN ENT': 'ground floor',
'1 BED BUNGALOW': 'unknown',
'2 BED GRD FL COTT FLT-OWN ENTR': 'ground floor',
'1 BED 1ST FL COTT FLT-CNT STR': 'mid-floor',
'1 BED GRD FL WALK UP OWN ENT': 'ground floor',
'1 BED GRD FLOOR WALKUP FLAT': 'ground floor',
'2 BED GRD FLOOR WALKUP FLAT': 'ground floor',
'2 BED 1ST FLR FLT-SHELTERED': 'mid-floor',
'2 BED BUNGALOW': 'unknown',
'2 BED GRD FLR COTT FLT(P)-1950': 'ground floor',
'Ground Floor Front Left': 'ground floor',
'End-Terrace House': 'end-terrace',
'Ground floor': 'ground floor',
'Ground Floor Front Right': 'ground floor',
'End Terrace (GII List)': 'end-terrace',
'Semi Detached House': 'semi-detached',
'Ground Floor Right': 'ground floor',
'PB Ground Floor Flat': 'ground floor',
'Basement and Ground Floor': 'ground floor',
'Semi-detached bungalow': 'detached',
'Detached Cottage': 'detached',
'Lower & Ground Floor': 'ground floor',
'Ground FLoor Flat': 'ground floor',
'ground floor': 'ground floor',
'Ground Floor Left': 'ground floor',
'Semi-detached House': 'detached',
'Basement & Lower Ground': 'basement',
'Semi-Detached House': 'detached',
'Ground floor flat -': 'ground floor',
'Basement Flat': 'basement',
'semi-detached bungalow': 'semi-detached',
'Lower Ground Floor Flat': 'ground floor',
'Ground floor Flat': 'ground floor',
'Ground Floor flat': 'ground floor',
'Ground': 'ground floor',
'Semi detached Bungalow': 'semi-detached',
'ground floor flat': 'ground floor',
'Mid terrace House': 'mid-terrace',
'Raised Ground Floor': 'ground floor',
'Basement Floor': 'basement',
'Second floor flat': 'mid-floor',
'Fourth Floor Flat': 'mid-floor',
'First/Second Maisonette': 'mid-floor',
'Ground/First': 'ground floor',
'First and Second Floor': 'mid-floor',
'Terrace House': 'mid-terrace',
'1st/2nd Floor Maisonette': 'mid-floor',
'Semi-det House': 'semi-detached',
'First': 'mid-floor',
'Ground & First Floor': 'ground floor',
'End of Terrace House': 'end-terrace',
'2nd Floor Purpose Built': 'mid-floor',
'First/Second Floor Maison': 'mid-floor',
'GFF purpose built': 'ground floor',
'Second': 'mid-floor',
'Semi-det House (GII List)': 'semi-detached',
'3rd and 4th Floor': 'mid-floor',
'First Floor flat': 'mid-floor',
'Mid-Terrace House': 'mid-terrace',
'1st & 2nd Floors': 'mid-floor',
'Ground/first floor': 'ground floor',
'FFF purpose built': 'mid-floor',
'Second floor': 'mid-floor',
'Second/Third floor': 'mid-floor',
'First floor Flat': 'mid-floor',
'First floor': 'mid-floor',
'Lower Ground Flat': 'basement',
'First Floor Rear Flat': 'mid-floor',
'First & Second Floor': 'mid-floor',
'Ground & Lower Ground': 'basement',
'First Floor Rear': 'mid-floor',
'First & Second': 'mid-floor',
'First Floor Front': 'mid-floor',
'First & Second Floors': 'mid-floor',
'First/Second Floor': 'mid-floor',
'Sem-detach house': 'semi-detached',
'Second Floor Flat (Top)': 'top-floor',
'3 FloorTerrace House': 'mid-terrace',
'First floor flat': 'mid-floor',
'First & Second Floor Flat': 'mid-floor',
'First Floor Purpose Built': 'mid-floor',
'Purpose built First Floor': 'mid-floor',
}

View file

@ -498,6 +498,23 @@ HEATING_MAPPINGS = {
'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler',
'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators',
'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler'
'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler',
'IDEAL ISAR HE30': 'gas combi boiler',
'WORCESTER GREENSTAR 25 SI': 'gas combi boiler',
'POTTERTON PROMAX COMBI 28 HE PLUS': 'gas combi boiler',
'WORCESTER GREENSTAR 28I JUNIOR': 'gas combi boiler',
'BAXI ASSURE 25 COMBI': 'gas combi boiler',
'POTTERTON PROMAX COMBI 28 HE PLUS A': 'gas combi boiler',
'WORCESTER GREENSTAR 30 SI': 'gas combi boiler',
'POTTERTON SUPRIMA 40L': 'gas boiler, radiators',
'POTTERTON ASSURE 30 COMBI': 'gas combi boiler',
'POTTERTON PROMAX 28 COMBI ERP': 'gas combi boiler',
'BAXI ASSURE 30 COMBI': 'gas combi boiler',
'POTTERTON PROMAX 18 SYSTEM ERP': 'gas boiler, radiators',
'POTTERTON PROMAX COMBI 33 HE PLUS A': 'gas combi boiler',
'POTTERTON SUPRIMA 40 HE': 'gas boiler, radiators',
'FERROLI MODENA 102': 'gas boiler, radiators',
'POTTERTON PROMAX COMBI 24 HE PLUS A': 'gas combi boiler'
}

View file

@ -444,6 +444,9 @@ PROPERTY_MAPPING = {
'Warden Bungalow': 'bungalow',
'Warden Flat': 'flat',
'Upper Floor Flat': 'flat',
'Extracare Scheme': 'other'
'Extracare Scheme': 'other',
'SHELTERED': 'unknown',
'PARLOUR': 'unknown',
}

View file

@ -320,6 +320,8 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'Pitched (slates or tiles) access to loft, 100mm': 'pitched insulated',
'Pitched (slates or tiles) no loft access, 200mm': 'pitched insulated',
'Pitched (slates or tiles) access to loft, 200mm': 'pitched insulated',
'Pitched (slates or tiles) access to loft, 50mm': 'pitched less than 100mm insulation'
'Pitched (slates or tiles) access to loft, 50mm': 'pitched less than 100mm insulation',
'Pitched roofs': 'pitched unknown insulation',
}

View file

@ -369,6 +369,9 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Solid Brick, As built': 'solid brick unknown insulation',
'System built, As built': 'system built unknown insulation',
'Timber frame, As built': 'timber frame unknown insulation',
'Cavity, As built': 'cavity unknown insulation'
'Cavity, As built': 'cavity unknown insulation',
'FILLED CAVITY': 'filled cavity',
'EXTERNAL': 'insulated solid brick',
'AS BUILT': 'other'
}

View file

@ -5,7 +5,7 @@ epc-api-python==1.0.2
thefuzz
boto3
openpyxl
openai>=1.3.5
openai==1.93.0
tiktoken
msgpack
beautifulsoup4

View file

@ -19,4 +19,5 @@ PLAN_TRIGGER_BUCKET=test
DATA_BUCKET=test
EPC_AUTH_TOKEN=test
ENGINE_SQS_URL=test
ENERGY_ASSESSMENTS_BUCKET=test
CATEGORISATION_SQS_URL=test
ENERGY_ASSESSMENTS_BUCKET=test

View file

@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3
from backend.app.utils import sap_to_epc
from backend.app.db.connection import db_engine
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
class Outputs:
@ -42,7 +46,7 @@ class Outputs:
"flat_roof_insulation": "Flat roof (Out of scope - prov sum only)",
"room_in_roof_insulation": "RIR (POA - Prov sum only)",
"ev_charging": "EV Charging",
"battery": "Battery"
"battery": "Battery",
}
def __init__(self, format, portfolio_id):
@ -67,28 +71,38 @@ class Outputs:
# Download cleaned data
self.cleaned_epc_lookup = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-dev"
bucket_name="retrofit-data-dev",
)
self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False)
def get_properties_from_db(self):
# Get properties and their details for a specific portfolio
properties_query = self.session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == self.portfolio_id # Filter by portfolio ID
).all()
properties_query = (
self.session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(
PropertyModel.portfolio_id
== self.portfolio_id # Filter by portfolio ID
)
.all()
)
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
{
**{
col.name: getattr(prop.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for prop in properties_query
]
@ -96,10 +110,14 @@ class Outputs:
def get_plans_from_db(self):
plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all()
plans_query = (
self.session.query(PlanModel)
.filter(PlanModel.portfolio_id == self.portfolio_id)
.all()
)
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
@ -107,28 +125,38 @@ class Outputs:
def get_recommendations_from_db(self, plan_ids):
# Get recommendations through PlanRecommendations for those plans and that are default
recommendations_query = self.session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True # Filtering for default recommendations
).all()
recommendations_query = (
self.session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(
PlanModel,
PlanModel.id
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True, # Filtering for default recommendations
)
.all()
)
# Transform recommendations data to include all fields dynamically and include scenario_id
recommendations_data = [
{
**{
col.name: getattr(rec.Recommendation, col.name) if
hasattr(rec, 'Recommendation') else getattr(rec, col.name)
col.name: (
getattr(rec.Recommendation, col.name)
if hasattr(rec, "Recommendation")
else getattr(rec, col.name)
)
for col in Recommendation.__table__.columns
},
"Scenario ID": rec.scenario_id
} for rec in recommendations_query
"Scenario ID": rec.scenario_id,
}
for rec in recommendations_query
]
return recommendations_data
@ -148,7 +176,9 @@ class Outputs:
measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None)
# If the property_id already exists in the collected rows, update it
existing_row = next((item for item in rows if item["property_id"] == property_id), None)
existing_row = next(
(item for item in rows if item["property_id"] == property_id), None
)
if existing_row is None:
# Create a new row if the property_id doesn't exist
new_row = {measure: None for measure in all_measures}
@ -196,7 +226,7 @@ class Outputs:
properties_data = self.get_properties_from_db()
plans_data = self.get_plans_from_db()
plan_ids = [plan['id'] for plan in plans_data]
plan_ids = [plan["id"] for plan in plans_data]
recommendations_data = self.get_recommendations_from_db(plan_ids)
self.session.close()
@ -209,50 +239,54 @@ class Outputs:
scenario_ids = plans_df["scenario_id"].unique()
# We start to create the MDS sheet
mds = properties_df[
[
"property_id",
"address",
"postcode",
"uprn",
"current_epc_rating",
"current_sap_points",
"primary_energy_consumption",
"property_type",
"built_form",
"total_floor_area",
"walls",
"tenure",
"mainfuel",
# The bills columns are split out - we include them and aggregate, without appliances
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"gas_standing_charge",
"electricity_standing_charge"
mds = (
properties_df[
[
"property_id",
"address",
"postcode",
"uprn",
"current_epc_rating",
"current_sap_points",
"primary_energy_consumption",
"property_type",
"built_form",
"total_floor_area",
"walls",
"tenure",
"mainfuel",
# The bills columns are split out - we include them and aggregate, without appliances
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"gas_standing_charge",
"electricity_standing_charge",
]
]
].copy().rename(
columns={
"address": "Address",
"postcode": "Postcode",
"uprn": "UPRN",
"current_epc_rating": "Pre EPC",
"current_sap_points": "EPC Source",
"primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
"property_type": "Property Type",
"built_form": "Built Form",
"total_floor_area": "Floor area m2 (If known)",
"walls": "Wall Type (Mandatory field)",
"tenure": "Tenure",
}
.copy()
.rename(
columns={
"address": "Address",
"postcode": "Postcode",
"uprn": "UPRN",
"current_epc_rating": "Pre EPC",
"current_sap_points": "EPC Source",
"primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
"property_type": "Property Type",
"built_form": "Built Form",
"total_floor_area": "Floor area m2 (If known)",
"walls": "Wall Type (Mandatory field)",
"tenure": "Tenure",
}
)
)
mds["Estimated bill (£ per year)"] = (
mds["heating_cost_current"] +
mds["hot_water_cost_current"] +
mds["lighting_cost_current"] +
mds["gas_standing_charge"] +
mds["electricity_standing_charge"]
mds["heating_cost_current"]
+ mds["hot_water_cost_current"]
+ mds["lighting_cost_current"]
+ mds["gas_standing_charge"]
+ mds["electricity_standing_charge"]
)
mds = mds.drop(
@ -261,65 +295,84 @@ class Outputs:
"hot_water_cost_current",
"lighting_cost_current",
"gas_standing_charge",
"electricity_standing_charge"
"electricity_standing_charge",
]
)
# Formatting - Pre EPC is an enum
mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values]
mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0]
mds["Wall Type (Mandatory field)"] = (
mds["Wall Type (Mandatory field)"].str.split(",").str[0]
)
# Remove average thermal transmittance field
mds["Wall Type (Mandatory field)"] = np.where(
mds["Wall Type (Mandatory field)"].str.contains("Average thermal transmittance"),
mds["Wall Type (Mandatory field)"].str.contains(
"Average thermal transmittance"
),
"",
mds["Wall Type (Mandatory field)"]
mds["Wall Type (Mandatory field)"],
)
mds = mds.merge(
pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]],
pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[
["clean_description", "fuel_type"]
],
left_on="mainfuel",
right_on="clean_description",
how="left"
how="left",
)
mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(
columns=["clean_description", "mainfuel"]
)
mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"])
mds["Existing Fuel Type"].value_counts()
mds_output_by_scenario = {}
for scenario_id in scenario_ids:
scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id]
scenario_recommendations = recommendations_df[
recommendations_df["Scenario ID"] == scenario_id
]
# For each measure, we create the measure matrix
scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations)
scenario_measure_matrix = self.make_mds_measure_matrix(
scenario_recommendations
)
# Calculate the predicted impact on: SAP, heat demand, bills, kwh
recommendation_impacts = scenario_recommendations.groupby("property_id")[
["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
].sum().reset_index()
recommendation_impacts = (
scenario_recommendations.groupby("property_id")[
["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
]
.sum()
.reset_index()
)
scenario_mds = mds.merge(
scenario_measure_matrix, how="left", on="property_id"
).merge(
recommendation_impacts, how="left", on="property_id"
)
).merge(recommendation_impacts, how="left", on="property_id")
# If we have no recommendations, sap_points, kwh_savings, heat_demand and energy_cost_savings will be NaN
to_clean = [c for c in recommendation_impacts.columns if c != "property_id"]
for col in to_clean:
scenario_mds[col].fillna(0, inplace=True)
scenario_mds.fillna(0, inplace=True)
scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"]
scenario_mds["Post SAP"] = (
scenario_mds["EPC Source"] + scenario_mds["sap_points"]
)
# Round Post SAP down to the nearest integer
scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x))
scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x))
scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(
lambda x: sap_to_epc(x)
)
scenario_mds["Heating Demand Kwh/m2/y"] = (
scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"]
scenario_mds["Existing Heating Demand Kwh/m2/y"]
- scenario_mds["heat_demand"]
)
scenario_mds = scenario_mds.rename(
columns={
"sap_points": "Predicted SAP Points",
"kwh_savings": "Energy Saving (Kwh)",
"energy_cost_savings": "Bill Reduction (£ per yr)"
"energy_cost_savings": "Bill Reduction (£ per yr)",
}
)
@ -330,7 +383,7 @@ class Outputs:
save_excel_to_s3(
df=scenario_mds,
file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx",
bucket_name="retrofit-data-dev"
bucket_name="retrofit-data-dev",
)
def export(self):

View file

@ -490,7 +490,7 @@ class Property:
for rec_id in rec_ids:
sim_epc = self.simulation_epcs[rec_id].copy()
rec_impact = [x for x in impact_summary if x["recommendation_id"] == rec_id][0]
# We update all of the features that should have an impact on the kwh model
# We update all features that should have an impact on the kwh model
sim_epc.update(
{
@ -660,8 +660,6 @@ class Property:
self.set_floor_type()
self.set_floor_level()
self.set_windows_count()
self.set_energy_source()
self.find_energy_sources()
self.set_current_energy(kwh_client, kwh_predictions)
def set_solar_panel_configuration(self, solar_panel_configuration):
@ -1168,202 +1166,6 @@ class Property:
if condition_data.get("windows_area") is not None \
else None
def set_energy_source(self):
    """
    Classify the property as "electricity" or "electricity_and_gas" and store it on self.energy_source.

    A property is treated as electricity-only when its mains gas flag is falsy and its energy tariff is
    one of the common electricity tariffs; every other combination is "electricity_and_gas".
    """
    # A truthy mains gas flag settles it; the tariff is not consulted in that case
    if self.data["mains-gas-flag"]:
        self.energy_source = "electricity_and_gas"
        return

    # Tariffs that indicate electricity use without a dual-fuel indication
    electricity_only_tariffs = (
        "Single",
        "off-peak 7 hour",
        "off-peak 10 hour",
        "off-peak 18 hour",
        "standard tariff",
        "24 hour",
    )
    tariff = self.data["energy-tariff"]
    self.energy_source = (
        "electricity" if tariff in electricity_only_tariffs else "electricity_and_gas"
    )
def find_energy_sources(self):
"""
Derive self.heating_energy_source and self.hot_water_energy_source.

Reads self.main_heating (has_* flags), self.hotwater, self.main_fuel, self.year_built and, for hot water
fed from a secondary system, self.data["secondheat-description"].

Raises Exception("Investigate me") when several heating fuels remain and none of them is the community
scheme, NotImplementedError for a main fuel type or hot water system category that is not handled, and
KeyError when a heater type, system type, appliance or secondary heating description is missing from
its lookup table.
"""
# Based on the heating and the hot water
# Maps each main_heating flag to the fuel it implies
heating_fuel_mapping = {
'has_mains_gas': 'Natural Gas',
'has_electric': 'Electricity',
'has_oil': 'Oil',
'has_wood_logs': 'Wood Logs',
'has_coal': 'Coal',
'has_anthracite': 'Anthracite',
'has_smokeless_fuel': 'Smokeless Fuel',
'has_lpg': 'LPG',
'has_b30k': 'B30K Biofuel',
'has_air_source_heat_pump': 'Electricity',
'has_ground_source_heat_pump': 'Electricity',
'has_water_source_heat_pump': 'Electricity',
'has_electric_heat_pump': 'Electricity',
'has_solar_assisted_heat_pump': 'Electricity',
'has_exhaust_source_heat_pump': 'Electricity',
'has_community_heat_pump': 'Electricity',
'has_wood_pellets': 'Wood Pellets',
'has_community_scheme': 'Varied (Community Scheme)',
"has_dual_fuel_mineral_and_wood": 'Wood Logs',
"has_electricaire": 'Electricity',
"has_wood_chips": 'Wood Logs'
}
# Hot water
# Maps self.hotwater["heater_type"] to a fuel
heater_type_to_fuel = {
'gas instantaneous': 'Natural Gas',
'electric heat pump': 'Electricity',
'electric immersion': 'Electricity',
'gas boiler': 'Natural Gas',
'oil boiler': 'Oil',
'electric instantaneous': 'Electricity',
'gas multipoint': 'Natural Gas',
'heat pump': 'Electricity',
'solid fuel boiler': 'Solid Fuel',
'solid fuel range cooker': 'Solid Fuel',
'room heaters': 'Varied', # Could be any fuel, further specifics needed based on context
"single-point gas": "Natural Gas"
}
# Define a mapping from system types to general categories or modifications of fuel types
system_type_modification = {
'from main system': 'Main System',
'from secondary system': 'Secondary System',
'from second main heating system': 'Secondary System',
'community scheme': 'Community Scheme'
}
# Used as the last resort for hot water, keyed by self.hotwater["appliance"]
hotwater_appliance_to_fuel = {
'gas range cooker': 'Natural Gas',
'oil range cooker': 'Oil'
}
# Maps self.main_fuel["fuel_type"] to a fuel; used when the heating flags give no usable answer
fuel_map = {
None: "Natural Gas (Community Scheme)",
"mains gas": "Natural Gas (Community Scheme)",
"biomass": "Smokeless Fuel",
"electricity": "Electricity",
"biogas": "Smokeless Fuel",
"heat network": "Natural Gas (Community Scheme)",
"lpg": 'LPG',
"biodiesel": "Smokeless Fuel",
"b30d": "B30K Biofuel",
"coal": "Coal",
"oil": "Oil",
"unknown": None # Handle - anything post 2020 is electricity else gas
}
# Distinct fuels implied by every truthy main_heating flag (built via a set, so order is not defined)
self.heating_energy_source = list({
fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
})
if set(self.heating_energy_source) == {'Electricity', 'Natural Gas'}:
# It means they have mixed heating so we take the primary one, based on main fuel
# This will probably happen in the case of an extension
if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
self.heating_energy_source = ['Natural Gas']
else:
self.heating_energy_source = ['Electricity']
# Same tie-break for an Electricity + LPG mix, decided by the main fuel description
if set(self.heating_energy_source) == {'Electricity', 'LPG'}:
if self.main_fuel["clean_description"] in ["Lpg not community", "Lpg community"]:
self.heating_energy_source = ['LPG']
else:
self.heating_energy_source = ['Electricity']
if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}:
# It means they have mixed heating so we take the primary one, based on main fuel
# This will probably happen in the case of an extension
if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
self.heating_energy_source = ['Natural Gas']
else:
self.heating_energy_source = ['Wood Logs']
if len(self.heating_energy_source) > 1 and "Varied (Community Scheme)" not in self.heating_energy_source:
# We might have something like heating energy source equal to ['Natural Gas', 'Varied (Community Scheme)']
# so we treat this as community heating
raise Exception("Investigate me")
if len(self.heating_energy_source) == 0:
# Collect the distinct flag values, ignoring the two description fields
heating_flags = {
v for k, v in self.main_heating.items() if k not in ["original_description", "clean_description"]
}
hotwater_flags = {
v for k, v in self.hotwater.items() if k not in ["original_description", "clean_description"]
}
# If all flags are zero, we have a no data example
# NOTE(review): the first clause tests hotwater_flags == {None}; by symmetry with the second clause
# it looks like heating_flags == {None} was intended - confirm
if (heating_flags == {False} or hotwater_flags == {None}) and (
hotwater_flags == {False} or hotwater_flags == {None}):
# We have nodata so we try and rely on main fuel
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
mapped_fuel = fuel_map[self.main_fuel["fuel_type"]]
# NOTE(review): a fuel_map value (a string, or None for "unknown") is stored directly here,
# with no year_built fallback for "unknown" as in the community scheme branch below - confirm
self.heating_energy_source = mapped_fuel
self.hot_water_energy_source = mapped_fuel
return
else:
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")
# We handle edge case where no heating system is indicated
if self.main_fuel["fuel_type"] in fuel_map:
mapped_fuel = fuel_map[self.main_fuel["fuel_type"]]
self.heating_energy_source = mapped_fuel
self.hot_water_energy_source = mapped_fuel
return
if len(self.heating_energy_source) > 1:
# We treat this as a community scheme
self.heating_energy_source = ["Varied (Community Scheme)"]
# From here on heating_energy_source is a single value rather than a list
self.heating_energy_source = self.heating_energy_source[0]
if self.heating_energy_source == "Varied (Community Scheme)":
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
mapped_to = fuel_map[self.main_fuel["fuel_type"]]
if mapped_to is None and self.main_fuel["fuel_type"] == "unknown":
# Handle logic based on age band
if self.year_built >= 2020:
self.heating_energy_source = "Electricity"
else:
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
self.heating_energy_source = mapped_to
else:
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")
# Hot water: prefer the heater type, then the system type, then the appliance
if self.hotwater["heater_type"] is not None:
self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
# With solar, the value set just above is replaced by one built from the heating fuel
if self.hotwater["extra_features"] == "plus solar":
self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
return
elif self.hotwater["system_type"] is not None:
# Despite the name, `fuel` holds a system category ('Main System', 'Secondary System', ...)
fuel = system_type_modification[self.hotwater["system_type"]]
if self.hotwater["extra_features"] == "plus solar":
self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
return
if fuel in ['Main System', "Community Scheme"]:
self.hot_water_energy_source = self.heating_energy_source
elif fuel in ['Secondary System']:
# Check the secondary heating system
secondary_heating = self.data["secondheat-description"]
self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"]
else:
raise NotImplementedError(f"Investiage me - unhandled hot water fuel {fuel}")
else:
self.hot_water_energy_source = hotwater_appliance_to_fuel[self.hotwater["appliance"]]
def is_ashp_valid(self, measures):
if "air_source_heat_pump" in self.non_invasive_recommendations:

View file

@ -45,12 +45,14 @@ cp .env.example .env
## Running the Application
from within the application you can run with the following command:
from `model/backend/` you can run with the following command:
```commandline
uvicorn app.main:app --reload
```
Or run `sh run_local.sh`, which runs that same uvicorn command.
Your application will be available at the designated URL.
## API Documentation
@ -172,7 +174,7 @@ For instance, if your server is running locally on port 8000, you can use curl
to get a dummy token:
```commandline
curl http://localhost:8000/dummy-token
curl http://localhost:8000/local/dummy-token
```
You will receive a response containing the dummy JWT

View file

@ -1,4 +1,17 @@
FROM public.ecr.aws/lambda/python:3.10
# FROM python:3.11.10-bullseye
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
ARG EPC_AUTH_TOKEN
ENV DB_HOST=${DEV_DB_HOST}
ENV DB_PORT=${DEV_DB_PORT}
ENV DB_NAME=${DEV_DB_NAME}
ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}
# Set working directory (Lambda task root)
WORKDIR /var/task
@ -8,13 +21,17 @@ WORKDIR /var/task
# -----------------------------
COPY backend/address2UPRN/handler/requirements.txt .
# Install dependencies into Lambda runtime
RUN pip install --no-cache-dir -r requirements.txt
# -----------------------------
# Copy application code
# -----------------------------
# Copy necessary files for database and utility imports
COPY utils/ utils/
COPY backend/ backend/
COPY datatypes/ datatypes/
# Copy the handler
COPY backend/address2UPRN/main.py .
# -----------------------------

View file

@ -1,3 +1,11 @@
epc-api-python==1.0.2
pandas==2.2.2
numpy<2.0
requests
tqdm
pandas
openpyxl
epc-api-python==1.0.2
boto3==1.35.44
sqlmodel
sqlalchemy==2.0.36
psycopg2-binary==2.9.10
pydantic-settings==2.6.0

View file

@ -3,12 +3,23 @@ import os
from urllib.parse import urlencode
import pandas as pd
from difflib import SequenceMatcher
from tqdm import tqdm
from utils.logger import setup_logger
import re
from typing import Set
import json
import requests
from uuid import UUID
import uuid
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
from utils.s3 import (
save_csv_to_s3,
read_csv_from_s3 as read_csv_from_s3_dict,
parse_s3_uri,
)
from datetime import datetime
logger = setup_logger()
import re
EPC_AUTH_TOKEN = os.getenv(
"EPC_AUTH_TOKEN",
@ -17,9 +28,28 @@ EPC_AUTH_TOKEN = os.getenv(
if EPC_AUTH_TOKEN is None:
raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
import re
from difflib import SequenceMatcher
from typing import Set
def is_valid_postcode(postcode_clean: str) -> bool:
    """
    Validate postcode using postcodes.io.

    Expects a sanitised postcode (e.g. E84SQ).
    Returns True if valid, False otherwise.

    Any failure to obtain a usable answer (empty input, network error, timeout,
    non-2xx status, body that is not JSON) is reported as False rather than raised.
    """
    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"

    # Nothing to validate - avoid a pointless HTTP round trip
    if not postcode_clean:
        return False

    try:
        resp = requests.get(
            POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
            timeout=5,
        )
        resp.raise_for_status()
        # Coerce to bool so a missing or null "result" can never leak a non-bool to callers
        return bool(resp.json().get("result", False))
    except (requests.RequestException, ValueError):
        # Network issues, rate limits, etc. ValueError covers a body that is not valid JSON
        # on requests versions where the JSON decode error is not a RequestException.
        return False
def levenshtein(a: str, b: str) -> float:
@ -300,27 +330,29 @@ def get_uprn_candidates(
)
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
def get_uprn_with_epc_df(
user_inputed_address: str,
epc_df: pd.DataFrame,
verbose: bool = False,
):
"""
Return uprn (str)
Return False if failed to find a sensible matching epc
Return Nons when epc found but no UPRN
Return uprn (str) using a pre-fetched EPC dataframe.
This avoids calling the API multiple times for the same postcode.
"""
df = get_epc_data_with_postcode(postcode=postcode)
if df.empty:
if epc_df.empty:
return None
scored_df = get_uprn_candidates(
df,
epc_df,
user_address=user_inputed_address,
)
# Best score
best_score = scored_df.iloc[0]["lexiscore"]
if best_score <= 0:
return None
# # Return None if score is below threshold
# if best_score < 0.7:
# return None
# All rank-1 rows (possible draw)
top_rank_df = scored_df[scored_df["lexirank"] == 1]
@ -330,18 +362,41 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
return None
address = top_rank_df["address"].values[0]
lexiscore = float(top_rank_df["lexiscore"].values[0])
score = float(top_rank_df["lexiscore"].values[0])
logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
logger.info(f"Address found to be: {address}, with lexiscore {score}")
# Safe to return the agreed UPRN
found_uprn = top_rank_df.iloc[0]["uprn"]
if found_uprn == "":
return None
if return_address:
return found_uprn, address
return found_uprn
if verbose:
return (found_uprn, address, score)
else:
return found_uprn
def get_uprn(
    user_inputed_address: str,
    postcode: str,
    verbose: bool = False,
):
    """
    Look up the UPRN for a single address by fetching that postcode's EPC data.

    Each call makes one get_epc_data_with_postcode request and hands the matching over to
    get_uprn_with_epc_df, whose result is returned unchanged (with verbose=True that helper
    returns (uprn, address, score) for a match).

    When resolving several addresses in the same postcode, fetch the EPC dataframe once and
    call get_uprn_with_epc_df directly instead.
    """
    postcode_epcs = get_epc_data_with_postcode(postcode=postcode)
    return get_uprn_with_epc_df(user_inputed_address, postcode_epcs, verbose)
def resolve_uprns_for_postcode_group(
@ -424,148 +479,302 @@ def resolve_uprns_for_postcode_group(
)
def test(a, b):
assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
def save_results_to_s3(
    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
) -> bool:
    """
    Write the results DataFrame to S3 as a CSV file.

    The object key is ara_raw_outputs/<task_id>/<sub_task_id>/<ISO timestamp>_<8 chars of a uuid4>.csv.
    Failures are logged and reported through the return value; nothing is raised.

    :param results_df: The DataFrame containing results
    :param task_id: The task ID (first folder of the object key)
    :param sub_task_id: The sub task ID (second folder of the object key)
    :param bucket_name: The S3 bucket name (falls back to the S3_BUCKET_NAME env variable when None)
    :return: True if successful, False otherwise
    """
    # Only an omitted bucket falls back to the environment; an explicit empty string does not
    bucket_name = os.getenv("S3_BUCKET_NAME") if bucket_name is None else bucket_name
    if not bucket_name:
        logger.error(
            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
        )
        return False

    try:
        # Timestamp plus a short random suffix keeps repeated runs from overwriting each other
        file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}"
        file_key = f"ara_raw_outputs/{task_id}/{sub_task_id}/{file_name}.csv"

        if not save_csv_to_s3(results_df, bucket_name, file_key):
            logger.error(f"Failed to save results to S3")
            return False

        logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
        return True
    except Exception as exc:
        logger.error(f"Error saving results to S3: {str(exc)}")
        return False
def run_all_test():
# Basic usage with different post codes styles
test(get_epc_data_with_postcode("b93 8sy").shape[0], 63)
test(get_epc_data_with_postcode("B938sy").shape[0], 63)
test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
def handler(event, context, local=False):
print("=== Address2UPRN Lambda Handler ===")
print(f"Function: {context.function_name}")
print(f"Request ID: {context.aws_request_id}")
test(get_uprn("68", "b93 8sy"), "100070989938")
test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938")
test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633")
test(get_uprn("28 A", "se6 4tf"), "100023278633")
test(get_uprn("28A", "se6 4tf"), "100023278633")
test(get_uprn("6 Aitken Close", "E8 4SQ"), False)
# Handle local testing
if local is True:
event = {
"Records": [
{
"body": json.dumps(
{
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
"sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d",
"s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-16T12:00:20.257856_7b520c0e.csv",
}
)
}
]
}
# unique case
test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198")
test(get_uprn("5 , 1 Semley Gate", "e9 5nh"), "10008238198")
test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198")
test(get_uprn("1, 5 Semley Gate", "e9 5nh"), False)
test(
get_uprn("1 Semley Gate", "e9 5nh"), "10008238188"
) # this one return "flat 1, in 1 semley gate"
test(
get_uprn("48 Oswald Street", "E5 0BT"), False
) # this one return "flat 1, in 1 semley gate"
test(
get_uprn("42 Oswald Street", "E5 0BT"), False
) # this one return "flat 1, in 1 semley gate"
test(
get_uprn("46 Oswald Street", "E5 0BT"), False
) # this one return "flat 1, in 1 semley gate"
get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
get_uprn_candidates(
get_epc_data_with_postcode("Cr2 7dl"),
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
)
print(f"Event: {json.dumps(event, indent=2, default=str)}")
print("===================================")
# Handle both single event and batch events (SQS, etc.)
records = event.get("Records", [event])
results = []
errors = []
subtask_interface = SubTaskInterface()
if __name__ == "__main__":
INPUT_FILE = "hackney.xlsx"
ADDRESS_COL = "Address 1"
POSTCODE_COL = "Postcode"
UPRN_COL = "UPRN"
df = pd.read_excel(INPUT_FILE)
failures = []
for _, row in tqdm(
df.iterrows(),
total=len(df),
desc="Auditing UPRNs",
):
input_address = str(row[ADDRESS_COL]).strip()
postcode = str(row[POSTCODE_COL]).strip()
expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL]))
for record in records:
task_id = None
subtask_id = None
try:
epc_df = get_epc_data_with_postcode(postcode)
# Parse body (inputs)
if isinstance(record.get("body"), str):
body = json.loads(record["body"])
else:
body = record.get("body", {})
if epc_df.empty:
failures.append(
{
**row.to_dict(),
"found_uprn": None,
"best_match_uprn": None,
"best_match_address": None,
"best_match_lexiscore": None,
"status": "no_epc_results",
}
# Validate required fields
task_id = body.get("task_id")
subtask_id = body.get("sub_task_id")
s3_uri = body.get("s3_uri")
if not task_id:
errors.append({"error": "Missing required field: task_id"})
continue
if not subtask_id:
errors.append({"error": "Missing required field: sub_task_id"})
continue
if not s3_uri:
errors.append({"error": "Missing required field: s3_uri"})
continue
# Convert task_id to UUID
try:
task_id = UUID(task_id) if isinstance(task_id, str) else task_id
except ValueError as e:
errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"})
continue
# Convert sub_task_id to UUID
try:
subtask_id = (
UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id
)
except ValueError as e:
errors.append(
{"error": f"Invalid UUID format for sub_task_id: {str(e)}"}
)
continue
scored_df = get_uprn_candidates(
epc_df,
user_address=input_address,
)
# Update existing subtask to 'in progress'
subtask_interface.update_subtask_status(subtask_id, "in progress")
logger.info(f"Processing subtask {subtask_id} for task {task_id}")
best_row = scored_df.iloc[0]
# Parse S3 URI and read CSV from S3
logger.info(f"Reading data from S3: {s3_uri}")
try:
bucket, key = parse_s3_uri(s3_uri)
csv_data = read_csv_from_s3_dict(bucket, key)
df = pd.DataFrame(csv_data)
logger.info(f"Loaded {len(df)} rows from S3")
except Exception as s3_error:
logger.error(f"Failed to read data from S3: {s3_error}")
errors.append(
{"error": "Failed to read data from S3", "details": str(s3_error)}
)
try:
subtask_interface.update_subtask_status(
subtask_id, "failed", outputs={"error": str(s3_error)}
)
except Exception as db_error:
logger.error(f"Failed to update subtask status: {db_error}")
continue
best_match_uprn = str(best_row["uprn"])
best_match_address = best_row["address"]
best_match_lexiscore = round(float(best_row["lexiscore"]), 4)
# Process the rows
logger.info(f"Processing {len(df)} rows for task {task_id}")
found_uprn = get_uprn(input_address, postcode)
# Create user_input column by concatenating Address columns if not already present
if "user_input" not in df.columns:
df["user_input"] = (
df["Address 1"].fillna("")
+ " "
+ df["Address 2"].fillna("")
+ " "
+ df["Address 3"].fillna("")
).str.strip()
logger.info(f"Created user_input column from Address 1 and Address 2")
else:
logger.info(f"user_input column already present in data")
clean_df = df.dropna(subset=["postcode_clean"])
postcode_to_addresses = {
postcode: group.to_dict(orient="records")
for postcode, group in clean_df.groupby("postcode_clean", sort=False)
}
logger.info(f"Total postcodes: {len(postcode_to_addresses)}")
# Process each postcode group
results_data = []
for postcode, postcode_rows in postcode_to_addresses.items():
logger.info(
f"Processing postcode: {postcode} with {len(postcode_rows)} rows"
)
# Validate postcode before processing
if not is_valid_postcode(postcode):
logger.warning(f"Postcode {postcode} is invalid, skipping")
continue
# Fetch EPC data once per postcode
try:
epc_df = get_epc_data_with_postcode(postcode=postcode)
logger.info(
f"Fetched {len(epc_df)} EPC records for postcode {postcode}"
)
except Exception as e:
logger.error(
f"Failed to fetch EPC data for postcode {postcode}: {e}"
)
continue
# Process each address in this postcode with the same EPC data
for row in postcode_rows:
try:
user_input = row.get("user_input", "")
if not user_input:
logger.warning(
f"Skipping row with missing user_input for postcode {postcode}"
)
continue
# Get UPRN using the pre-fetched EPC data with all return options
result = get_uprn_with_epc_df(
user_inputed_address=user_input, epc_df=epc_df, verbose=True
)
# Parse result tuple if successful
if result:
uprn, found_address, score = result
logger.info(
f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})"
)
results_data.append(
{
**row, # Include all original data
"uprn": uprn,
"domna_found_address": found_address,
"domna_lexiscore": score,
}
)
else:
logger.warning(
f"No UPRN found for {user_input} in {postcode}"
)
results_data.append(
{
**row, # Include all original data
"uprn": None,
"domna_found_address": None,
"domna_lexiscore": None,
}
)
except Exception as e:
logger.error(
f"Error processing address {row.get('user_input', 'unknown')}: {e}"
)
# Still add the row with error markers
results_data.append(
{
**row,
"uprn": None,
"domna_found_address": None,
"domna_lexiscore": None,
"error": str(e),
}
)
continue
# Create results DataFrame
result_df = pd.DataFrame(results_data)
# Save results to S3
try:
save_results_to_s3(result_df, str(task_id), str(subtask_id))
except Exception as s3_error:
logger.error(f"Failed to save results to S3: {s3_error}")
# Mark subtask as completed
try:
subtask_interface.update_subtask_status(
subtask_id,
"completed",
outputs={"rows_processed": "todo -> show sensible output"},
)
logger.info(f"Marked subtask {subtask_id} as completed")
except Exception as db_error:
logger.error(f"Failed to mark subtask as completed: {db_error}")
except Exception as e:
failures.append(
{
**row.to_dict(),
"found_uprn": None,
"best_match_uprn": None,
"best_match_address": None,
"best_match_lexiscore": None,
"status": "exception",
"error": str(e),
}
)
continue
logger.error(f"Unexpected error processing record: {e}", exc_info=True)
errors.append({"error": "Unexpected error", "details": str(e)})
# Mark subtask as failed if we have one
if subtask_id:
try:
subtask_interface.update_subtask_status(
subtask_id, "failed", outputs={"error": str(e)}
)
except Exception as db_error:
logger.error(f"Failed to update subtask status: {db_error}")
found_uprn_norm = None if not found_uprn else str(found_uprn)
# Return error if all records failed
logger.info(results_data)
logger.info(results)
if errors and not results:
return {"statusCode": 500, "body": json.dumps({"errors": errors})}
if found_uprn_norm != expected_uprn:
failures.append(
{
**row.to_dict(),
"found_uprn": found_uprn_norm,
"best_match_uprn": best_match_uprn,
"best_match_address": best_match_address,
"best_match_lexiscore": best_match_lexiscore,
"status": ("no_match" if found_uprn_norm is None else "mismatch"),
}
)
failures_df = pd.DataFrame(failures)
print("===================================")
print(f"Total rows : {len(df)}")
print(f"Failures : {len(failures_df)}")
print("===================================")
failures_df.to_excel(
"hackney_uprn_failures.xlsx",
index=False,
)
return {
"statusCode": 200,
"body": json.dumps(
{"processed": results, "errors": errors if errors else None}
),
}
def handler(event, context):
    """Placeholder Lambda entry point: prints a greeting and returns it in a 200 response."""
    greeting = "hello world"
    print(greeting)
    response = {"statusCode": 200, "body": greeting}
    return response
# TO do function dispatcher,
# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)
# fix that
# Look again at flat 1
# pandas reader for the separate postcode_splitter output
# dump into s3
# TODO:
# Don't add results to return messages as its too verbose
# capture the exception as e and write it to S3, so the error logs can be found there
# Upload results to s3 as well as csv

View file

@ -1,3 +1,5 @@
# one time script for a customer forhousing
import pandas as pd
from tqdm import tqdm
from backend.address2UPRN.main import get_uprn
@ -5,20 +7,35 @@ from backend.address2UPRN.main import get_uprn
# Enable tqdm for pandas
tqdm.pandas()
df = pd.read_excel("address2.xlsx")
file_name = "forhousing.xlsx"
df = pd.read_excel(file_name)
def extract_uprn(row):
print(row["User Input"], row["Postcode"])
result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
user_input = "Address"
postcode = "Postcode"
result = get_uprn(
row[user_input],
row[postcode],
return_address=True,
return_EPC=True,
return_score=True,
)
if result is None:
return pd.Series([None, None])
return pd.Series([None, None, None, None])
uprn, found_address = result
return pd.Series([uprn, found_address])
uprn, found_address, epc, score = result
return pd.Series([uprn, found_address, epc, score])
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
df[["juntes uprn", "junte found address", "junte found epc", "junte score"]] = (
df.progress_apply(extract_uprn, axis=1)
)
df.to_excel("outputs2.xlsx", index=False)
df.to_excel(f"{file_name}_outputs.xlsx", index=False)
# TODO: add lexiscore
# TODO: run it
# TODO: give it to danny

View file

@ -1,54 +1,67 @@
import os
from functools import lru_cache
from pathlib import Path
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional
from utils.logger import setup_logger
logger = setup_logger()
def resolve_env_file() -> Optional[str]:
env = os.getenv("ENVIRONMENT", "local")
backend_dir = Path(__file__).resolve().parents[1]
if env == "local":
return "backend/.env"
env_file = backend_dir / ".env"
print("USING ENV FILE:", env_file)
logger.debug("USING ENV FILE:", env_file)
return str(env_file)
if env == "test":
return "backend/.env.test"
env_file = backend_dir / ".env.test"
logger.debug("USING ENV FILE:", env_file)
return str(env_file)
# prod = no env file
return None
class Settings(BaseSettings):
API_KEY: str
API_KEY: str = "changeme"
API_KEY_NAME: str = "X-API-KEY"
SECRET_KEY: str
ENVIRONMENT: str
DATA_BUCKET: str
SECRET_KEY: str = "changeme"
ENVIRONMENT: str = "changeme"
DATA_BUCKET: str = "changeme"
PLAN_TRIGGER_BUCKET: str
ENGINE_SQS_URL: str
ENGINE_SQS_URL: str = "changeme"
CATEGORISATION_SQS_URL: str = "changeme"
# Third parties
EPC_AUTH_TOKEN: str
GOOGLE_SOLAR_API_KEY: str
EPC_AUTH_TOKEN: str = "changeme"
GOOGLE_SOLAR_API_KEY: str = "changeme"
# Database settings
DB_HOST: str
DB_PASSWORD: str
DB_USERNAME: str
DB_PORT: str
DB_NAME: str
DB_HOST: str = "changeme"
DB_PASSWORD: str = "changeme"
DB_USERNAME: str = "changeme"
DB_PORT: str = "changeme"
DB_NAME: str = "changeme"
# Prediction buckets
SAP_PREDICTIONS_BUCKET: str
CARBON_PREDICTIONS_BUCKET: str
HEAT_PREDICTIONS_BUCKET: str
SAP_PREDICTIONS_BUCKET: str = "changeme"
CARBON_PREDICTIONS_BUCKET: str = "changeme"
HEAT_PREDICTIONS_BUCKET: str = "changeme"
# LIGHTING_COST_PREDICTIONS_BUCKET: str
# HEATING_COST_PREDICTIONS_BUCKET: str
# HOT_WATER_COST_PREDICTIONS_BUCKET: str
HEATING_KWH_PREDICTIONS_BUCKET: str
HOTWATER_KWH_PREDICTIONS_BUCKET: str
HEATING_KWH_PREDICTIONS_BUCKET: str = "changeme"
HOTWATER_KWH_PREDICTIONS_BUCKET: str = "changeme"
# Other S3 buckets
ENERGY_ASSESSMENTS_BUCKET: str
ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
# Optional AWS creds (only required in local)
AWS_ACCESS_KEY_ID: Optional[str] = None

5
backend/app/db/base.py Normal file
View file

@ -0,0 +1,5 @@
from sqlalchemy.orm import DeclarativeBase
# Declarative base class for this db package: it subclasses SQLAlchemy's DeclarativeBase
# and adds no members of its own.
# NOTE(review): presumably the ORM model classes inherit from this so they share one
# metadata/registry - confirm against the models package.
class Base(DeclarativeBase):
pass

View file

@ -1,5 +1,10 @@
from sqlalchemy import func
from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario
from backend.app.db.models.recommendations import (
PlanModel,
PlanRecommendations,
Recommendation,
ScenarioModel,
)
def aggregate_portfolio_recommendations(
@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations(
scenario_id: int,
total_valuation_increase: float,
labour_days: float,
aggregated_data: dict
aggregated_data: dict,
):
# Aggregate multiple fields
aggregates = (
@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations(
func.sum(Recommendation.estimated_cost).label("cost"),
func.sum(Recommendation.total_work_hours).label("total_work_hours"),
func.sum(Recommendation.kwh_savings).label("energy_savings"),
func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
func.sum(Recommendation.co2_equivalent_savings).label(
"co2_equivalent_savings"
),
func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
)
.join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
.join(
PlanRecommendations,
PlanRecommendations.recommendation_id == Recommendation.id,
)
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
.filter(
Plan.portfolio_id == portfolio_id,
Plan.scenario_id == scenario_id,
Recommendation.default == True
PlanModel.portfolio_id == portfolio_id,
PlanModel.scenario_id == scenario_id,
Recommendation.default == True,
)
.one()
)
@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations(
"energy_savings": aggregates.energy_savings or 0,
"co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
"energy_cost_savings": aggregates.energy_cost_savings or 0,
**aggregated_data
**aggregated_data,
}
# Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one()
# Update the data
for key, value in aggregates_dict.items():

View file

@ -1,17 +1,42 @@
from sqlalchemy import text
from sqlalchemy import insert, delete
from sqlalchemy.orm import Session
from typing import Any, Dict, List, Optional
from sqlalchemy import (
ColumnElement,
and_,
func,
inspect,
text,
insert,
delete,
select,
)
from sqlalchemy.orm import Session, Mapper
from sqlalchemy.exc import SQLAlchemyError
from sqlmodel import Session
from backend.app.db.models.recommendations import (
Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
PlanModel,
Recommendation,
RecommendationMaterials,
PlanRecommendations,
ScenarioModel,
)
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session, db_read_session
def prepare_plan_data(
p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations,
rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0,
p,
body,
scenario_id,
eco_packages,
valuations,
new_sap_points,
new_epc,
default_recommendations,
rebaselining_carbon=0,
rebaselining_heat_demand=0,
rebaselining_kwh=0,
rebaselining_bills=0,
):
"""
Utility function to prepare the data that goes into the production of a plan. Is a fairly rough and unstructured
@ -32,21 +57,37 @@ def prepare_plan_data(
"""
# Plan carbon savings
co2_savings = sum(
[r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)]
[
r["co2_equivalent_savings"]
for r in default_recommendations
if not r.get("already_installed", False)
]
)
post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings
# Plan bill savings
energy_bill_savings = sum(
[r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)]
[
r["energy_cost_savings"]
for r in default_recommendations
if not r.get("already_installed", False)
]
)
post_energy_bill = (
sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings
)
post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings
# energy consumption
energy_consumption_savings = sum(
[r["kwh_savings"] for r in default_recommendations if not r.get("already_installed", False)]
[
r["kwh_savings"]
for r in default_recommendations
if not r.get("already_installed", False)
]
)
post_energy_consumption = (
p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings
)
post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings
valuation_post_retrofit, valuation_increase = None, None
if valuations["current_value"]:
@ -54,9 +95,19 @@ def prepare_plan_data(
valuation_post_retrofit = valuations["average_increased_value"]
# plan costing data
cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)])
cost_of_works = sum(
[
r["total"]
for r in default_recommendations
if not r.get("already_installed", False)
]
)
contingency_cost = sum(
[r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)]
[
r.get("contingency", 0)
for r in default_recommendations
if not r.get("already_installed", False)
]
)
return {
@ -86,7 +137,7 @@ def prepare_plan_data(
"valuation_increase": valuation_increase,
"cost_of_works": float(cost_of_works),
"contingency_cost": float(contingency_cost),
"plan_type": eco_packages.get(p.id, (None, None, None))[2]
"plan_type": eco_packages.get(p.id, (None, None, None))[2],
}
@ -97,7 +148,7 @@ def create_plan(session: Session, plan):
:param plan: dictionary of data representing a plan to be created
"""
try:
new_plan = Plan(**plan)
new_plan = PlanModel(**plan)
session.add(new_plan)
session.flush()
session.commit()
@ -120,9 +171,7 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
]
stmt = (
insert(Plan)
.values(payload)
.returning(Plan.id, Plan.property_id)
insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id)
)
result = session.execute(stmt).all()
@ -133,14 +182,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
def create_scenario(session: Session, scenario: dict) -> int:
existing_scenario = (
session.query(Scenario)
session.query(ScenarioModel)
.filter_by(portfolio_id=scenario["portfolio_id"])
.first()
)
scenario["is_default"] = not bool(existing_scenario)
new_scenario = Scenario(**scenario)
new_scenario = ScenarioModel(**scenario)
session.add(new_scenario)
session.flush() # ensures ID is populated
@ -167,7 +216,9 @@ def create_recommendation(session: Session, recommendation):
raise e
def create_recommendation_material(session: Session, recommendation_id, material_id, depth):
def create_recommendation_material(
session: Session, recommendation_id, material_id, depth
):
"""
This function will create a record for the recommendation_material in the database if it does not exist.
:param session: The database session
@ -177,9 +228,7 @@ def create_recommendation_material(session: Session, recommendation_id, material
"""
new_recommendation_material = RecommendationMaterials(
recommendation_id=recommendation_id,
material_id=material_id,
depth=depth
recommendation_id=recommendation_id, material_id=material_id, depth=depth
)
session.add(new_recommendation_material)
session.flush()
@ -196,13 +245,17 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids):
"""
# Prepare a list of dictionaries for bulk insert
data = [{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids]
data = [
{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids
]
# Bulk insert using SQLAlchemy's core API
session.execute(insert(PlanRecommendations).values(data))
def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id):
def upload_recommendations(
session: Session, recommendations_to_upload, property_id, new_plan_id
):
try:
# Prepare data for bulk insert for Recommendation
recommendations_data = [
@ -213,8 +266,14 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"description": rec["description"],
"estimated_cost": float(rec["total"]),
"default": rec["default"],
"starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None,
"new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None,
"starting_u_value": (
float(rec.get("starting_u_value"))
if rec.get("starting_u_value")
else None
),
"new_u_value": (
float(rec.get("new_u_value")) if rec.get("new_u_value") else None
),
"sap_points": float(rec["sap_points"]),
"energy_savings": float(rec["heat_demand"]),
"kwh_savings": float(rec["kwh_savings"]),
@ -223,13 +282,17 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"energy_cost_savings": float(rec["energy_cost_savings"]),
"labour_days": float(rec["labour_days"]),
"already_installed": rec["already_installed"],
"heat_demand": float(rec["heat_demand"])
"heat_demand": float(rec["heat_demand"]),
}
for rec in recommendations_to_upload
]
# Insert the recommendations, get back the IDs
stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data)
stmt = (
insert(Recommendation)
.returning(Recommendation.id)
.values(recommendations_data)
)
result = session.execute(stmt)
uploaded_recommendation_ids = [row[0] for row in result]
@ -243,11 +306,15 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"quantity_unit": part.get("quantity_unit", None),
"estimated_cost": float(part.get("total", part.get("total_cost"))),
}
for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
for rec, recommendation_id in zip(
recommendations_to_upload, uploaded_recommendation_ids
)
for part in rec["parts"]
]
session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
session.bulk_insert_mappings(
RecommendationMaterials, recommendation_materials_data
)
# flush the changes to get the newly created IDs
session.flush()
@ -283,25 +350,27 @@ def bulk_upload_recommendations_and_materials(
plan_ids_by_index = []
for rec in recommendation_payload:
recommendation_rows.append({
"property_id": rec["property_id"],
"type": rec["type"],
"measure_type": rec["measure_type"],
"description": rec["description"],
"estimated_cost": rec["estimated_cost"],
"default": rec["default"],
"starting_u_value": rec["starting_u_value"],
"new_u_value": rec["new_u_value"],
"sap_points": rec["sap_points"],
"heat_demand": rec["heat_demand"],
"kwh_savings": rec["kwh_savings"],
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"energy_savings": rec["energy_savings"],
"energy_cost_savings": rec["energy_cost_savings"],
"total_work_hours": rec["total_work_hours"],
"labour_days": rec["labour_days"],
"already_installed": rec["already_installed"],
})
recommendation_rows.append(
{
"property_id": rec["property_id"],
"type": rec["type"],
"measure_type": rec["measure_type"],
"description": rec["description"],
"estimated_cost": rec["estimated_cost"],
"default": rec["default"],
"starting_u_value": rec["starting_u_value"],
"new_u_value": rec["new_u_value"],
"sap_points": rec["sap_points"],
"heat_demand": rec["heat_demand"],
"kwh_savings": rec["kwh_savings"],
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"energy_savings": rec["energy_savings"],
"energy_cost_savings": rec["energy_cost_savings"],
"total_work_hours": rec["total_work_hours"],
"labour_days": rec["labour_days"],
"already_installed": rec["already_installed"],
}
)
parts_by_index.append(rec["parts"])
plan_ids_by_index.append(rec["plan_id"])
@ -310,9 +379,7 @@ def bulk_upload_recommendations_and_materials(
# 2. Insert recommendations and get IDs
# ---------------------------------------------------------
result = session.execute(
insert(Recommendation)
.values(recommendation_rows)
.returning(Recommendation.id)
insert(Recommendation).values(recommendation_rows).returning(Recommendation.id)
)
recommendation_ids = [row[0] for row in result]
@ -324,19 +391,19 @@ def bulk_upload_recommendations_and_materials(
for recommendation_id, parts in zip(recommendation_ids, parts_by_index):
for part in parts:
materials_rows.append({
"recommendation_id": recommendation_id,
"material_id": part["material_id"],
"depth": part["depth"],
"quantity": part["quantity"],
"quantity_unit": part["quantity_unit"],
"estimated_cost": part["estimated_cost"],
})
materials_rows.append(
{
"recommendation_id": recommendation_id,
"material_id": part["material_id"],
"depth": part["depth"],
"quantity": part["quantity"],
"quantity_unit": part["quantity_unit"],
"estimated_cost": part["estimated_cost"],
}
)
if materials_rows:
session.execute(
insert(RecommendationMaterials).values(materials_rows)
)
session.execute(insert(RecommendationMaterials).values(materials_rows))
# ---------------------------------------------------------
# 4. Insert plan ↔ recommendation links
@ -346,26 +413,22 @@ def bulk_upload_recommendations_and_materials(
"plan_id": plan_id,
"recommendation_id": recommendation_id,
}
for plan_id, recommendation_id in zip(
plan_ids_by_index, recommendation_ids
)
for plan_id, recommendation_id in zip(plan_ids_by_index, recommendation_ids)
]
session.execute(
insert(PlanRecommendations).values(plan_recommendation_rows)
)
session.execute(insert(PlanRecommendations).values(plan_recommendation_rows))
def chunked(iterable, size=100):
    """
    Yield consecutive slices of ``iterable``, each holding at most ``size`` items.

    :param iterable: a sized, sliceable sequence (it must support ``len()`` and slicing)
    :param size: maximum number of items per yielded slice
    :return: generator of slices; the final slice may be shorter than ``size``
    """
    for start in range(0, len(iterable), size):
        # Exactly one yield per window, so each chunk is produced once.
        yield iterable[start : start + size]
def get_property_ids(portfolio_id: int) -> list[int]:
    """
    Return the ids of all properties that belong to the given portfolio.

    :param portfolio_id: id of the portfolio whose properties are looked up
    :return: list of property ids (empty when the portfolio has no properties)
    """
    with db_read_session() as session:
        rows = (
            session.query(PropertyModel.id)
            .filter(PropertyModel.portfolio_id == portfolio_id)
            .all()
        )
        # Each row is a 1-tuple ``(id,)``; unpack it to a plain value.
        return [pid for (pid,) in rows]
@ -381,12 +444,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# recommendation_materials (via recommendation)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING recommendation r
WHERE rm.recommendation_id = r.id
AND r.property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -394,12 +459,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# plan_recommendations (via plan)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations pr
USING plan p
WHERE pr.plan_id = p.id
AND p.property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -407,13 +474,15 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# funding_package_measures
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM funding_package_measures fpm
USING funding_package fp, plan p
WHERE fpm.funding_package_id = fp.id
AND fp.plan_id = p.id
AND p.property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -421,10 +490,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# inspections (direct)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM inspections
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -432,12 +503,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# funding_package
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM funding_package fp
USING plan p
WHERE fp.plan_id = p.id
AND p.property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -445,10 +518,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# recommendation (direct — CRITICAL FIX)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -456,10 +531,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# plan (direct)
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -467,18 +544,22 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# property-scoped tables
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM property_details_epc
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
session.execute(
text("""
text(
"""
DELETE FROM property_targets
WHERE property_id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -486,10 +567,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
# properties LAST
# --------------------------------------------------
session.execute(
text("""
text(
"""
DELETE FROM property
WHERE id = ANY(:property_ids)
"""),
"""
),
params,
)
@ -510,8 +593,7 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int):
with db_session() as session:
session.execute(
delete(Scenario)
.where(Scenario.portfolio_id == portfolio_id)
delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id)
)
print("Deleted scenarios for empty portfolio")
@ -530,6 +612,7 @@ def clear_portfolio_in_batches(
total = (len(property_ids) + property_batch_size - 1) // property_batch_size
import time
for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1):
print(f"Deleting batch {i}/{total} ({len(batch)} properties)")
start_time = time.time()
@ -542,3 +625,163 @@ def clear_portfolio_in_batches(
delete_portfolio_scenarios_if_empty(portfolio_id)
print("Portfolio cleared in batches.")
def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]:
    """
    Load every plan whose ``scenario_id`` is one of ``ids``.

    :param ids: scenario ids to match
    :return: the matching plans, read through a read session
    """
    query = select(PlanModel).where(PlanModel.scenario_id.in_(ids))
    with db_read_session() as session:
        reader: Any = session  # typed as Any to satisfy Pylance
        result = reader.exec(query)
        return result.scalars().all()
def get_most_recent_plans_by_portfolio_id(
    portfolio_id: int,
    min_property_id: Optional[int] = None,
    max_property_id: Optional[int] = None,
) -> List[PlanModel]:
    """
    Return one plan per (property, scenario) pair in a portfolio: the most recent one.

    "Most recent" is the row with the latest ``created_at``, ties broken by the highest ``id``.

    :param portfolio_id: portfolio whose plans are queried
    :param min_property_id: optional inclusive lower bound on ``property_id``
    :param max_property_id: optional inclusive upper bound on ``property_id``
    :return: the selected plans, ordered by property id then scenario id
    """
    conditions = [PlanModel.portfolio_id == portfolio_id]
    if min_property_id is not None:
        conditions.append(PlanModel.property_id >= min_property_id)
    if max_property_id is not None:
        conditions.append(PlanModel.property_id <= max_property_id)

    # NOTE: Postgres only - distinct(*columns) keeps the first row per key, and the
    # ORDER BY below makes that first row the newest plan for each key.
    dedupe_key = (PlanModel.property_id, PlanModel.scenario_id)
    newest_first = (PlanModel.created_at.desc(), PlanModel.id.desc())
    query = (
        select(PlanModel)
        .where(and_(*conditions))
        .distinct(*dedupe_key)
        .order_by(*dedupe_key, *newest_first)
    )

    with db_read_session() as session:
        reader: Any = session  # typed as Any to satisfy Pylance
        return reader.exec(query).scalars().all()
def get_most_recent_plans_by_scenario_ids(
    scenario_ids: List[int],
    min_property_id: Optional[int] = None,
    max_property_id: Optional[int] = None,
) -> List[PlanModel]:
    """
    Return one plan per (property, scenario) pair for the given scenarios: the most recent one.

    "Most recent" is the row with the latest ``created_at``, ties broken by the highest ``id``.

    :param scenario_ids: scenarios to search; an empty list returns ``[]`` without querying
    :param min_property_id: optional inclusive lower bound on ``property_id``
    :param max_property_id: optional inclusive upper bound on ``property_id``
    :return: the selected plans, ordered by property id then scenario id
    """
    if not scenario_ids:
        return []

    # Mandatory scenario filter first, then whichever property-id bounds were supplied.
    conditions: List[ColumnElement[bool]] = [PlanModel.scenario_id.in_(scenario_ids)]
    if min_property_id is not None:
        conditions.append(PlanModel.property_id >= min_property_id)
    if max_property_id is not None:
        conditions.append(PlanModel.property_id <= max_property_id)

    # NOTE: Postgres only - distinct(*columns) keeps the first row per key, and the
    # ORDER BY below makes that first row the newest plan for each key.
    per_property_and_scenario = (PlanModel.property_id, PlanModel.scenario_id)
    query = (
        select(PlanModel)
        .where(and_(*conditions))
        .distinct(*per_property_and_scenario)
        .order_by(
            *per_property_and_scenario,
            PlanModel.created_at.desc(),
            PlanModel.id.desc(),
        )
    )

    with db_read_session() as session:
        reader: Any = session  # typed as Any to satisfy Pylance
        return reader.exec(query).scalars().all()
def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]:
    """
    Load all scenarios attached to a portfolio.

    :param portfolio_id: portfolio whose scenarios are fetched
    :return: the matching scenarios
    """
    belongs_to_portfolio = ScenarioModel.portfolio_id == portfolio_id
    query = select(ScenarioModel).where(belongs_to_portfolio)
    with db_read_session() as session:
        reader: Any = session  # typed as Any to satisfy Pylance
        return reader.exec(query).scalars().all()
def get_scenarios_count_by_portfolio_id(portfolio_id: int) -> int:
    """
    Count the scenarios attached to a portfolio.

    :param portfolio_id: portfolio whose scenarios are counted
    :return: number of scenario rows for that portfolio
    """
    count_query = select(func.count()).select_from(ScenarioModel)
    count_query = count_query.where(ScenarioModel.portfolio_id == portfolio_id)
    with db_read_session() as session:
        reader: Any = session  # typed as Any to satisfy Pylance
        return reader.exec(count_query).scalar_one()
def get_default_plans(
    portfolio_id: int,
    min_property_id: Optional[int] = None,
    max_property_id: Optional[int] = None,
) -> List[PlanModel]:
    """
    Load the plans flagged ``is_default`` for a portfolio.

    :param portfolio_id: portfolio whose default plans are fetched
    :param min_property_id: optional inclusive lower bound on ``property_id``
    :param max_property_id: optional inclusive upper bound on ``property_id``
    :return: the matching default plans
    """
    conditions: List[ColumnElement[bool]] = [
        PlanModel.portfolio_id == portfolio_id,
        PlanModel.is_default.is_(True),
    ]
    if min_property_id is not None:
        conditions.append(PlanModel.property_id >= min_property_id)
    if max_property_id is not None:
        conditions.append(PlanModel.property_id <= max_property_id)

    query = select(PlanModel).where(and_(*conditions))
    with db_read_session() as session:
        reader: Any = session  # typed as Any to satisfy Pylance
        return reader.exec(query).scalars().all()
def bulk_update_plans(
    plan_models: List[PlanModel],
    scenario_models: List[ScenarioModel],
) -> int:
    """
    Write the current column values of the given plans and scenarios back to the database.

    Rows are matched on ``id``. Every other plan column is written; for scenarios every
    column except ``portfolio_id`` is written.

    :param plan_models: plans to update; when empty nothing is written at all
    :param scenario_models: scenarios to update alongside the plans
    :return: number of plan models submitted for update (0 when ``plan_models`` is empty)
    """
    if not plan_models:
        # NOTE(review): this also skips any scenario_models that were passed in -
        # confirm that callers never send scenarios without plans.
        return 0

    def _column_values(model: Any, excluded: set) -> Dict[str, Any]:
        # Snapshot the model's table columns, leaving out ``excluded``; ``id`` is always
        # added back because it is what the bulk update matches rows on.
        values: Dict[str, Any] = {
            c.name: getattr(model, c.name)
            for c in model.__table__.columns
            if c.name not in excluded
        }
        values["id"] = model.id
        return values

    # This function issues UPDATEs and commits, so it uses the write session
    # (``db_session``) rather than the read session.
    with db_session() as session:
        plan_mapper: Mapper[Any] = inspect(PlanModel)
        scenario_mapper: Mapper[Any] = inspect(ScenarioModel)

        plan_mappings: List[Dict[str, Any]] = [
            _column_values(plan, {"id"}) for plan in plan_models
        ]
        session.bulk_update_mappings(plan_mapper, plan_mappings)

        scenario_mappings: List[Dict[str, Any]] = [
            _column_values(scenario, {"id", "portfolio_id"})
            for scenario in scenario_models
        ]
        session.bulk_update_mappings(scenario_mapper, scenario_mappings)

        session.commit()

    return len(plan_models)

View file

@ -11,7 +11,7 @@ from sqlmodel import Session, select
from backend.app.db.connection import get_db_session
# ---- Models ----
from backend.app.db.models.tasks import Task, SubTask
from backend.app.db.models.tasks import SourceEnum, Task, SubTask
# ============================================================
@ -25,7 +25,12 @@ class SubTaskInterface:
# --------------------------------------------------------
# CREATE SUBTASK
# --------------------------------------------------------
def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, status=None):
def create_subtask(
self,
task_id: UUID,
inputs: Optional[Dict[str, Any]] = None,
status: Optional[str] = None,
):
now = datetime.now(timezone.utc)
with get_db_session() as session:
@ -56,8 +61,12 @@ class SubTaskInterface:
# UPDATE STATUS (in progress, complete, failed)
# --------------------------------------------------------
def update_subtask_status(
self, subtask_id: UUID, status: str, outputs=None, cloud_logs_url=None
):
self,
subtask_id: UUID,
status: str,
outputs: Optional[Dict[str, str]] = None,
cloud_logs_url: Optional[str] = None,
) -> SubTask:
"""
Update the status of a subtask, and recalculate the parent task progress.
:param subtask_id: UUID of the subtask to update
@ -177,9 +186,7 @@ class SubTaskInterface:
if not task:
return
subtasks = session.exec(
select(SubTask).where(SubTask.task_id == task_id)
).all()
subtasks = session.exec(select(SubTask).where(SubTask.task_id == task_id)).all()
statuses = [s.status.lower() for s in subtasks]
now = datetime.now(timezone.utc)
@ -211,7 +218,7 @@ class SubTaskInterface:
subtask_id: UUID,
status: str,
outputs: Optional[Dict[str, Any]],
cloud_logs_url: Optional[str]
cloud_logs_url: Optional[str],
):
now = datetime.now(timezone.utc)
@ -261,6 +268,8 @@ class TasksInterface:
service: Optional[str] = None,
inputs: Optional[Dict[str, Any]] = None,
task_only: bool = False,
source: Optional[SourceEnum] = None,
source_id: Optional[str] = None,
):
"""
Create a new Task record, and an initial SubTask in waiting state. Can also be used to create just
@ -279,6 +288,8 @@ class TasksInterface:
status="waiting",
job_started=now,
job_completed=None,
source=source,
source_id=source_id,
)
session.add(task)

View file

@ -7,9 +7,7 @@ from sqlalchemy import (
func,
UniqueConstraint,
)
from sqlalchemy.orm import declarative_base
Base = declarative_base()
from backend.app.db.base import Base
class PostcodeSearch(Base):

View file

@ -7,12 +7,12 @@ from sqlalchemy import (
String,
Enum as SqlEnum,
)
from sqlalchemy.orm import declarative_base, relationship
from sqlalchemy.orm import relationship
from backend.condition.domain.aspect_type import AspectType
from backend.condition.domain.element_type import ElementType
Base = declarative_base()
from backend.app.db.base import Base
ElementTypeDb = SqlEnum(
ElementType,

View file

@ -1,10 +1,8 @@
from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.dialects.postgresql import ENUM as PgEnum
import enum
from datetime import datetime
Base = declarative_base()
from backend.app.db.base import Base
from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey
from sqlalchemy.dialects.postgresql import ENUM as PgEnum
class EnergyAssessment(Base):
@ -190,7 +188,7 @@ class EnergyAssessmentDocuments(Base):
id = Column(BigInteger, primary_key=True, autoincrement=True)
uprn = Column(BigInteger, nullable=False)
energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
document_type = Column(PgEnum(DocumentTypeEnum, name="document_type", create_type=False), nullable=False)
document_type = Column(PgEnum(DocumentTypeEnum, name="document_type"), nullable=False)
document_location = Column(Text, nullable=False)
uploaded_at = Column(DateTime(timezone=True), nullable=False, default=datetime.utcnow)
scenario_id = Column(BigInteger, ForeignKey('energy_assessment_scenarios.id'), nullable=True)

View file

@ -4,11 +4,8 @@ from sqlalchemy import (
String,
JSON,
TIMESTAMP,
UniqueConstraint,
)
from sqlalchemy.orm import declarative_base
Base = declarative_base()
from backend.app.db.base import Base
class EpcStore(Base):

View file

@ -1,13 +1,19 @@
import enum
from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey
from sqlalchemy.orm import declarative_base
from sqlalchemy import (
Column,
Integer,
Float,
Enum,
TIMESTAMP,
BigInteger,
ForeignKey,
)
from sqlalchemy.sql import func
from backend.app.db.models.recommendations import Plan
from backend.app.db.base import Base
from backend.app.db.models.recommendations import PlanModel
from backend.app.db.models.materials import MaterialType, Material
Base = declarative_base()
class SchemeEnum(enum.Enum):
eco4 = "eco4"
@ -17,13 +23,17 @@ class SchemeEnum(enum.Enum):
class FundingPackage(Base):
__tablename__ = 'funding_package'
__tablename__ = "funding_package"
id = Column(Integer, primary_key=True, autoincrement=True)
plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False)
plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False)
scheme = Column(
Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False),
nullable=False
Enum(
SchemeEnum,
values_callable=lambda x: [e.value for e in x],
create_constraint=False,
),
nullable=False,
)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
project_funding = Column(Float)
@ -34,15 +44,23 @@ class FundingPackage(Base):
class FundingPackageMeasures(Base):
__tablename__ = 'funding_package_measures'
__tablename__ = "funding_package_measures"
id = Column(Integer, primary_key=True, autoincrement=True)
funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False)
measure = Column(
Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
nullable=False
funding_package_id = Column(
BigInteger, ForeignKey(FundingPackage.id), nullable=False
)
material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) # Assuming material table exists
measure = Column(
Enum(
MaterialType,
values_callable=lambda x: [e.value for e in x],
create_constraint=False,
),
nullable=False,
)
material_id = Column(
BigInteger, ForeignKey(Material.id), nullable=False
) # Assuming material table exists
innovation_uplift = Column(Float)
partial_project_score = Column(Float)
uplift_project_score = Column(Float)

View file

@ -9,11 +9,9 @@ from sqlalchemy import (
Enum,
ForeignKey,
)
from sqlalchemy.ext.declarative import declarative_base
from backend.app.db.base import Base
from backend.app.db.models.portfolio import PropertyModel
Base = declarative_base()
# -------------------------------------------------------------------
# ENUM DEFINITIONS (equivalent to drizzle pgEnum calls)

View file

@ -1,10 +1,9 @@
import enum
from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, Boolean
from sqlalchemy.orm import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
from backend.app.db.base import Base
class MaterialType(enum.Enum):

View file

@ -1,7 +1,5 @@
from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer
from sqlalchemy.orm import declarative_base
Base = declarative_base()
from backend.app.db.base import Base
class NonIntrusiveSurvey(Base):

View file

@ -1,13 +1,22 @@
import enum
import pytz
import datetime
from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import (
Column,
Integer,
BigInteger,
Text,
Boolean,
Float,
DateTime,
Enum,
ForeignKey,
CheckConstraint,
)
from backend.app.db.base import Base
from backend.app.db.models.users import UserModel # noqa
from backend.app.db.models.materials import MaterialType
Base = declarative_base()
class PortfolioStatus(enum.Enum):
SCOPING = "scoping"
@ -22,7 +31,7 @@ class PortfolioStatus(enum.Enum):
NEEDS_REVIEW = "needs review"
class PortfolioGoal(enum.Enum):
class PortfolioGoal(enum.Enum): # TODO: Move to domain?
VALUATION_IMPROVEMENT = "Valuation Improvement"
INCREASING_EPC = "Increasing EPC"
REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions"
@ -31,23 +40,43 @@ class PortfolioGoal(enum.Enum):
class Portfolio(Base):
__tablename__ = 'portfolio'
__tablename__ = "portfolio"
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(Text, nullable=False)
budget = Column(Float)
status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
goal = Column(Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), nullable=False)
status = Column(
Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
nullable=False,
)
goal = Column(
Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]),
nullable=False,
)
cost = Column(Float)
number_of_properties = Column(Integer)
co2_equivalent_savings = Column(Float) # Unit is always tonnes so we don't need to store the unit
energy_savings = Column(Float) # Unit is always kWh so we don't need to store the unit
energy_cost_savings = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
property_valuation_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
rental_yield_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
co2_equivalent_savings = Column(
Float
) # Unit is always tonnes so we don't need to store the unit
energy_savings = Column(
Float
) # Unit is always kWh so we don't need to store the unit
energy_cost_savings = Column(
Float
) # Unit is always £ so we don't need to store the unit for the moment
property_valuation_increase = Column(
Float
) # Unit is always £ so we don't need to store the unit for the moment
rental_yield_increase = Column(
Float
) # Unit is always £ so we don't need to store the unit for the moment
total_work_hours = Column(Float)
labour_days = Column(Float)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
created_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
updated_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
# Aggregations for summary
epc_breakdown_pre_retrofit = Column(Text)
epc_breakdown_post_retrofit = Column(Text)
@ -71,7 +100,7 @@ class PropertyCreationStatus(enum.Enum):
ERROR = "ERROR"
class Epc(enum.Enum):
class Epc(enum.Enum): # TODO: Move to domain?
A = "A"
B = "B"
C = "C"
@ -82,20 +111,27 @@ class Epc(enum.Enum):
class PropertyModel(Base):
__tablename__ = 'property'
__tablename__ = "property"
id = Column(Integer, primary_key=True, autoincrement=True)
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
creation_status = Column(Enum(PropertyCreationStatus), nullable=False)
uprn = Column(Integer)
uprn = Column(BigInteger)
landlord_property_id = Column(Text)
building_reference_number = Column(Integer)
status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
building_reference_number = Column(BigInteger)
status = Column(
Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
nullable=False,
)
address = Column(Text)
postcode = Column(Text)
has_pre_condition_report = Column(Boolean)
has_recommendations = Column(Boolean)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
created_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
updated_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
property_type = Column(Text)
built_form = Column(Text)
local_authority = Column(Text)
@ -127,7 +163,7 @@ rating_lookup = {
"Average": FeatureRating.AVERAGE,
"Poor": FeatureRating.POOR,
"Very Poor": FeatureRating.VERY_POOR,
"N/A": FeatureRating.NA
"N/A": FeatureRating.NA,
}
@ -136,32 +172,45 @@ def get_feature_rating_from_string(rating_str: str):
class PropertyDetailsEpcModel(Base):
__tablename__ = 'property_details_epc'
__tablename__ = "property_details_epc"
id = Column(Integer, primary_key=True, autoincrement=True)
property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
full_address = Column(Text)
lodgement_date = Column(DateTime)
is_expired = Column(Boolean)
total_floor_area = Column(Float)
walls = Column(Text)
walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5'))
walls_rating = Column(
Integer, CheckConstraint("walls_rating>=1 AND walls_rating<=5")
)
roof = Column(Text)
roof_rating = Column(Integer, CheckConstraint('roof_rating>=1 AND roof_rating<=5'))
roof_rating = Column(Integer, CheckConstraint("roof_rating>=1 AND roof_rating<=5"))
floor = Column(Text)
floor_rating = Column(Integer, CheckConstraint('floor_rating>=1 AND floor_rating<=5'))
floor_rating = Column(
Integer, CheckConstraint("floor_rating>=1 AND floor_rating<=5")
)
windows = Column(Text)
windows_rating = Column(Integer, CheckConstraint('windows_rating>=1 AND windows_rating<=5'))
windows_rating = Column(
Integer, CheckConstraint("windows_rating>=1 AND windows_rating<=5")
)
heating = Column(Text)
heating_rating = Column(Integer, CheckConstraint('heating_rating>=1 AND heating_rating<=5'))
heating_rating = Column(
Integer, CheckConstraint("heating_rating>=1 AND heating_rating<=5")
)
heating_controls = Column(Text)
heating_controls_rating = Column(
Integer, CheckConstraint('heating_controls_rating>=1 AND heating_controls_rating<=5')
Integer,
CheckConstraint("heating_controls_rating>=1 AND heating_controls_rating<=5"),
)
hot_water = Column(Text)
hot_water_rating = Column(Integer, CheckConstraint('hot_water_rating>=1 AND hot_water_rating<=5'))
hot_water_rating = Column(
Integer, CheckConstraint("hot_water_rating>=1 AND hot_water_rating<=5")
)
lighting = Column(Text)
lighting_rating = Column(Integer, CheckConstraint('lighting_rating>=1 AND lighting_rating<=5'))
lighting_rating = Column(
Integer, CheckConstraint("lighting_rating>=1 AND lighting_rating<=5")
)
mainfuel = Column(Text)
ventilation = Column(Text)
solar_pv = Column(Text)
@ -219,7 +268,7 @@ class PropertyDetailsSpatial(Base):
class PropertyDetailsMeter(Base):
__tablename__ = 'property_details_meter'
__tablename__ = "property_details_meter"
id = Column(Integer, primary_key=True, autoincrement=True)
uprn = Column(Integer, nullable=False)
energy_supplier = Column(Text)
@ -230,11 +279,13 @@ class PropertyDetailsMeter(Base):
class PropertyTargetsModel(Base):
__tablename__ = 'property_targets'
__tablename__ = "property_targets"
id = Column(Integer, primary_key=True, autoincrement=True)
property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
created_at = Column(
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
)
epc = Column(Enum(Epc))
heat_demand = Column(Text)
@ -242,23 +293,36 @@ class PropertyTargetsModel(Base):
class PortfolioUsers(Base):
    """Row linking a user to a portfolio together with the user's role."""

    __tablename__ = "portfolioUsers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    user_id = Column(Integer, ForeignKey("user.id"), nullable=False)
    portfolioId = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
    role = Column(Text, nullable=False)
    # The default must be a callable: passing ``datetime.datetime.now(...)``
    # directly evaluates it once at import time, so every row inserted by a
    # long-running process would share the process start time.
    created_at = Column(
        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
    )
    updated_at = Column(
        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
    )
class PropertyInstalledMeasures(Base):
    """
    This model keeps a record of the installed measures for each property, at the UPRN level
    """

    __tablename__ = "property_installed_measures"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # NOTE(review): PropertyModel.uprn was widened to BigInteger in this change
    # while this column is still Integer - confirm whether it should follow
    # (a schema migration would be needed).
    uprn = Column(Integer, nullable=False)
    measure_type = Column(
        Enum(
            MaterialType,
            # Persist the enum members' values rather than their names.
            values_callable=lambda x: [e.value for e in x],
            create_constraint=False,
        ),
        nullable=False,
    )
    # Defaults are callables so the timestamp is taken per insert; a bare
    # ``datetime.datetime.now(...)`` would be evaluated once at import time.
    created_at = Column(
        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
    )
    installed_at = Column(
        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
    )

View file

@ -1,17 +1,32 @@
from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum
from sqlalchemy.orm import declarative_base
import enum
from typing import Iterable, List, NamedTuple, Optional, Type
from sqlalchemy import (
Column,
BigInteger,
String,
Float,
Boolean,
TIMESTAMP,
ForeignKey,
Enum,
)
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy.sql import func
from backend.app.db.models.portfolio import Portfolio, PropertyModel
from datetime import datetime
from backend.app.db.base import Base
from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel
from backend.app.db.models.materials import Material
from backend.app.db.models.portfolio import Epc
from datatypes.enums import QuantityUnits
import enum
Base = declarative_base()
def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]:
    """Return the ``value`` of every member of ``enum_cls`` in iteration order.

    Used as the ``values_callable`` of the ``goal`` column enum so that the
    members' values are what gets listed for the column type.
    """
    values: List[str] = []
    for member in enum_cls:
        values.append(member.value)
    return values
class Recommendation(Base):
__tablename__ = 'recommendation'
__tablename__ = "recommendation"
id = Column(BigInteger, primary_key=True, autoincrement=True)
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
@ -37,19 +52,52 @@ class Recommendation(Base):
class RecommendationMaterials(Base):
__tablename__ = 'recommendation_materials'
__tablename__ = "recommendation_materials"
id = Column(BigInteger, primary_key=True, autoincrement=True)
recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
depth = Column(Float, nullable=False)
quantity = Column(Float, nullable=False)
quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False)
estimated_cost = Column(Float, nullable=False)
id: Mapped[int] = mapped_column(
BigInteger, primary_key=True, autoincrement=True
)
recommendation_id: Mapped[int] = mapped_column(
BigInteger,
ForeignKey("recommendation.id"),
nullable=False,
)
material_id: Mapped[int] = mapped_column(
BigInteger,
ForeignKey(Material.id),
nullable=False,
)
created_at: Mapped[datetime] = mapped_column(
TIMESTAMP,
nullable=False,
server_default=func.now(),
)
depth: Mapped[float] = mapped_column(
Float,
nullable=False,
)
quantity: Mapped[float] = mapped_column(
Float,
nullable=False,
)
quantity_unit: Mapped[QuantityUnits] = mapped_column(
Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]),
nullable=False,
)
estimated_cost: Mapped[float] = mapped_column(
Float,
nullable=False,
)
class PlanTypeEnum(enum.Enum):
class PlanTypeEnum(enum.Enum): # TODO: move this to domain?
SOLAR_ECO4 = "solar_eco4"
SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4"
EMPTY_CAVITY_ECO = "empty_cavity_eco"
@ -57,20 +105,36 @@ class PlanTypeEnum(enum.Enum):
EXTRACTION_ECO = "extraction_eco"
class Plan(Base):
__tablename__ = 'plan'
class PlanModel(Base):
__tablename__ = "plan"
id = Column(BigInteger, primary_key=True, autoincrement=True)
name = Column(String, nullable=True, default="")
portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
scenario_id = Column(BigInteger, ForeignKey('scenario.id')) # Doesn't have to be linked to a scenario
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
is_default = Column(Boolean, nullable=False)
valuation_increase_lower_bound = Column(Float)
valuation_increase_upper_bound = Column(Float)
valuation_increase_average = Column(Float)
plan_type = Column(
id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
name: Mapped[Optional[str]] = mapped_column(String, nullable=True, default="")
portfolio_id: Mapped[int] = mapped_column(
BigInteger, ForeignKey(Portfolio.id), nullable=False
)
property_id: Mapped[int] = mapped_column(
BigInteger, ForeignKey(PropertyModel.id), nullable=False
)
scenario_id: Mapped[Optional[int]] = mapped_column(
BigInteger, ForeignKey("scenario.id")
)
created_at: Mapped[datetime] = mapped_column( # type: ignore
TIMESTAMP, nullable=False, server_default=func.now()
)
is_default: Mapped[bool] = mapped_column(Boolean, nullable=False)
valuation_increase_lower_bound: Mapped[Optional[float]] = mapped_column(Float)
valuation_increase_upper_bound: Mapped[Optional[float]] = mapped_column(Float)
valuation_increase_average: Mapped[Optional[float]] = mapped_column(Float)
plan_type: Mapped[Optional[PlanTypeEnum]] = mapped_column(
Enum(
PlanTypeEnum,
name="plan_type",
@ -79,73 +143,90 @@ class Plan(Base):
),
nullable=True,
)
post_sap_points = Column(Float)
post_epc_rating = Column(Enum(Epc))
post_co2_emissions = Column(Float)
co2_savings = Column(Float)
post_energy_bill = Column(Float)
energy_bill_savings = Column(Float)
post_energy_consumption = Column(Float) # energy demand in kWh/year
energy_consumption_savings = Column(Float)
valuation_post_retrofit = Column(Float)
valuation_increase = Column(Float)
post_sap_points: Mapped[Optional[float]] = mapped_column(Float)
post_epc_rating: Mapped[Optional[Epc]] = mapped_column(Enum(Epc))
post_co2_emissions: Mapped[Optional[float]] = mapped_column(Float)
co2_savings: Mapped[Optional[float]] = mapped_column(Float)
post_energy_bill: Mapped[Optional[float]] = mapped_column(Float)
energy_bill_savings: Mapped[Optional[float]] = mapped_column(Float)
post_energy_consumption: Mapped[Optional[float]] = mapped_column(Float)
energy_consumption_savings: Mapped[Optional[float]] = mapped_column(Float)
valuation_post_retrofit: Mapped[Optional[float]] = mapped_column(Float)
valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
# Financial metrics, excluding funding
cost_of_works = Column(Float)
contingency_cost = Column(Float)
cost_of_works: Mapped[Optional[float]] = mapped_column(Float)
contingency_cost: Mapped[Optional[float]] = mapped_column(Float)
class PlanRecommendations(Base):
__tablename__ = 'plan_recommendations'
__tablename__ = "plan_recommendations"
id = Column(BigInteger, primary_key=True, autoincrement=True)
plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
plan_id = Column(BigInteger, ForeignKey("plan.id"), nullable=False)
recommendation_id = Column(
BigInteger, ForeignKey("recommendation.id"), nullable=False
)
class Scenario(Base):
__tablename__ = 'scenario'
class ScenarioModel(Base):
__tablename__ = "scenario"
id = Column(BigInteger, primary_key=True, autoincrement=True)
name = Column(String, nullable=False)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
budget = Column(Float)
portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
housing_type = Column(String, nullable=False)
goal = Column(String, nullable=False)
goal_value = Column(String, nullable=False)
trigger_file_path = Column(String, nullable=False)
already_installed_file_path = Column(String)
patches_file_path = Column(String)
non_invasive_recommendations_file_path = Column(String)
exclusions = Column(String)
multi_plan = Column(Boolean, default=False)
is_default = Column(Boolean, default=False, nullable=False)
id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
name: Mapped[str] = mapped_column(String, nullable=False)
created_at: Mapped[datetime] = mapped_column(
TIMESTAMP, nullable=False, server_default=func.now()
)
budget: Mapped[Optional[float]] = mapped_column(Float)
portfolio_id: Mapped[int] = mapped_column(
BigInteger, ForeignKey(Portfolio.id), nullable=False
)
housing_type: Mapped[str] = mapped_column(String, nullable=False)
goal: Mapped[PortfolioGoal] = mapped_column(
Enum(PortfolioGoal, values_callable=portfolio_goal_values, name="goal"),
nullable=False,
)
goal_value: Mapped[str] = mapped_column(String, nullable=False)
trigger_file_path: Mapped[str] = mapped_column(String, nullable=False)
already_installed_file_path: Mapped[Optional[str]] = mapped_column(String)
patches_file_path: Mapped[Optional[str]] = mapped_column(String)
non_invasive_recommendations_file_path: Mapped[Optional[str]] = mapped_column(
String
)
exclusions: Mapped[Optional[str]] = mapped_column(String)
multi_plan: Mapped[bool] = mapped_column(Boolean, default=False)
is_default: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)
# Add in the fields we need, which were previously sitting at the portfolio level
cost = Column(Float)
contingency = Column(Float)
funding = Column(Float)
total_work_hours = Column(Float)
energy_savings = Column(Float)
co2_equivalent_savings = Column(Float)
energy_cost_savings = Column(Float)
epc_breakdown_pre_retrofit = Column(String)
epc_breakdown_post_retrofit = Column(String)
number_of_properties = Column(BigInteger)
n_units_to_retrofit = Column(BigInteger)
co2_per_unit_pre_retrofit = Column(String)
co2_per_unit_post_retrofit = Column(String)
energy_bill_per_unit_pre_retrofit = Column(String)
energy_bill_per_unit_post_retrofit = Column(String)
energy_consumption_per_unit_pre_retrofit = Column(String)
energy_consumption_per_unit_post_retrofit = Column(String)
valuation_improvement_per_unit = Column(String)
cost_per_unit = Column(String)
cost_per_co2_saved = Column(String)
cost_per_sap_point = Column(String)
valuation_return_on_investment = Column(String)
property_valuation_increase = Column(Float)
labour_days = Column(Float)
cost: Mapped[Optional[float]] = mapped_column(Float)
contingency: Mapped[Optional[float]] = mapped_column(Float)
funding: Mapped[Optional[float]] = mapped_column(Float)
total_work_hours: Mapped[Optional[float]] = mapped_column(Float)
energy_savings: Mapped[Optional[float]] = mapped_column(Float)
co2_equivalent_savings: Mapped[Optional[float]] = mapped_column(Float)
energy_cost_savings: Mapped[Optional[float]] = mapped_column(Float)
epc_breakdown_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
epc_breakdown_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
number_of_properties: Mapped[Optional[int]] = mapped_column(BigInteger)
n_units_to_retrofit: Mapped[Optional[int]] = mapped_column(BigInteger)
co2_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
co2_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
energy_bill_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
energy_bill_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
energy_consumption_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(
String
)
energy_consumption_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(
String
)
valuation_improvement_per_unit: Mapped[Optional[str]] = mapped_column(String)
cost_per_unit: Mapped[Optional[str]] = mapped_column(String)
cost_per_co2_saved: Mapped[Optional[str]] = mapped_column(String)
cost_per_sap_point: Mapped[Optional[str]] = mapped_column(String)
valuation_return_on_investment: Mapped[Optional[str]] = mapped_column(String)
property_valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
labour_days: Mapped[Optional[float]] = mapped_column(Float)
class MeasureType(enum.Enum):
@ -201,3 +282,12 @@ class InstalledMeasure(Base):
heat_demand_savings = Column(Float)
source = Column(String)
is_active = Column(Boolean, nullable=False, default=True)
def enum_values(e: Iterable[PlanTypeEnum]) -> list[str]:
    """Collect the ``value`` attribute of each item yielded by ``e``, in order."""
    collected: list[str] = []
    for member in e:
        collected.append(member.value)
    return collected
class PlanPersistence(NamedTuple):
    """Pair of ORM rows describing one plan and the scenario it belongs to."""

    # ORM row for the plan itself.
    plan: PlanModel
    # ORM row for the scenario associated with the plan.
    scenario: ScenarioModel

View file

@ -2,9 +2,7 @@ import datetime
import pytz
from enum import Enum as PyEnum
from sqlalchemy import Column, Integer, Float, DateTime, JSON, BigInteger, ForeignKey, Enum, Boolean
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
from backend.app.db.base import Base
class Solar(Base):

View file

@ -1,14 +1,24 @@
import enum
from typing import Optional
from datetime import datetime
from uuid import UUID, uuid4
from sqlalchemy import Column, Enum
from sqlmodel import SQLModel, Field, Relationship
class SourceEnum(enum.Enum):  # TODO: move to domain?
    """Kinds of source a task can be attached to (see ``Task.source``)."""

    # The stored value is the string "portfolio_id".
    PORTFOLIO = "portfolio_id"
class Task(SQLModel, table=True):
__tablename__ = "tasks"
id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, )
id: UUID = Field(
default_factory=uuid4,
primary_key=True,
index=True,
)
task_source: str
job_started: Optional[datetime] = None
job_completed: Optional[datetime] = None
@ -16,13 +26,32 @@ class Task(SQLModel, table=True):
service: Optional[str] = None
updated_at: datetime = Field(default_factory=datetime.utcnow)
# source: Mapped[Optional[SourceEnum]] = mapped_column(Enum(SourceEnum)) <- SQLAlchemy not SQLModel
source: Optional[SourceEnum] = Field(
default=None,
sa_column=Column(
Enum(
SourceEnum,
name="source",
values_callable=lambda e: [m.value for m in e],
),
nullable=True,
),
)
source_id: Optional[str] = None
sub_tasks: list["SubTask"] = Relationship(back_populates="task")
class SubTask(SQLModel, table=True):
__tablename__ = "sub_task"
id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, )
id: UUID = Field(
default_factory=uuid4,
primary_key=True,
index=True,
)
task_id: UUID = Field(foreign_key="tasks.id")
job_started: Optional[datetime] = None

View file

@ -1,8 +1,6 @@
from sqlalchemy import Column, Integer, String, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import func
Base = declarative_base()
from backend.app.db.base import Base
class UserModel(Base):

View file

@ -1,4 +1,3 @@
import uuid
from typing import Optional
from sqlmodel import SQLModel, Field
@ -12,4 +11,4 @@ class Whlg(SQLModel, table=True):
index=True,
)
postcode: str = Field(nullable=False)
postcode: str = Field(nullable=False)

View file

@ -0,0 +1,160 @@
from __future__ import annotations
from dataclasses import replace
from typing import Optional
from backend.app.db.models.portfolio import PortfolioGoal
from backend.app.db.models.recommendations import (
PlanModel,
PlanPersistence,
ScenarioModel,
)
from backend.app.domain.classes.scenario import Scenario
from backend.app.domain.records.plan_record import PlanRecord
from backend.app.utils import sap_to_epc
class Plan:
def __init__(
self, record: PlanRecord, scenario: Scenario, id: Optional[int] = None
):
self.id: Optional[int] = id
self.record: PlanRecord = record
self.scenario: Scenario = scenario
@classmethod
def from_sqlalchemy(cls, plan_model: PlanModel, scenario: Scenario) -> Plan:
if not scenario:
raise ValueError(f"No Scenario associated with Plan of ID {plan_model.id}")
record = PlanRecord(
property_id=plan_model.property_id,
portfolio_id=plan_model.portfolio_id,
created_at=plan_model.created_at,
is_default=plan_model.is_default,
valuation_increase_lower_bound=plan_model.valuation_increase_lower_bound,
valuation_increase_upper_bound=plan_model.valuation_increase_upper_bound,
valuation_increase_average=plan_model.valuation_increase_average,
plan_type=plan_model.plan_type,
post_sap_points=plan_model.post_sap_points,
post_epc_rating=plan_model.post_epc_rating,
post_co2_emissions=plan_model.post_co2_emissions,
co2_savings=plan_model.co2_savings,
post_energy_bill=plan_model.post_energy_bill,
energy_bill_savings=plan_model.energy_bill_savings,
post_energy_consumption=plan_model.post_energy_consumption,
energy_consumption_savings=plan_model.energy_consumption_savings,
valuation_post_retrofit=plan_model.valuation_post_retrofit,
valuation_increase=plan_model.valuation_increase,
cost_of_works=plan_model.cost_of_works,
contingency_cost=plan_model.contingency_cost,
name=plan_model.name,
)
return cls(record=record, scenario=scenario, id=plan_model.id)
@property
def is_compliant(self) -> bool:
goal: PortfolioGoal = self.scenario.record.goal
match goal:
case PortfolioGoal.INCREASING_EPC:
return self._is_compliant_epc()
case _:
raise NotImplementedError
@property
def cost(self) -> float:
return (
self.record.cost_of_works
if self.record.cost_of_works is not None
else float("inf")
)
def to_sqlalchemy(self) -> PlanPersistence:
scenario_record = self.scenario.record
scenario_model = ScenarioModel(
id=self.scenario.id,
name=scenario_record.name,
created_at=scenario_record.created_at,
housing_type=scenario_record.housing_type,
goal=scenario_record.goal,
goal_value=scenario_record.goal_value,
trigger_file_path=scenario_record.trigger_file_path,
multi_plan=scenario_record.multi_plan,
is_default=scenario_record.is_default,
budget=scenario_record.budget,
already_installed_file_path=scenario_record.already_installed_file_path,
patches_file_path=scenario_record.patches_file_path,
non_invasive_recommendations_file_path=scenario_record.non_invasive_recommendations_file_path,
exclusions=scenario_record.exclusions,
cost=scenario_record.cost,
contingency=scenario_record.contingency,
funding=scenario_record.funding,
total_work_hours=scenario_record.total_work_hours,
energy_savings=scenario_record.energy_savings,
co2_equivalent_savings=scenario_record.co2_equivalent_savings,
energy_cost_savings=scenario_record.energy_cost_savings,
epc_breakdown_pre_retrofit=scenario_record.epc_breakdown_pre_retrofit,
epc_breakdown_post_retrofit=scenario_record.epc_breakdown_post_retrofit,
number_of_properties=scenario_record.number_of_properties,
n_units_to_retrofit=scenario_record.n_units_to_retrofit,
co2_per_unit_pre_retrofit=scenario_record.co2_per_unit_pre_retrofit,
co2_per_unit_post_retrofit=scenario_record.co2_per_unit_post_retrofit,
energy_bill_per_unit_pre_retrofit=scenario_record.energy_bill_per_unit_pre_retrofit,
energy_bill_per_unit_post_retrofit=scenario_record.energy_bill_per_unit_post_retrofit,
energy_consumption_per_unit_pre_retrofit=scenario_record.energy_consumption_per_unit_pre_retrofit,
energy_consumption_per_unit_post_retrofit=scenario_record.energy_consumption_per_unit_post_retrofit,
valuation_improvement_per_unit=scenario_record.valuation_improvement_per_unit,
cost_per_unit=scenario_record.cost_per_unit,
cost_per_co2_saved=scenario_record.cost_per_co2_saved,
cost_per_sap_point=scenario_record.cost_per_sap_point,
valuation_return_on_investment=scenario_record.valuation_return_on_investment,
property_valuation_increase=scenario_record.property_valuation_increase,
labour_days=scenario_record.labour_days,
)
record = self.record
plan_model = PlanModel(
id=self.id,
property_id=record.property_id,
portfolio_id=record.portfolio_id,
scenario_id=self.scenario.id,
created_at=record.created_at,
is_default=record.is_default,
valuation_increase_lower_bound=record.valuation_increase_lower_bound,
valuation_increase_upper_bound=record.valuation_increase_upper_bound,
valuation_increase_average=record.valuation_increase_average,
plan_type=record.plan_type,
post_sap_points=record.post_sap_points,
post_epc_rating=record.post_epc_rating,
post_co2_emissions=record.post_co2_emissions,
co2_savings=record.co2_savings,
post_energy_bill=record.post_energy_bill,
energy_bill_savings=record.energy_bill_savings,
post_energy_consumption=record.post_energy_consumption,
energy_consumption_savings=record.energy_consumption_savings,
valuation_post_retrofit=record.valuation_post_retrofit,
valuation_increase=record.valuation_increase,
cost_of_works=record.cost_of_works,
contingency_cost=record.contingency_cost,
name=record.name,
)
return PlanPersistence(plan=plan_model, scenario=scenario_model)
def set_default(self, value: bool) -> None:
self.record = replace(self.record, is_default=value)
self.scenario.record = replace(self.scenario.record, is_default=value)
def _is_compliant_epc(self) -> bool:
goal_value: str = self.scenario.record.goal_value
if self.record.post_epc_rating:
post_epc = self.record.post_epc_rating.value
elif self.record.post_sap_points:
post_epc = sap_to_epc(self.record.post_sap_points)
else:
return False
return post_epc <= goal_value

View file

@ -0,0 +1,58 @@
from __future__ import annotations
from dataclasses import replace
from typing import Optional
from backend.app.db.models.recommendations import ScenarioModel
from backend.app.domain.records.scenario_record import ScenarioRecord
class Scenario:
    """Domain object wrapping an immutable ``ScenarioRecord`` and its id."""

    def __init__(self, record: ScenarioRecord, id: Optional[int] = None):
        self.record = record
        self.id = id

    @classmethod
    def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario:
        """Build a ``Scenario`` by copying each record field off the ORM row."""
        # Attributes copied one-to-one from the model onto the record.
        field_names = (
            "name",
            "created_at",
            "housing_type",
            "goal",
            "goal_value",
            "trigger_file_path",
            "multi_plan",
            "is_default",
            "budget",
            "already_installed_file_path",
            "patches_file_path",
            "non_invasive_recommendations_file_path",
            "exclusions",
            "cost",
            "contingency",
            "funding",
            "total_work_hours",
            "energy_savings",
            "co2_equivalent_savings",
            "energy_cost_savings",
            "epc_breakdown_pre_retrofit",
            "epc_breakdown_post_retrofit",
            "number_of_properties",
            "n_units_to_retrofit",
            "co2_per_unit_pre_retrofit",
            "co2_per_unit_post_retrofit",
            "energy_bill_per_unit_pre_retrofit",
            "energy_bill_per_unit_post_retrofit",
            "energy_consumption_per_unit_pre_retrofit",
            "energy_consumption_per_unit_post_retrofit",
            "valuation_improvement_per_unit",
            "cost_per_unit",
            "cost_per_co2_saved",
            "cost_per_sap_point",
            "valuation_return_on_investment",
            "property_valuation_increase",
            "labour_days",
        )
        values = {
            field_name: getattr(scenario_model, field_name)
            for field_name in field_names
        }
        return cls(ScenarioRecord(**values), scenario_model.id)

    def set_default(self, value: bool) -> None:
        """Swap the record for a copy whose ``is_default`` is ``value``."""
        updated_record = replace(self.record, is_default=value)
        self.record = updated_record

View file

@ -0,0 +1,32 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from backend.app.db.models.portfolio import Epc
from backend.app.db.models.recommendations import PlanTypeEnum
@dataclass(frozen=True)
class PlanRecord:
    """Immutable value object holding the data fields of a plan.

    The first four fields are required; every other field defaults to ``None``.
    """

    # Required fields
    property_id: int
    portfolio_id: int
    created_at: datetime
    is_default: bool

    # Valuation increase estimates
    valuation_increase_lower_bound: Optional[float] = None
    valuation_increase_upper_bound: Optional[float] = None
    valuation_increase_average: Optional[float] = None

    plan_type: Optional[PlanTypeEnum] = None

    # Post-retrofit figures and the corresponding savings
    post_sap_points: Optional[float] = None
    post_epc_rating: Optional[Epc] = None
    post_co2_emissions: Optional[float] = None
    co2_savings: Optional[float] = None
    post_energy_bill: Optional[float] = None
    energy_bill_savings: Optional[float] = None
    post_energy_consumption: Optional[float] = None  # energy demand in kWh/year
    energy_consumption_savings: Optional[float] = None
    valuation_post_retrofit: Optional[float] = None
    valuation_increase: Optional[float] = None

    # Financial metrics, excluding funding
    cost_of_works: Optional[float] = None
    contingency_cost: Optional[float] = None

    name: Optional[str] = None

View file

@ -0,0 +1,47 @@
from dataclasses import dataclass
from datetime import datetime
from typing import Optional
from backend.app.db.models.portfolio import PortfolioGoal
@dataclass(frozen=True)
class ScenarioRecord:
    """Immutable value object holding the data fields of a scenario.

    The first eight fields are required; every other field defaults to
    ``None``.
    """

    # Required fields
    name: str
    created_at: datetime
    housing_type: str
    goal: PortfolioGoal
    goal_value: str
    trigger_file_path: str
    multi_plan: bool
    is_default: bool

    # Optional inputs
    budget: Optional[float] = None
    already_installed_file_path: Optional[str] = None
    patches_file_path: Optional[str] = None
    non_invasive_recommendations_file_path: Optional[str] = None
    exclusions: Optional[str] = None

    # Aggregate totals
    cost: Optional[float] = None
    contingency: Optional[float] = None
    funding: Optional[float] = None
    total_work_hours: Optional[float] = None
    energy_savings: Optional[float] = None
    co2_equivalent_savings: Optional[float] = None
    energy_cost_savings: Optional[float] = None
    epc_breakdown_pre_retrofit: Optional[str] = None
    epc_breakdown_post_retrofit: Optional[str] = None
    number_of_properties: Optional[int] = None
    n_units_to_retrofit: Optional[int] = None

    # Per-unit and ratio figures (typed as strings)
    co2_per_unit_pre_retrofit: Optional[str] = None
    co2_per_unit_post_retrofit: Optional[str] = None
    energy_bill_per_unit_pre_retrofit: Optional[str] = None
    energy_bill_per_unit_post_retrofit: Optional[str] = None
    energy_consumption_per_unit_pre_retrofit: Optional[str] = None
    energy_consumption_per_unit_post_retrofit: Optional[str] = None
    valuation_improvement_per_unit: Optional[str] = None
    cost_per_unit: Optional[str] = None
    cost_per_co2_saved: Optional[str] = None
    cost_per_sap_point: Optional[str] = None
    valuation_return_on_investment: Optional[str] = None

    property_valuation_increase: Optional[float] = None
    labour_days: Optional[float] = None

View file

@ -1,21 +1,29 @@
from typing import List
from uuid import UUID
import boto3
import json
import math
import asyncio
from contextlib import contextmanager
from sqlmodel import Session
from datetime import datetime
from fastapi import APIRouter, Depends
from backend.app.db.connection import db_session
from backend.app.db.models.tasks import SourceEnum
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.config import get_settings
from sqlalchemy.orm import sessionmaker
from backend.categorisation.categorisation_trigger_request import (
CategorisationTriggerRequest,
)
from utils.logger import setup_logger
from backend.app.db.connection import db_engine
from backend.app.db.functions.recommendations_functions import create_scenario
from backend.app.db.functions.recommendations_functions import (
create_scenario,
get_property_ids,
get_scenarios_count_by_portfolio_id,
)
from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface
logger = setup_logger()
@ -24,23 +32,88 @@ router = APIRouter(
prefix="/plan",
tags=["plan"],
dependencies=[Depends(validate_token)],
responses={404: {"description": "Not found"}}
responses={404: {"description": "Not found"}},
)
sqs_client = boto3.client("sqs")
settings = get_settings()
sqs_client = boto3.client("sqs", settings.AWS_DEFAULT_REGION)
@contextmanager
def db_session():
    """Context manager yielding a ``Session`` bound to ``db_engine``.

    Commits when the ``with`` body finishes normally, rolls back and re-raises
    when it raises, and always closes the session.
    """
    # NOTE(review): this module's import list also shows `db_session` imported
    # from backend.app.db.connection - confirm only one definition is intended.
    session = Session(db_engine)
    try:
        yield session
        # Only reached when the caller's block completed without raising.
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
@router.post("/categorisation", status_code=202)
async def trigger_categorisation(
    body: CategorisationTriggerRequest,
) -> dict[str, str]:
    """Split a portfolio's categorisation into property batches and queue them.

    The portfolio's property IDs are sorted and cut into contiguous batches,
    sized so that ``properties * scenarios`` stays at or below
    ``max_writes_per_batch``. One parent task is created, then one sub-task and
    one SQS message per batch; each message carries the batch's min/max
    property ID and its sub-task ID.

    Args:
        body: The categorisation request (portfolio plus optional scenario
            selection and priority order).

    Returns:
        A confirmation message; the route responds with HTTP 202.
    """
    payload: CategorisationTriggerRequest = CategorisationTriggerRequest.model_validate(
        body
    )
    logger.info("API triggered with body: %s", payload)

    property_ids: list[int] = get_property_ids(payload.portfolio_id)
    property_ids.sort()
    num_scenarios: int = get_scenarios_count_by_portfolio_id(payload.portfolio_id)
    total_plans_to_update: int = len(property_ids) * num_scenarios
    max_writes_per_batch: int = 1000
    # Clamp the divisor: a portfolio with no scenarios previously crashed the
    # request with ZeroDivisionError.
    properties_per_batch: int = max(
        1, max_writes_per_batch // max(1, num_scenarios)
    )
    num_property_batches: int = math.ceil(len(property_ids) / properties_per_batch)

    logger.info("total_plans_to_update: %s", total_plans_to_update)
    logger.info("properties_per_batch: %s", properties_per_batch)
    logger.info("num_property_batchess: %s", num_property_batches)

    # Create task
    task_id, _ = TasksInterface.create_task(
        task_source="backend/plan/router.py:trigger_categorisation",
        service="plan_categorisation",
        inputs=payload.model_dump(),
        task_only=True,
        source=SourceEnum.PORTFOLIO,
        source_id=str(payload.portfolio_id),
    )

    # Dispatch requests to lambdas
    subtask_interface = SubTaskInterface()
    for batch_index in range(num_property_batches):
        start: int = batch_index * properties_per_batch
        end: int = start + properties_per_batch
        batch_property_ids: List[int] = property_ids[start:end]
        if not batch_property_ids:
            continue

        # IDs are sorted, so each batch is described by its min/max ID range.
        batch_request: CategorisationTriggerRequest = CategorisationTriggerRequest(
            portfolio_id=payload.portfolio_id,
            scenarios_to_consider=payload.scenarios_to_consider,
            scenario_priority_order=payload.scenario_priority_order,
            min_property_id=min(batch_property_ids),
            max_property_id=max(batch_property_ids),
        )

        # Create sub-task for each
        subtask_id: UUID = subtask_interface.create_subtask(
            task_id=task_id, inputs=batch_request.model_dump()
        )
        batch_request.subtask_id = str(subtask_id)

        response = sqs_client.send_message(
            QueueUrl=settings.CATEGORISATION_SQS_URL,
            MessageBody=batch_request.model_dump_json(),
        )
        # Separator added between min and max so the ID range is readable.
        logger.info(
            f"Chunk {batch_index} sent to SQS. {len(batch_property_ids)} Property IDs in batch (total "
            f"{len(property_ids)}). Property IDs {min(batch_property_ids)}-{max(batch_property_ids)}. Message ID: "
            f"{response.get('MessageId')}"
        )
        await asyncio.sleep(0.05)  # Small delay to avoid SQS throttling

    return {"message": "Categorisation jobs distributed"}
@router.post("/trigger", status_code=202)
@ -50,8 +123,6 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
"""
logger.info("API triggered with body: %s", body)
settings = get_settings()
try:
data = body.model_dump()
except Exception as e:
@ -59,7 +130,10 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
return {"message": "Invalid request"}, 400
# If file_format is domna_asset_list and type is xlsx, read and chunk it
if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx":
if (
data.get("file_format") == "domna_asset_list"
and data.get("file_type") == "xlsx"
):
try:
total_rows = data.get("sheet_count", 0)
@ -88,8 +162,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
"patches_file_path": body.patches_file_path,
"non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
"exclusions": body.exclusions,
"multi_plan": body.multi_plan
}
"multi_plan": body.multi_plan,
},
)
# Insert the scenario ID into the data payload
data["scenario_id"] = scenario_id
@ -99,7 +173,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
task_source="backend/plan/router.py:trigger_plan_entrypoint",
service="plan_engine",
inputs=data,
task_only=True
task_only=True,
)
subtask_interface = SubTaskInterface()
@ -109,13 +183,14 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
index_end = min((i + 1) * chunk_size, total_rows)
message_payload = {
**data, "index_start": index_start, "index_end": index_end,
**data,
"index_start": index_start,
"index_end": index_end,
}
# Create a subtask for this chunk
subtask_id = subtask_interface.create_subtask(
task_id=task_id,
inputs=message_payload
task_id=task_id, inputs=message_payload
)
# Add task and subtask to message
@ -125,8 +200,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
message_body = json.dumps(message_payload)
response = sqs_client.send_message(
QueueUrl=settings.ENGINE_SQS_URL,
MessageBody=message_body
QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body
)
logger.info(
f"Chunk {i} sent to SQS. Rows {index_start}{index_end}. Message ID: {response.get('MessageId')}"
@ -153,8 +227,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
data["subtask_id"] = str(subtask_id)
message_body = json.dumps(data)
response = sqs_client.send_message(
QueueUrl=settings.ENGINE_SQS_URL,
MessageBody=message_body
QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body
)
logger.info(f"SQS message sent. Message ID: {response.get('MessageId')}")
except Exception as e:

View file

@ -12,6 +12,10 @@ WALL_INSULATION_MEASURES = ["internal_wall_insulation", "external_wall_insulatio
ROOF_INSULATION_MEASURES = [
"loft_insulation", "flat_roof_insulation", "room_roof_insulation", "sloping_ceiling_insulation"
]
WALL_INSULATION_WITH_VENTILATION_MEASURES = [
"internal_wall_insulation+mechanical_ventilation", "external_wall_insulation+mechanical_ventilation",
"cavity_wall_insulation+mechanical_ventilation"
]
# Both wall and roof insulation measures are eligible for ECO4. These are the remaining fabric and heating measures
# This is based on the measures we have recommendations for

View file

@ -1,5 +1,6 @@
import ast
import os
from typing import Optional
import msgpack
from uuid import UUID
from utils.s3 import read_from_s3
@ -24,7 +25,7 @@ def get_cleaned():
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name=get_settings().DATA_BUCKET
bucket_name=get_settings().DATA_BUCKET,
)
cleaned = msgpack.unpackb(cleaned, raw=False)
@ -56,32 +57,45 @@ def extract_property_request_data(
):
patch_has_uprn = "uprn" in patches[0] if patches else True
if patch_has_uprn:
patch = next((
x for x in patches if str(x["uprn"]) == str(address.uprn)
), {})
patch = next((x for x in patches if str(x["uprn"]) == str(address.uprn)), {})
else:
patch = next((
x for x in patches if (x["address"] == address.address) and (x["postcode"] == address.postcode)
), {})
patch = next(
(
x
for x in patches
if (x["address"] == address.address)
and (x["postcode"] == address.postcode)
),
{},
)
# Because we have some non-invasive recommendations that match on address and postcode, but not UPRN
# we need to check existence of uprn
has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False
has_uprn = (
"uprn" in non_invasive_recommendations[0]
if non_invasive_recommendations
else False
)
if has_uprn:
has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None]
if has_uprn:
property_non_invasive_recommendations = next((
x for x in non_invasive_recommendations if
(str(x["uprn"]) == str(uprn))
), {})
property_non_invasive_recommendations = next(
(x for x in non_invasive_recommendations if (str(x["uprn"]) == str(uprn))),
{},
)
# We patch the non-invasive recs that are ['cavity_extract_and_refill']
else:
property_non_invasive_recommendations = next((
x for x in non_invasive_recommendations if
(x["address"] == address.address) and (x["postcode"] == address.postcode)
), {})
property_non_invasive_recommendations = next(
(
x
for x in non_invasive_recommendations
if (x["address"] == address.address)
and (x["postcode"] == address.postcode)
),
{},
)
if isinstance(property_non_invasive_recommendations.get("recommendations"), str):
property_non_invasive_recommendations["recommendations"] = ast.literal_eval(
@ -90,7 +104,11 @@ def extract_property_request_data(
transformed = []
for rec in property_non_invasive_recommendations["recommendations"]:
if isinstance(rec, str):
transformed.append({"type": rec, })
transformed.append(
{
"type": rec,
}
)
else:
transformed.append(rec)
@ -102,26 +120,36 @@ def extract_property_request_data(
valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None]
if valuation_has_uprn:
property_valuation = next((
float(x["valuation"]) for x in valuation_data if
(str(x["uprn"]) == str(uprn))
), None)
property_valuation = next(
(
float(x["valuation"])
for x in valuation_data
if (str(x["uprn"]) == str(uprn))
),
None,
)
else:
property_valuation = next((
float(x["valuation"]) for x in valuation_data if
(x["address"] == address.address) and (x["postcode"] == address.postcode)
), None)
property_valuation = next(
(
float(x["valuation"])
for x in valuation_data
if (x["address"] == address.address)
and (x["postcode"] == address.postcode)
),
None,
)
# Return data class to give a structured format
return PropertyRequestData(
patch=patch,
non_invasive_recommendations=property_non_invasive_recommendations,
valuation=property_valuation
valuation=property_valuation,
)
def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str, list[str]] | tuple[
None, None, None, list]:
def parse_eco_packages(
addr: Address, prepared_epc
) -> tuple[list[str], int, str, list[str]] | tuple[None, None, None, list]:
solar_identification = addr.solar_reason
cavity_identification = addr.cavity_reason
if not solar_identification and not cavity_identification:
@ -140,47 +168,51 @@ def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str
"Solar Eligible": {
"measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"],
"target_sap": 86, # High B
"plan_type": "solar_eco4"
"plan_type": "solar_eco4",
},
"Solar Eligible, Solid Wall Uninsulated, EPC E or Below": {
"measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"],
"target_sap": 86, # High B
"plan_type": "solar_eco4"
"plan_type": "solar_eco4",
},
"Solar Eligible, Needs Heating Upgrade": {
"measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heaters",
"mechanical_ventilation"],
"measures": [
"solar_pv",
"loft_insulation",
"high_heat_retention_storage_heaters",
"mechanical_ventilation",
],
"target_sap": 86, # High B
"plan_type": "solar_hhrsh_eco4"
"plan_type": "solar_hhrsh_eco4",
},
"Non-Intrusive Data Shows Empty Cavity": {
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "empty_cavity_eco"
"plan_type": "empty_cavity_eco",
},
'Non-Intrusive Data Shows Empty Cavity, built after 2002': {
"Non-Intrusive Data Shows Empty Cavity, built after 2002": {
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "empty_cavity_eco"
"plan_type": "empty_cavity_eco",
},
"EPC Shows Empty Cavity, inspections show retro drilled": {
# EPC Indicates it's empty, so we simulate a fill
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "extraction_eco"
"plan_type": "extraction_eco",
},
"EPC Shows Empty Cavity, inspections show filled at build": {
# EPC Indicates it's empty, so we simulate a fill
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "extraction_eco"
"plan_type": "extraction_eco",
},
"EPC Shows Empty Cavity": {
# EPC Indicates it's empty, so we simulate a fill
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "empty_cavity_eco"
}
"plan_type": "empty_cavity_eco",
},
}
# Always prioritise solar
@ -214,9 +246,13 @@ def build_cloudwatch_log_url(start_ms: int) -> str:
Build a CloudWatch Logs URL for the current Lambda invocation,
including timestamp window from start_ms to end_ms (epoch ms).
"""
logger.info("Building cloudwatch logs URL")
region = os.environ["AWS_REGION"]
logger.info("Building cloudwatch logs URL: Got AWS region")
log_group = os.environ["AWS_LAMBDA_LOG_GROUP_NAME"]
logger.info("Building cloudwatch logs URL: Got lambda log group name")
log_stream = os.environ["AWS_LAMBDA_LOG_STREAM_NAME"]
logger.info("Building cloudwatch logs URL: Got lambda log stream name")
# CloudWatch console requires / encoded as $252F
encoded_group = log_group.replace("/", "$252F")
@ -232,15 +268,21 @@ def build_cloudwatch_log_url(start_ms: int) -> str:
)
def handle_error(msg, e, subtask_id, status=500, start_ms=None):
def handle_error(
msg: str,
exception: Exception,
subtask_id: str,
status_code: int = 500,
start_ms: Optional[int] = None,
):
# When the pipeline fails, handles error process
cloud_logs_url = build_cloudwatch_log_url(start_ms)
SubTaskInterface().update_subtask_status(
subtask_id=UUID(subtask_id),
status="failed",
outputs=str(e),
cloud_logs_url=cloud_logs_url
outputs=str(exception),
cloud_logs_url=cloud_logs_url,
)
logger.error(msg, exc_info=True)
return Response(status_code=status, content=msg)
return Response(status_code=status_code, content=msg)

View file

@ -10,7 +10,7 @@ mangum==0.19.0
# AWS
boto3==1.35.44
# Data
openpyxl==3.1.2
openpyxl==3.1.5
# Basic
pytz
sqlmodel

View file

@ -9,7 +9,7 @@ from backend.app.tasks.schema import (
CreateSubTaskRequest,
UpdateSubTaskStatusRequest,
FinalizeSubTaskRequest,
TaskSqsTriggerRequest
TaskSqsTriggerRequest,
)
# Correct location of interfaces
@ -51,18 +51,18 @@ async def get_task(task_id: UUID):
if not task:
raise HTTPException(status_code=404, detail="Task not found")
subtasks = session.exec(
select(SubTask).where(SubTask.taskId == task_id)
).all()
subtasks = session.exec(select(SubTask).where(SubTask.taskId == task_id)).all()
formatted = []
for st in subtasks:
formatted.append({
**st.dict(),
"inputs": json.loads(st.inputs) if st.inputs else None,
"outputs": json.loads(st.outputs) if st.outputs else None,
"cloud_logs_url": st.cloudLogsURL,
})
formatted.append(
{
**st.dict(),
"inputs": json.loads(st.inputs) if st.inputs else None,
"outputs": json.loads(st.outputs) if st.outputs else None,
"cloud_logs_url": st.cloudLogsURL,
}
)
return {
"task": task,
@ -111,7 +111,10 @@ async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusReques
# ===
# Sub task is complete
@router.post("/subtask/{subtask_id}/finalize", summary="Finalize a subtask with status, outputs, logs")
@router.post(
"/subtask/{subtask_id}/finalize",
summary="Finalize a subtask with status, outputs, logs",
)
async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest):
subtasks = SubTaskInterface()
@ -120,7 +123,7 @@ async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest):
subtask_id=subtask_id,
status=req.status,
outputs=req.outputs,
cloud_logs_url=req.cloud_logs_url
cloud_logs_url=req.cloud_logs_url,
)
return {
@ -142,9 +145,10 @@ from backend.app.tasks.schema import TaskSqsTriggerRequest
from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface
from backend.app.config import get_settings
sqs = boto3.client("sqs")
@router.post("/trigger", summary="Create task + subtask and publish to SQS", status_code=202)
@router.post(
"/trigger", summary="Create task + subtask and publish to SQS", status_code=202
)
async def trigger_task(req: TaskSqsTriggerRequest):
"""
Creates a Task + SubTask, then pushes the SubTask into SQS so a Lambda can process it.
@ -152,11 +156,12 @@ async def trigger_task(req: TaskSqsTriggerRequest):
"""
settings = get_settings()
sqs = boto3.client("sqs", settings.AWS_DEFAULT_REGION)
tasks = TasksInterface()
# ---- Normalize empty inputs ----
inputs = req.inputs or {} # ensures {} even if null
inputs = req.inputs or {} # ensures {} even if null
# ---- 1. Create Task + SubTask ----
task_id, subtask_id = tasks.create_task(
@ -174,8 +179,8 @@ async def trigger_task(req: TaskSqsTriggerRequest):
try:
response = sqs.send_message(
QueueUrl=f"https://sqs.{settings.AWS_REGION}.amazonaws.com/"
f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue",
MessageBody=json.dumps(sqs_payload)
f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue",
MessageBody=json.dumps(sqs_payload),
)
except Exception as e:
raise HTTPException(status_code=500, detail=f"SQS error: {e}")
@ -186,4 +191,4 @@ async def trigger_task(req: TaskSqsTriggerRequest):
"subtask_id": subtask_id,
"sqs_message_id": response.get("MessageId"),
"inputs_sent": inputs,
}
}

View file

View file

@ -0,0 +1,17 @@
from typing import List, Optional
from pydantic import BaseModel
class CategorisationTriggerRequest(BaseModel):
    """Payload describing a categorisation run for one portfolio.

    Only ``portfolio_id`` is required; every other field defaults to ``None``.
    """

    portfolio_id: int
    # Optional scenario selection and tie-break order (lists of scenario IDs).
    scenarios_to_consider: Optional[List[int]] = None
    scenario_priority_order: Optional[List[int]] = None
    # Optional property-ID bounds for processing a sub-range of the portfolio.
    # NOTE(review): presumably inclusive bounds - confirm against the DB queries.
    min_property_id: Optional[int] = None
    max_property_id: Optional[int] = None
    # String form of the tracking sub-task's ID, when one exists.
    subtask_id: Optional[str] = None
# {"portfolio_id": 556, "scenarios_to_consider": [1039,1041], "scenario_priority_order": [1041,1039]}

View file

@ -0,0 +1,42 @@
FROM public.ecr.aws/lambda/python:3.11
# For local running:
# FROM python:3.11.10-bullseye
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
# Set working directory (Lambda task root)
WORKDIR /var/task
# Environment
ENV DB_HOST=${DEV_DB_HOST}
ENV DB_PORT=${DEV_DB_PORT}
ENV DB_NAME=${DEV_DB_NAME}
COPY backend/.env.test backend/.env
# -----------------------------
# Copy requirements FIRST (for Docker layer caching)
# -----------------------------
COPY backend/categorisation/handler/requirements.txt .
# Install dependencies into Lambda runtime
RUN pip install --no-cache-dir -r requirements.txt
# -----------------------------
# Copy application code
# -----------------------------
COPY utils/ utils/
# NOTE: if build is ever slow we can be more specific with which files are copied
COPY backend/ backend/
COPY datatypes/ datatypes/
# -----------------------------
# Lambda handler
# -----------------------------
CMD ["backend/categorisation/handler/handler.handler"]
# For local running
# CMD ["python", "-m", "backend.categorisation.handler.handler"]

View file

@ -0,0 +1,34 @@
import json
import time
from typing import Any, Mapping
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
from backend.app.plan.utils import build_cloudwatch_log_url
from backend.categorisation.categorisation_trigger_request import (
CategorisationTriggerRequest,
)
from backend.categorisation.processor import process_portfolio
from utils.logger import setup_logger
logger = setup_logger()
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Lambda entry point: process each record in ``event["Records"]``.

    Each record's ``body`` is parsed as JSON, validated as a
    ``CategorisationTriggerRequest`` and passed to ``process_portfolio``.
    A failure in one record is logged with its traceback and does not stop the
    remaining records from being processed.

    Args:
        event: Invocation event; records are read from its ``Records`` key.
        context: Lambda context object (unused).
    """
    logger.info("Received message")
    records = event.get("Records", [])
    logger.info(f"Number of events: {len(records)}")

    for record in records:
        try:
            body_dict = json.loads(record["body"])
            logger.debug("Validating request body")
            payload = CategorisationTriggerRequest.model_validate(body_dict)
            logger.debug("Successfully validated request body")
            process_portfolio(payload)
        except Exception as e:
            logger.info("Handler exception")
            # exc_info keeps the traceback; the message alone is rarely enough
            # to diagnose a failed record.
            logger.error(f"Failed to process record: {e}", exc_info=True)

View file

@ -0,0 +1,10 @@
sqlmodel
pydantic-settings
psycopg2-binary==2.9.10
starlette
# Not used but needed to satisfy imports
pytz==2024.2
msgpack==1.1.0
numpy<2
pandas==2.2.3

View file

@ -0,0 +1,11 @@
version: "3.9"
services:
categorisation-lambda:
build:
context: ../../../
dockerfile: backend/categorisation/handler/Dockerfile
ports:
- "9000:8080"
env_file:
- ../../../.env

View file

@ -0,0 +1,31 @@
#!/usr/bin/env python3
import json
import requests
HOST = "localhost"
PORT = "9000"
LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
payload = {
"Records": [
{
"body": json.dumps(
{
"portfolio_id": 569,
"scenarios_to_consider": [],
"scenario_priority_order": [],
"min_property_id": 660418,
"max_property_id": 660917,
"subtask_id": "6a0bcbac-ddab-435f-8708-8acd4662b067",
}
)
}
]
}
response = requests.post(LAMBDA_URL, json=payload)
print("Status code:", response.status_code)
print("Response:")
print(response.text)

View file

@ -0,0 +1,24 @@
from typing import List
from backend.categorisation.categorisation_trigger_request import (
CategorisationTriggerRequest,
)
from backend.categorisation.processor import process_portfolio
def main() -> None:
    """Run categorisation in-process for a hard-coded portfolio.

    Both scenario lists are empty and no sub-task ID is supplied.
    """
    request = CategorisationTriggerRequest(
        portfolio_id=556,
        scenarios_to_consider=[],
        scenario_priority_order=[],
    )
    process_portfolio(request)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,261 @@
import time
from collections import defaultdict
from typing import Dict, List, Optional
from uuid import UUID
from starlette.responses import Response
from backend.app.db.functions.recommendations_functions import (
bulk_update_plans,
get_default_plans,
get_most_recent_plans_by_portfolio_id,
get_most_recent_plans_by_scenario_ids,
get_scenarios_by_portfolio_id,
)
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
from backend.app.db.models.recommendations import PlanModel, ScenarioModel
from backend.app.domain.classes.plan import Plan
from backend.app.domain.classes.scenario import Scenario
from backend.app.plan.utils import build_cloudwatch_log_url, handle_error
from backend.categorisation.categorisation_trigger_request import (
CategorisationTriggerRequest,
)
from utils.logger import setup_logger
logger = setup_logger()
def process_portfolio(
    body: CategorisationTriggerRequest,
) -> Response:  # TODO: make this a class
    """Choose and persist the default plan for each property in a portfolio.

    Steps:
      1. Load the portfolio's scenarios and the plans under consideration
         (optionally restricted by scenario IDs and a property-ID range).
      2. Unset every existing default plan in that range.
      3. For each property, mark the plan returned by
         ``choose_cheapest_relevant_plan`` as default and all others as not.
      4. Write every touched plan back to the database in one bulk update.

    When ``body.subtask_id`` is set, the sub-task is moved to "in progress" and
    then "complete", or to "failed" through ``handle_error``.

    Args:
        body: The categorisation request.

    Returns:
        A 200 ``Response`` on success, or the ``Response`` from
        ``handle_error`` when processing fails and a sub-task ID is present.

    Raises:
        Exception: Any processing error, re-raised when there is no sub-task
            to record the failure against.
    """
    portfolio_id: int = body.portfolio_id
    scenarios_to_consider: Optional[List[int]] = body.scenarios_to_consider
    scenario_priority_order: Optional[List[int]] = body.scenario_priority_order
    min_property_id: Optional[int] = body.min_property_id
    max_property_id: Optional[int] = body.max_property_id
    subtask_id: Optional[str] = body.subtask_id

    logger.info(f"Processing portfolio {portfolio_id}")
    start_ms = int(time.time() * 1000)

    # The CloudWatch URL is only used for sub-task updates, and building it
    # reads Lambda-only environment variables. Building it only when a sub-task
    # exists lets this run outside Lambda (e.g. the local runner).
    cloud_logs_url: Optional[str] = None
    if subtask_id:
        cloud_logs_url = build_cloudwatch_log_url(start_ms)
        SubTaskInterface().update_subtask_status(
            subtask_id=UUID(subtask_id),
            status="in progress",
            cloud_logs_url=cloud_logs_url,
        )

    try:
        all_scenarios: List[Scenario] = _load_scenarios_for_portfolio(portfolio_id)
        plans_by_id: Dict[int, Plan] = (
            {}
        )  # TODO: make this an in-memory repository class

        if scenarios_to_consider:
            if len(scenarios_to_consider) < 2:
                raise ValueError(
                    "Cannot run auto categorisation for fewer than 2 scenarios"
                )

        # first get all plans that we're interested in
        plans_for_consideration: List[Plan] = _load_plans_for_portfolio(
            portfolio_id,
            all_scenarios,
            scenarios_to_consider,
            min_property_id,
            max_property_id,
        )
        for plan in plans_for_consideration:
            if plan.id is not None:  # just in case
                plans_by_id[plan.id] = plan

        # then unset existing defaults on domain objects regardless of whether they're under consideration or not
        default_plans: List[Plan] = _get_default_plans(
            portfolio_id, all_scenarios, min_property_id, max_property_id
        )
        for plan in default_plans:
            plan.set_default(False)
            if plan.id is not None:  # just in case
                plans_by_id[plan.id] = plan
        logger.info(f"Successfully unset {len(default_plans)} default plan(s)")

        # then set new defaults on domain objects under consideration
        # (these entries overwrite the unset-default entries stored above for
        # the same plan ID, so the order of these three passes matters)
        plans_for_consideration_by_property: Dict[int, List[Plan]] = (
            _group_plans_by_property(plans_for_consideration)
        )
        for property_id, property_plans in plans_for_consideration_by_property.items():
            if not property_plans:
                raise ValueError(f"No plans for property {property_id}")
            try:
                cheapest_plan = choose_cheapest_relevant_plan(
                    property_plans, scenario_priority_order
                )
            except Exception:
                logger.error(f"Failed to find cheapest plan for property {property_id}")
                raise
            property_plans = _update_plan_objects(property_plans, cheapest_plan)
            for plan in property_plans:
                if plan.id is not None:  # just in case
                    plans_by_id[plan.id] = plan
        logger.info("Successfully set defaults on Plan objects in memory")

        # then pass all domain objects to database to update (regardless of whether they've changed)
        _update_plans_in_db(list(plans_by_id.values()))

        # Mark the subtask as successful
        logger.info(f"Successfully updated {len(plans_by_id)} Plans in database")
        if subtask_id:
            SubTaskInterface().update_subtask_status(
                subtask_id=UUID(subtask_id),
                status="complete",
                cloud_logs_url=cloud_logs_url,
            )
        return Response(status_code=200)
    except Exception as e:
        if subtask_id:
            return handle_error(
                "Exception during Categorisation processing.",
                e,
                subtask_id,
                500,
                start_ms,
            )
        raise
def choose_cheapest_relevant_plan(
    plans: List[Plan], scenario_priority_order: Optional[List[int]] = None
) -> Plan:
    """Pick the plan that should become the default for one property.

    Compliant plans are preferred; if none is compliant, every plan is a
    candidate. Among the candidates with the lowest cost, the first one whose
    scenario ID appears in ``scenario_priority_order`` (checked in that order)
    wins; otherwise the first lowest-cost candidate is returned.

    Raises:
        ValueError: If there are no plans, or a candidate plan has no ID.
    """
    priorities: List[int] = scenario_priority_order or []

    compliant: List[Plan] = [plan for plan in plans if plan.is_compliant]
    candidates: List[Plan] = compliant if compliant else plans
    if not candidates:
        raise ValueError("No plans available to choose from.")

    for plan in candidates:
        if plan.id is None:
            # Not expected in practice: IDs are optional only so that new,
            # not-yet-persisted plans can exist. Only persisted plans are
            # handled here.
            raise ValueError(
                f"All plans must have an ID, but found a plan with no ID: {plan}"
            )

    lowest_cost: float = min(candidate.cost for candidate in candidates)
    cheapest: List[Plan] = [
        candidate for candidate in candidates if candidate.cost == lowest_cost
    ]

    # Ties on cost are broken by scenario priority.
    for scenario_id in priorities:
        preferred = next(
            (candidate for candidate in cheapest if candidate.scenario.id == scenario_id),
            None,
        )
        if preferred is not None:
            return preferred

    return cheapest[0]
def _get_default_plans(
    portfolio_id: int,
    scenarios: List[Scenario],
    min_property_id: Optional[int] = None,
    max_property_id: Optional[int] = None,
) -> List[Plan]:
    """Load the portfolio's current default plans as ``Plan`` domain objects.

    Plan models whose scenario ID is not among ``scenarios`` are left out.
    """
    models = get_default_plans(portfolio_id, min_property_id, max_property_id)
    scenarios_by_id = {scenario.id: scenario for scenario in scenarios}

    default_plans: List[Plan] = []
    for model in models:
        if model.scenario_id not in scenarios_by_id:
            continue
        default_plans.append(
            Plan.from_sqlalchemy(model, scenarios_by_id[model.scenario_id])
        )
    return default_plans
def _load_scenarios_for_portfolio(portfolio_id: int) -> List[Scenario]:
    """Fetch the portfolio's scenario models and convert each to a ``Scenario``."""
    scenarios: List[Scenario] = []
    for scenario_model in get_scenarios_by_portfolio_id(portfolio_id):
        scenarios.append(Scenario.from_sqlalchemy(scenario_model))
    return scenarios
def _load_plans_for_portfolio(
    portfolio_id: int,
    all_scenarios: List[Scenario],
    scenarios_to_consider: Optional[List[int]] = None,
    min_property_id: Optional[int] = None,
    max_property_id: Optional[int] = None,
) -> List[Plan]:
    """Load the most recent plans to consider as ``Plan`` domain objects.

    Plans are fetched by scenario ID when ``scenarios_to_consider`` is given,
    otherwise for the whole portfolio; both queries receive the optional
    property-ID bounds. A plan whose scenario is not in ``all_scenarios`` is
    logged and skipped.

    Raises:
        Exception: If ``all_scenarios`` is empty.
    """
    plan_models: List[PlanModel]
    if scenarios_to_consider:
        logger.info(f"Getting plans for {len(scenarios_to_consider)} scenarios")
        plan_models = get_most_recent_plans_by_scenario_ids(
            scenarios_to_consider, min_property_id, max_property_id
        )
        logger.info(f"Got {len(plan_models)} plan models from database")
    else:
        logger.info(
            f"No list of Plans to consider provided. Getting all Plans for portfolio {portfolio_id}"
        )
        plan_models = get_most_recent_plans_by_portfolio_id(
            portfolio_id, min_property_id, max_property_id
        )

    if not all_scenarios:
        raise Exception(f"No scenarios found for Portfolio {portfolio_id}")

    # Index scenarios once instead of scanning the list for every plan.
    # setdefault keeps the first scenario seen for an ID, matching the
    # first-match behaviour of the previous linear search.
    scenarios_by_id: Dict[Optional[int], Scenario] = {}
    for candidate in all_scenarios:
        scenarios_by_id.setdefault(candidate.id, candidate)

    plans: List[Plan] = []
    for model in plan_models:
        # .get() returns None for an unknown scenario. The previous bare
        # next(...) raised StopIteration instead, so the skip branch below
        # could never run.
        scenario = scenarios_by_id.get(model.scenario_id)
        if not scenario:
            logger.info(f"No Scenario associated with Plan of ID {model.id}")
            continue
        plans.append(Plan.from_sqlalchemy(model, scenario))

    logger.info(f"Got {len(plans)} Plans")
    return plans
def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]:
    """Bucket plans by ``plan.record.property_id``, keeping input order per bucket."""
    plans_by_property: dict[int, List[Plan]] = defaultdict(list)
    for plan in plans:
        property_id = plan.record.property_id
        plans_by_property[property_id].append(plan)
    return plans_by_property
def _update_plan_objects(plans: List[Plan], cheapest_plan: Plan) -> List[Plan]:
    """Flag the plan whose ID matches ``cheapest_plan`` as default; unflag the rest.

    The plans are updated in place and the same list is returned.
    """
    for plan in plans:
        if plan.id == cheapest_plan.id:
            plan.set_default(True)
            logger.debug(
                f"Setting Plan {plan.id} (Scenario Name: {plan.scenario.record.name}) to default"
            )
        else:
            plan.set_default(False)
    return plans
def _update_plans_in_db(plans: List[Plan]) -> None:
    """Convert plans to their DB models and write them in one bulk update."""
    model_pairs = [plan.to_sqlalchemy() for plan in plans]
    plan_models: List[PlanModel] = [plan_model for plan_model, _ in model_pairs]
    scenario_models: List[ScenarioModel] = [
        scenario_model for _, scenario_model in model_pairs
    ]
    bulk_update_plans(plan_models, scenario_models)

View file

@ -0,0 +1,73 @@
from typing import Callable
import pytest
from datetime import datetime
from backend.app.domain.classes.plan import Plan
from backend.app.domain.classes.scenario import Scenario
from backend.app.domain.records.plan_record import PlanRecord
from backend.app.domain.records.scenario_record import ScenarioRecord
from backend.app.db.models.portfolio import Epc, PortfolioGoal
@pytest.fixture
def created_at_datetime() -> datetime:
    """Timestamp used as ``created_at`` for the records built in a test."""
    now = datetime.now()
    return now
@pytest.fixture
def epc_c_scenario(created_at_datetime: datetime) -> "Scenario":
    """Scenario (id=1) whose goal is INCREASING_EPC with goal value "C"."""
    return Scenario(
        record=ScenarioRecord(
            name="EPC C",
            created_at=created_at_datetime,
            housing_type="",
            goal=PortfolioGoal.INCREASING_EPC,
            goal_value="C",
            trigger_file_path="",
            multi_plan=False,
            is_default=False,
        ),
        id=1,
    )
@pytest.fixture
def plan_factory(
    epc_c_scenario: "Scenario", created_at_datetime: datetime
) -> Callable[[int, "Epc"], "Plan"]:
    """Factory fixture: build a Plan (id=1) with the given post-retrofit results.

    Every plan it builds is attached to the ``epc_c_scenario`` fixture.
    """

    def build_plan(post_sap_points: int, post_epc_rating: "Epc") -> "Plan":
        return Plan(
            record=PlanRecord(
                property_id=1,
                portfolio_id=1,
                created_at=created_at_datetime,
                is_default=False,
                post_sap_points=post_sap_points,
                post_epc_rating=post_epc_rating,
            ),
            scenario=epc_c_scenario,
            id=1,
        )

    return build_plan
@pytest.mark.parametrize(
    "post_sap_points, post_epc_rating, expected_compliance",
    [
        (75, Epc.C, True),
        (100, Epc.A, True),
        (60, Epc.D, False),
    ],
)
def test_scenario_goal_is_epc_c(
    plan_factory: Callable[[int, "Epc"], "Plan"],
    post_sap_points: int,
    post_epc_rating: "Epc",
    expected_compliance: bool,
) -> None:
    """Check ``is_compliant`` against an EPC C goal for each parametrised case."""
    # arrange
    plan_under_test = plan_factory(post_sap_points, post_epc_rating)

    # act / assert
    assert plan_under_test.is_compliant == expected_compliance

View file

@ -0,0 +1,160 @@
from datetime import datetime
from typing import List, Optional
import pytest
from backend.app.domain.classes.plan import Plan
from backend.app.domain.classes.scenario import Scenario
from backend.app.domain.records.plan_record import PlanRecord
from backend.app.domain.records.scenario_record import ScenarioRecord
from backend.app.db.models.portfolio import Epc, PortfolioGoal
from backend.categorisation.processor import choose_cheapest_relevant_plan
@pytest.fixture
def created_at_datetime() -> datetime:
    """Timestamp used as ``created_at`` for the records built in a test."""
    now = datetime.now()
    return now
def make_plan_record(
    created_at: datetime, default: bool, cost_of_works: Optional[float] = 500.0
) -> PlanRecord:
    """Build a PlanRecord for property 1 / portfolio 1 with post-retrofit EPC C."""
    record = PlanRecord(
        property_id=1,
        portfolio_id=1,
        created_at=created_at,
        is_default=default,
        post_epc_rating=Epc.C,
        cost_of_works=cost_of_works,
    )
    return record
def make_scenario(name: str, created_at: datetime, is_default: bool) -> Scenario:
    """
    Build an "increase EPC to C" Scenario for tests.

    Default scenarios get id 3 and non-default scenarios get id 4, so tests can
    refer to them by id in a priority list.
    """
    scenario_id = 3 if is_default else 4
    return Scenario(
        record=ScenarioRecord(
            name=name,
            created_at=created_at,
            housing_type="",
            goal=PortfolioGoal.INCREASING_EPC,
            goal_value="C",
            trigger_file_path="",
            multi_plan=False,
            is_default=is_default,
        ),
        id=scenario_id,
    )
def make_plan(
    created_at: datetime,
    default: bool,
    cost_of_works: Optional[float] = 500.0,
    name: str = "",
) -> Plan:
    """
    Build a Plan together with its own Scenario.

    ``default`` drives both ids: a default plan gets plan id 1 (its scenario is
    built with ``is_default=True``), a non-default plan gets plan id 2.
    """
    record = make_plan_record(created_at, default, cost_of_works)
    owning_scenario = make_scenario(name, created_at, default)
    return Plan(
        record=record,
        scenario=owning_scenario,
        id=1 if default else 2,
    )
def test_prioritised_scenario_selected(created_at_datetime: datetime) -> None:
    """With equal costs, the plan whose scenario is first in the priority order is expected."""
    # arrange: default plan -> scenario 3 / plan 1, non-default plan -> scenario 4 / plan 2
    candidate_plans = [
        make_plan(created_at_datetime, True, name="EPC C"),
        make_plan(created_at_datetime, False, name="EPC C - Minor Works"),
    ]
    priority: List[int] = [4, 3]

    # act
    chosen_plan = choose_cheapest_relevant_plan(
        plans=candidate_plans,
        scenario_priority_order=priority,
    )

    # assert
    assert chosen_plan.id == 2
def test_cheapest_plan_returned_if_not_in_priority_list(
    created_at_datetime: datetime,
) -> None:
    """The cheaper plan is expected even though its scenario (4) is absent from the priority list."""
    # arrange: the expensive plan's scenario (3) is prioritised, the cheap plan's is not
    expensive_plan = make_plan(
        created_at_datetime, True, cost_of_works=1000.0, name="EPC C"
    )
    cheap_plan = make_plan(
        created_at_datetime, False, cost_of_works=100.0, name="EPC C - Minor Works"
    )
    priority: List[int] = [3, 5]

    # act
    chosen_plan = choose_cheapest_relevant_plan(
        plans=[expensive_plan, cheap_plan],
        scenario_priority_order=priority,
    )

    # assert
    assert chosen_plan.id == 2
def test_all_plans_zero_cost__highest_priority_returned(
    created_at_datetime: datetime,
) -> None:
    """When every plan costs 0.0, the plan from the highest-priority scenario is expected."""
    # arrange
    candidate_plans = [
        make_plan(created_at_datetime, True, cost_of_works=0.0, name="EPC C"),
        make_plan(
            created_at_datetime, False, cost_of_works=0.0, name="EPC C - Minor Works"
        ),
    ]
    priority: List[int] = [4, 3]

    # act
    chosen_plan = choose_cheapest_relevant_plan(
        plans=candidate_plans,
        scenario_priority_order=priority,
    )

    # assert
    assert chosen_plan.id == 2
def test_some_plans_zero_cost__cheapest_returned(
    created_at_datetime: datetime,
) -> None:
    """A zero-cost plan is expected over a 50.0 plan, even though the latter's scenario is listed first."""
    # arrange
    free_plan = make_plan(created_at_datetime, True, cost_of_works=0.0, name="EPC C")
    paid_plan = make_plan(
        created_at_datetime, False, cost_of_works=50.0, name="EPC C - Minor Works"
    )
    priority: List[int] = [4, 3]

    # act
    chosen_plan = choose_cheapest_relevant_plan(
        plans=[free_plan, paid_plan],
        scenario_priority_order=priority,
    )

    # assert
    assert chosen_plan.id == 1
def test_all_plans_null_cost__highest_priority_returned(
    created_at_datetime: datetime,
) -> None:
    """When no plan has a cost (None), the plan from the highest-priority scenario is expected."""
    # arrange
    candidate_plans = [
        make_plan(created_at_datetime, True, cost_of_works=None, name="EPC C"),
        make_plan(
            created_at_datetime, False, cost_of_works=None, name="EPC C - Minor Works"
        ),
    ]
    priority: List[int] = [4, 3]

    # act
    chosen_plan = choose_cheapest_relevant_plan(
        plans=candidate_plans,
        scenario_priority_order=priority,
    )

    # assert
    assert chosen_plan.id == 2

View file

@ -29,5 +29,5 @@ class ConditionTriggerRequest(BaseModel):
# {
# "file_type": "LBWF",
# "trigger_file_bucket": "condition-data-dev",
# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx",
# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx"
# }

View file

@ -0,0 +1,11 @@
---
# Compose file that builds and runs the categorisation Lambda image locally.
version: "3.9"

services:
  categorisation-lambda:
    build:
      # Build from the parent directory; the Dockerfile path is relative to that context.
      context: ../
      dockerfile: backend/categorisation/handler/Dockerfile
    ports:
      # host:container - kept quoted so YAML always reads it as a string
      - "9000:8080"
    env_file:
      - ../.env

View file

@ -1191,14 +1191,18 @@ async def model_engine(body: PlanTriggerRequest):
property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures]
measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures]
ventilation_included = "ventilation" in property_measure_types
# TODO - formalise property measure types into an enum
ventilation_included = (
"ventilation" in property_measure_types or "mechanical_ventilation" in property_measure_types
)
# If a measure requiring ventilation is selected, and the property does not have ventilation, we enforce
# its inclusion
needs_ventilation = any(
x in property_measure_types for x in assumptions.measures_needing_ventilation
) and not p.has_ventilation and ventilation_included
needs_ventilation = optimiser_functions.check_needs_ventilation(
property_measure_types, assumptions.measures_needing_ventilation, p.has_ventilation,
ventilation_included
)
if not measures_to_optimise:
# Nothing to do, we just reshape the recommendations
@ -1315,7 +1319,7 @@ async def model_engine(body: PlanTriggerRequest):
recommendations=recommendations, selected=selected,
)
# Add best practice measures (ventilation/trickle vents)
# Add best practice measures (ventilation/trickle vents) - pass needs_ventilation flag
selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected)
# Final flattening - we pass what the battery SAP score would be, regardless if the battery was selected
recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults(

169
backend/export/README.md Normal file
View file

@ -0,0 +1,169 @@
# 🧪 Running Tests in PyCharm (macOS + pytest-postgresql)
Our test suite uses `pytest` and `pytest-postgresql`, which
automatically spins up a temporary PostgreSQL instance.
On Linux (including GitHub Actions), PostgreSQL binaries are installed
in standard system locations.\
On macOS (Homebrew), they are not --- so PyCharm needs a small
configuration tweak to locate `pg_ctl`.
This guide explains how to run and debug tests locally in PyCharm
without modifying test code.
------------------------------------------------------------------------
## ✅ Prerequisites
### Devcontainer
Postgres install is included in the devcontainer, so no additional setup is needed.
Running
```bash
make test
```
runs the test suite, which automatically starts a temporary PostgreSQL instance.
### Local MacOS
1. Install PostgreSQL via Homebrew:
``` bash
brew install postgresql
```
2. Confirm `pg_ctl` exists:
``` bash
which pg_ctl
```
Typical output:
/opt/homebrew/bin/pg_ctl
------------------------------------------------------------------------
# 🚀 Running Tests in PyCharm
## Step 1 --- Create a PyCharm pytest Run Configuration
1. Open the test file.
2. Click the green ▶ next to the test.
3. Choose **"Edit Run Configuration..."**
You should see something like:
- **Target:** `backend/export/tests/test_export.py`
- **Working directory:** Project root (e.g. `Model/`)
------------------------------------------------------------------------
## Step 2 --- Add Required Override (macOS Only)
In the Run Configuration:
### ➜ "Additional Arguments"
Add:
--override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl
This tells `pytest-postgresql` where `pg_ctl` lives on macOS.
Without this, PyCharm may fail with:
ExecutableMissingException: Could not found pg_config executable
------------------------------------------------------------------------
## Step 3 --- Run or Debug
You can now:
- Click ▶ Run\
- Click 🐞 Debug\
- Set breakpoints normally
The temporary PostgreSQL instance will start automatically.
------------------------------------------------------------------------
# 🔍 Why This Is Needed
`pytest-postgresql` defaults to a Linux-style path:
/usr/lib/postgresql/<version>/bin/pg_ctl
That path exists on Ubuntu (CI), but not on macOS.
On macOS, Homebrew installs PostgreSQL in:
/opt/homebrew/bin/
The `--override-ini` flag safely overrides the executable path
**locally**, without modifying:
- test files\
- `conftest.py`\
- `pytest.ini`\
- CI configuration
This ensures:
- ✅ Tests still work in GitHub Actions\
- ✅ Tests still work for Linux users\
- ✅ macOS developers can debug in PyCharm\
- ✅ No repository-specific hacks are required
------------------------------------------------------------------------
# 🛠 Optional: Using a Local `.env` File
If you prefer not to hardcode the override in the run configuration:
1. Create a local file:
   ```
   .env.local
   ```
2. Add:
   ```
   PYTEST_ADDOPTS=--override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl
   ```
3. In PyCharm:
- Open the Run Configuration
- Add `.env.local` under **"Paths to .env files"**
------------------------------------------------------------------------
# 🧪 Running Tests via Terminal (Recommended for CI Parity)
For normal execution outside PyCharm:
``` bash
make test
```
This already works without additional configuration.
------------------------------------------------------------------------
# 🧠 Summary
| Environment            | Works Without Override? | Needs `--override-ini`? |
|------------------------|-------------------------|-------------------------|
| GitHub Actions (Linux) | ✅ Yes                  | ❌ No                   |
| Linux local            | ✅ Yes                  | ❌ No                   |
| macOS terminal (tox)   | ✅ Yes                  | ❌ No                   |
| macOS PyCharm debugger | ❌ No                   | ✅ Yes                  |

View file

@ -0,0 +1,227 @@
from typing import List, Any, Dict, Optional, Tuple, Sequence
import pandas as pd
from sqlalchemy import select
from sqlalchemy.orm import Session
from sqlalchemy.engine import Row
from collections import defaultdict
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
RecommendationMaterials,
)
from backend.app.db.models.portfolio import (
PropertyModel,
PropertyDetailsEpcModel,
)
from backend.app.db.models.materials import Material
from utils.logger import setup_logger
logger = setup_logger()
class DbMethods:
    """Query helpers for the property-scenario export; results come back as pandas DataFrames."""

    def __init__(self, session: Session) -> None:
        self.session = session

    @staticmethod
    def _columns_as_dict(model_cls: Any, instance: Any) -> Dict[str, Any]:
        """Map each table column name of ``model_cls`` to the value held on ``instance``."""
        return {
            column.name: getattr(instance, column.name)
            for column in model_cls.__table__.columns.values()
        }

    def get_properties(self, portfolio_id: int) -> pd.DataFrame:
        """
        Fetch every property of a portfolio joined to its EPC details row.

        :param portfolio_id: portfolio whose properties are fetched
        :return: one row per (property, EPC details) pair holding the columns of both
            tables; where both tables share a column name, the EPC details value is
            the one that ends up in the frame
        """
        query = (
            select(PropertyModel, PropertyDetailsEpcModel)
            .join(
                PropertyDetailsEpcModel,
                PropertyModel.id == PropertyDetailsEpcModel.property_id,
            )
            .where(PropertyModel.portfolio_id == portfolio_id)
        )

        result_rows: Sequence[Row[Tuple[PropertyModel, PropertyDetailsEpcModel]]] = (
            self.session.execute(query).all()
        )

        records: List[Dict[str, Any]] = []
        for property_model, epc_model in result_rows:
            # Property columns first, then EPC columns layered on top
            merged = self._columns_as_dict(PropertyModel, property_model)
            merged.update(self._columns_as_dict(PropertyDetailsEpcModel, epc_model))
            records.append(merged)

        return pd.DataFrame(records)

    def get_latest_plans(
        self,
        portfolio_id: int,
        scenario_ids: Optional[List[int]] = None,
        default_only: bool = False,
    ) -> pd.DataFrame:
        """
        Fetch the newest plans of a portfolio.

        Two modes:
            1) Scenario mode - newest plan per (scenario_id, property_id)
            2) Default mode - newest default plan per property; scenario_ids are ignored

        :raises ValueError: if default_only is False and no scenario_ids were given
        """
        if default_only:
            # Scenario ids play no part in default mode; drop them to make that explicit
            scenario_ids = None
        elif not scenario_ids:
            raise ValueError(
                "Either scenario_ids must be provided or default_only must be True."
            )

        # NOTE - both queries are postgres specific: they rely on DISTINCT ON keeping the
        # first row of each group, with created_at DESC putting the newest plan first.
        if default_only:
            query = (
                select(PlanModel)
                .where(
                    PlanModel.portfolio_id == portfolio_id,
                    PlanModel.is_default.is_(True),
                )
                .distinct(PlanModel.property_id)
                .order_by(
                    PlanModel.property_id,
                    PlanModel.created_at.desc(),
                )
            )
        else:
            assert scenario_ids is not None
            query = (
                select(PlanModel)
                .where(
                    PlanModel.portfolio_id == portfolio_id,
                    PlanModel.scenario_id.in_(scenario_ids),
                )
                .distinct(
                    PlanModel.scenario_id,
                    PlanModel.property_id,
                )
                .order_by(
                    PlanModel.scenario_id,
                    PlanModel.property_id,
                    PlanModel.created_at.desc(),
                )
            )

        logger.info("Fetching plans")
        plan_models: Sequence[PlanModel] = self.session.scalars(query).all()

        plan_records = [
            self._columns_as_dict(PlanModel, plan_model) for plan_model in plan_models
        ]
        return pd.DataFrame(plan_records)

    def get_recommendations(self, plan_ids: List[int]) -> pd.DataFrame:
        """
        Fetch the default, not-yet-installed recommendations linked to the given plans.

        :param plan_ids: plans whose recommendations are wanted
        :return: one row per recommendation with all Recommendation columns plus
            ``scenario_id`` and ``plan_name`` taken from the owning plan; an empty
            frame when plan_ids is empty
        """
        if not plan_ids:
            logger.info("No plan ids provided")
            return pd.DataFrame()

        query = (
            select(Recommendation, PlanModel.scenario_id, PlanModel.name)
            .join(
                PlanRecommendations,
                Recommendation.id == PlanRecommendations.recommendation_id,
            )
            .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
            .where(
                PlanRecommendations.plan_id.in_(plan_ids),
                Recommendation.default.is_(True),
                Recommendation.already_installed.is_(False),
            )
        )

        result_rows: Sequence[Tuple[Recommendation, Optional[int], Optional[str]]] = (
            self.session.execute(query).tuples().all()
        )

        records: List[Dict[str, Any]] = []
        for rec_model, scenario_id, plan_name in result_rows:
            record = self._columns_as_dict(Recommendation, rec_model)
            record["scenario_id"] = scenario_id
            record["plan_name"] = plan_name
            records.append(record)

        return pd.DataFrame(records)

    def attach_materials(self, recommendations_df: pd.DataFrame) -> pd.DataFrame:
        """
        Add a ``materials`` column listing the materials of each recommendation.

        The frame passed in is modified in place and also returned. Recommendations
        without materials get an empty list.
        """
        if recommendations_df.empty:
            recommendations_df["materials"] = []
            return recommendations_df

        recommendation_ids: List[int] = recommendations_df["id"].astype(int).tolist()

        query = (
            select(RecommendationMaterials, Material)
            .join(Material, RecommendationMaterials.material_id == Material.id)
            .where(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
        )

        result_rows: Sequence[Tuple[RecommendationMaterials, Material]] = (
            self.session.execute(query).tuples().all()
        )

        # recommendation id -> list of material dicts
        materials_by_recommendation: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
        for link, material in result_rows:
            material_entry = {
                "material_id": link.material_id,
                "depth": link.depth,
                "quantity": link.quantity,
                "quantity_unit": link.quantity_unit,
                "estimated_cost": link.estimated_cost,
                "type": material.type.value if material.type else None,
                "includes_battery": material.includes_battery,
            }
            materials_by_recommendation[link.recommendation_id].append(material_entry)

        def _materials_for(recommendation_id: int) -> List[Dict[str, Any]]:
            # .get() so a lookup never adds keys to the defaultdict
            return materials_by_recommendation.get(recommendation_id, [])

        recommendations_df["materials"] = (
            recommendations_df["id"].astype(int).apply(_materials_for)
        )

        return recommendations_df

View file

@ -0,0 +1,40 @@
from typing import Optional, Union, List
from pydantic import BaseModel, model_validator, PrivateAttr
class ExportRequest(BaseModel):
    # Request body for a property-scenario export.

    # uuid identifying the export request as a whole; used for tracking and logging
    task_id: Optional[str]

    # uuid identifying one export operation; used for tracking and logging. A subtask is a
    # child of the task, created when the work has been distributed across workers
    subtask_id: Optional[str]

    # portfolio the export is for
    portfolio_id: int

    # scenarios to export
    scenario_ids: List[int]

    # when True, only the default plan of each property is exported and scenario_ids
    # are ignored
    default_plans_only: Optional[bool] = False

    # set by the validator below: True when scenario_ids were supplied but will be
    # ignored because default_plans_only is True
    _scenario_ids_ignored: bool = PrivateAttr(default=False)

    @model_validator(mode="after")
    def validate_default_plan_override(self):
        """
        Record whether supplied scenario_ids are being overridden.

        Supplying scenario_ids together with default_plans_only=True is not an
        error; the request is accepted and only flagged, so the handler can log
        or return a warning.
        """
        ignored = bool(self.default_plans_only and self.scenario_ids)
        object.__setattr__(self, "_scenario_ids_ignored", ignored)
        return self

    @property
    def scenario_ids_ignored(self) -> bool:
        return self._scenario_ids_ignored

View file

@ -0,0 +1,179 @@
import json
from typing import Optional, Any, Mapping, Dict, Union, List
import pandas as pd
from sqlalchemy.orm import Session
from backend.export.property_scenarios.input_schema import ExportRequest
from backend.export.property_scenarios.db_functions import DbMethods
from backend.app.db.connection import db_read_session
from backend.app.utils import sap_to_epc
from utils.logger import setup_logger
logger = setup_logger()
def choose_group_keys(payload: ExportRequest) -> List[Union[int, str]]:
    """
    Decide which export groups to produce for a request.

    :param payload: validated export request
    :return: the single key "default_plans" when only default plans are exported
        (one export, no scenario grouping), otherwise the requested scenario ids
    """
    return ["default_plans"] if payload.default_plans_only else payload.scenario_ids
def has_solar_with_battery(materials_list: Optional[List[Dict[str, Any]]]) -> bool:
    """
    Tell whether any material is a solar PV material that includes a battery.

    :param materials_list: material dicts (may be None or empty); each is checked for
        ``type == "solar_pv"`` and ``includes_battery`` being exactly ``True``
    :return: True if at least one material matches, otherwise False
    """
    return any(
        material.get("type") == "solar_pv"
        and material.get("includes_battery") is True
        for material in (materials_list or [])
    )
def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]:
    """
    Build one export DataFrame per group (scenario id, or "default_plans").

    Steps: load the portfolio's properties, the latest plans and their
    recommendations (with materials), pivot the recommendation costs into one
    column per measure type, and join the result onto the property data along
    with the predicted post-works SAP score and EPC rating.

    :param payload: validated export request
    :param session: database session used for all reads
    :return: mapping of group key to export DataFrame; empty when there are no
        plans or no recommendations, and groups without recommendations are omitted
    """
    export_files: Dict[Union[str, int], pd.DataFrame] = {}

    db_methods = DbMethods(session)

    properties_df = db_methods.get_properties(payload.portfolio_id)
    logger.info("Retrieved %s properties for export", len(properties_df))

    plans_df: pd.DataFrame = db_methods.get_latest_plans(
        portfolio_id=payload.portfolio_id,
        scenario_ids=payload.scenario_ids,
        default_only=bool(payload.default_plans_only),
    )
    logger.info("Retrieved %s plans for export", len(plans_df))
    if plans_df.empty:
        logger.info("Empty plans dataframe - no plans to export. Returning empty export.")
        return export_files

    plan_ids: List[int] = plans_df["id"].tolist()

    recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids)
    logger.info("Retrieved %s recommendations for export", len(recommendations_df))
    if recommendations_df.empty:
        # A frame built from zero rows has no columns, so the column lookups below
        # would raise KeyError. Every group would be skipped anyway, so stop here.
        logger.info("Empty recommendations dataframe - nothing to export. Returning empty export.")
        return export_files

    recommendations_df = db_methods.attach_materials(recommendations_df)

    # Solar PV recommendations whose materials include a battery are exported under
    # their own measure type
    recommendations_df["has_solar_with_battery"] = (
        recommendations_df["materials"].apply(has_solar_with_battery)
    )
    _filter = (
        (recommendations_df["measure_type"] == "solar_pv")
        & (recommendations_df["has_solar_with_battery"])
    )
    recommendations_df.loc[_filter, "measure_type"] = (
        recommendations_df.loc[_filter, "measure_type"] + "_with_battery"
    )

    group_keys: List[Union[str, int]] = choose_group_keys(payload)

    for group_key in group_keys:
        if payload.default_plans_only:
            # Single group: all recommendations belong to it
            scenario_recs = recommendations_df
        else:
            scenario_recs = recommendations_df[
                recommendations_df["scenario_id"] == group_key
            ]

        if scenario_recs.empty:
            logger.info("No recommendations found for group_key %s - skipping export for this group", group_key)
            continue

        measures_df: pd.DataFrame = scenario_recs[
            ["property_id", "measure_type", "plan_name", "estimated_cost"]
        ].drop_duplicates()

        # One row per (property, plan), one cost column per measure type
        pivot: pd.DataFrame = measures_df.pivot(
            index=["property_id", "plan_name"],
            columns="measure_type",
            values="estimated_cost",
        ).reset_index()

        pivot["total_retrofit_cost"] = (
            pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1)
        )

        # Total SAP points gained per property across its recommendations
        post_sap: pd.DataFrame = (
            scenario_recs.groupby("property_id")[["sap_points"]]
            .sum()
            .reset_index()
        )

        # Rename the property's own solar_pv column so it cannot clash with the
        # pivoted solar_pv measure column
        df: pd.DataFrame = (
            properties_df.rename(columns={"solar_pv": "existing_solar_pv"})
            .merge(pivot, how="left", on="property_id")
            .merge(post_sap, how="left", on="property_id")
        )

        # Properties without recommendations gain no SAP points
        df["sap_points"] = df["sap_points"].fillna(0)
        df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
        df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc)

        export_files[group_key] = df

    return export_files
# ============================================================
# Lambda Handler
# ============================================================
def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]:
    """
    Lambda entry point: validate and run every export request in the event.

    Example record body:
    body_dict = {
        "task_id": "test",
        "subtask_id": "test",
        "portfolio_id": 569,
        "scenario_ids": [],
        "default_plans_only": True,
    }

    Every record in the batch is processed in order. Processing stops at the
    first record that fails, and a 500 response is returned for the invocation.

    :param event: Lambda event containing export request details
    :param context: Lambda context (not used in this handler but included for completeness)
    :return: HTTP-style response: 200 when all records were processed, 500 when a
        record failed, 201 when the event held no records
    """
    records = event.get("Records", [])

    if not records:
        return {
            "statusCode": 201,
            "body": json.dumps({"message": "No records to process"}),
        }

    for record in records:
        try:
            body_dict = json.loads(record["body"])

            logger.debug("Validating request body")
            payload = ExportRequest.model_validate(body_dict)
            if payload.scenario_ids_ignored:
                logger.warning(
                    "Received scenario_ids in request body but they will be ignored "
                    "because default_plans_only is set to True"
                )
            logger.debug("Successfully validated request body")

            with db_read_session() as session:
                exported_files = process_export(payload, session)

            # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url
            _ = exported_files

        except Exception as e:
            # logger.exception keeps the traceback alongside the message
            logger.exception(f"Failed to process record: {e}")
            return {
                "statusCode": 500,
                "body": json.dumps({"message": "Failed to process export request"}),
            }

    # Only reached once every record has been handled without error
    return {
        "statusCode": 200,
        "body": json.dumps({}),
    }

View file

@ -0,0 +1,55 @@
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from backend.app.db.base import Base
@pytest.fixture(scope="function")
def engine(postgresql):
    """
    SQLAlchemy engine bound to the temporary pytest-postgresql database.

    The fixture is function-scoped: tables are created before each test that
    uses it and dropped again once that test has finished.
    """
    # Assemble the SQLAlchemy URL from the psycopg connection info
    info = postgresql.info
    connection_string = (
        f"postgresql+psycopg://{info.user}:{info.password}@"
        f"{info.host}:{info.port}/{info.dbname}"
    )

    sa_engine = create_engine(connection_string)

    # Setup: create all tables
    Base.metadata.create_all(sa_engine)

    # Everything before the yield is setup; everything after it is teardown and
    # runs when the requesting test has completed
    yield sa_engine

    # Teardown: drop the tables and release the engine's connections
    Base.metadata.drop_all(sa_engine)
    sa_engine.dispose()
@pytest.fixture(scope="function")
def db_session(engine):
    """
    Session wrapped in a transaction that is rolled back after each test.

    Rolling back the outer transaction keeps tests isolated from one another.
    """
    connection = engine.connect()
    outer_transaction = connection.begin()

    session_factory = sessionmaker(bind=connection)
    session = session_factory()

    yield session

    # Teardown: close the session, undo the test's changes, release the connection
    session.close()
    outer_transaction.rollback()
    connection.close()

View file

@ -0,0 +1,14 @@
id,plan_id,recommendation_id
24799722,1604277,24798968
24799726,1604277,24798972
24801150,1604367,24800396
24802703,1604448,24801949
24802724,1604448,24801970
24805327,1604577,24804573
24805397,1604579,24804643
24805401,1604579,24804647
24813000,1605111,24812246
24813002,1605111,24812248
24813004,1605111,24812250
24813006,1605112,24812252
24813009,1605112,24812255
1 id plan_id recommendation_id
2 24799722 1604277 24798968
3 24799726 1604277 24798972
4 24801150 1604367 24800396
5 24802703 1604448 24801949
6 24802724 1604448 24801970
7 24805327 1604577 24804573
8 24805397 1604579 24804643
9 24805401 1604579 24804647
10 24813000 1605111 24812246
11 24813002 1605111 24812248
12 24813004 1605111 24812250
13 24813006 1605112 24812252
14 24813009 1605112 24812255

View file

@ -0,0 +1,11 @@
id,name,portfolio_id,property_id,scenario_id,created_at,is_default,valuation_increase_lower_bound,valuation_increase_upper_bound,valuation_increase_average,plan_type,post_sap_points,post_epc_rating,post_co2_emissions,co2_savings,post_energy_bill,energy_bill_savings,post_energy_consumption,energy_consumption_savings,valuation_post_retrofit,valuation_increase,cost_of_works,contingency_cost
1604277,,569,660478,1060,2026-02-19 16:14:45.560816,True,0.0302,0.07,0.048226666,,71.5,Epc.C,4.1813498,0.71865046,1447.5204,691.6662,15303.688,3276.7622,,,6984.568,1003.9568
1604448,,569,660529,1060,2026-02-19 16:14:52.052740,True,0.0302,0.07,0.048226666,,70.0,Epc.C,7.32816,1.5818402,2978.734,2314.7651,16558.295,1837.0155,,,13528.6,2844.636
1604367,,569,660538,1060,2026-02-19 16:14:48.517937,True,0.02,0.03,0.025,,71.0,Epc.C,5.003036,0.43696404,1933.2236,521.5316,19190.531,1883.4657,,,5520.0,828.0
1604577,,569,660688,1060,2026-02-19 16:15:04.461456,True,0.02,0.03,0.025,,70.0,Epc.C,3.6019807,0.20801921,1610.3181,248.27809,13746.731,896.6345,,,5100.0,765.0
1604579,,569,660690,1060,2026-02-19 16:15:04.461456,True,0.02,0.03,0.025,,70.0,Epc.C,4.7473392,0.5326607,1867.537,699.7881,18730.615,2527.2231,,,5469.0,825.74
1605110,,569,660598,1069,2026-02-19 16:18:57.606337,True,0.0,0.0,0.0,,70.0,Epc.C,1.89,0.0,1125.7338,0.0,7268.866,0.0,,,0.0,0.0
1605111,,569,660599,1069,2026-02-19 16:18:57.606337,True,0.0,0.0,0.0,,68.7,Epc.D,2.02,1.1,1174.9326,319.18213,7748.233,3924.9,,,1218.584,124.0984
1605080,,569,660448,1069,2026-02-19 16:18:57.581528,True,0.0,0.0,0.0,,71.0,Epc.C,1.79,0.0,1101.9677,0.0,6821.7285,0.0,,,0.0,0.0
1605112,,569,660600,1069,2026-02-19 16:18:57.606337,True,0.0,0.0,0.0,,64.9,Epc.D,1.89,0.8,1131.3535,172.0886,7241.062,2466.7,,,3885.834,716.7084
1605404,,569,660652,1069,2026-02-19 16:19:28.383096,True,0.0,0.0,0.0,,71.0,Epc.C,3.18,0.0,1757.515,0.0,11929.814,0.0,,,0.0,0.0
1 id name portfolio_id property_id scenario_id created_at is_default valuation_increase_lower_bound valuation_increase_upper_bound valuation_increase_average plan_type post_sap_points post_epc_rating post_co2_emissions co2_savings post_energy_bill energy_bill_savings post_energy_consumption energy_consumption_savings valuation_post_retrofit valuation_increase cost_of_works contingency_cost
2 1604277 569 660478 1060 2026-02-19 16:14:45.560816 True 0.0302 0.07 0.048226666 71.5 Epc.C 4.1813498 0.71865046 1447.5204 691.6662 15303.688 3276.7622 6984.568 1003.9568
3 1604448 569 660529 1060 2026-02-19 16:14:52.052740 True 0.0302 0.07 0.048226666 70.0 Epc.C 7.32816 1.5818402 2978.734 2314.7651 16558.295 1837.0155 13528.6 2844.636
4 1604367 569 660538 1060 2026-02-19 16:14:48.517937 True 0.02 0.03 0.025 71.0 Epc.C 5.003036 0.43696404 1933.2236 521.5316 19190.531 1883.4657 5520.0 828.0
5 1604577 569 660688 1060 2026-02-19 16:15:04.461456 True 0.02 0.03 0.025 70.0 Epc.C 3.6019807 0.20801921 1610.3181 248.27809 13746.731 896.6345 5100.0 765.0
6 1604579 569 660690 1060 2026-02-19 16:15:04.461456 True 0.02 0.03 0.025 70.0 Epc.C 4.7473392 0.5326607 1867.537 699.7881 18730.615 2527.2231 5469.0 825.74
7 1605110 569 660598 1069 2026-02-19 16:18:57.606337 True 0.0 0.0 0.0 70.0 Epc.C 1.89 0.0 1125.7338 0.0 7268.866 0.0 0.0 0.0
8 1605111 569 660599 1069 2026-02-19 16:18:57.606337 True 0.0 0.0 0.0 68.7 Epc.D 2.02 1.1 1174.9326 319.18213 7748.233 3924.9 1218.584 124.0984
9 1605080 569 660448 1069 2026-02-19 16:18:57.581528 True 0.0 0.0 0.0 71.0 Epc.C 1.79 0.0 1101.9677 0.0 6821.7285 0.0 0.0 0.0
10 1605112 569 660600 1069 2026-02-19 16:18:57.606337 True 0.0 0.0 0.0 64.9 Epc.D 1.89 0.8 1131.3535 172.0886 7241.062 2466.7 3885.834 716.7084
11 1605404 569 660652 1069 2026-02-19 16:19:28.383096 True 0.0 0.0 0.0 71.0 Epc.C 3.18 0.0 1757.515 0.0 11929.814 0.0 0.0 0.0

View file

@ -0,0 +1,2 @@
id,name,budget,status,goal,cost,number_of_properties,co2_equivalent_savings,energy_savings,energy_cost_savings,property_valuation_increase,rental_yield_increase,total_work_hours,labour_days,created_at,updated_at,epc_breakdown_pre_retrofit,epc_breakdown_post_retrofit,n_units_to_retrofit,co2_per_unit_pre_retrofit,co2_per_unit_post_retrofit,energy_bill_per_unit_pre_retrofit,energy_bill_per_unit_post_retrofit,energy_consumption_per_unit_pre_retrofit,energy_consumption_per_unit_post_retrofit,valuation_improvement_per_unit,cost_per_unit,cost_per_co2_saved,cost_per_sap_point,valuation_return_on_investment
569,Lifespace Rentals - Sample Retrofit Plans,,PortfolioStatus.SCOPING,PortfolioGoal.NONE,,,,,,,,,,2026-02-12 21:23:37.862000+00:00,2026-02-12 21:23:37.862000+00:00,,,,,,,,,,,,,,
1 id name budget status goal cost number_of_properties co2_equivalent_savings energy_savings energy_cost_savings property_valuation_increase rental_yield_increase total_work_hours labour_days created_at updated_at epc_breakdown_pre_retrofit epc_breakdown_post_retrofit n_units_to_retrofit co2_per_unit_pre_retrofit co2_per_unit_post_retrofit energy_bill_per_unit_pre_retrofit energy_bill_per_unit_post_retrofit energy_consumption_per_unit_pre_retrofit energy_consumption_per_unit_post_retrofit valuation_improvement_per_unit cost_per_unit cost_per_co2_saved cost_per_sap_point valuation_return_on_investment
2 569 Lifespace Rentals - Sample Retrofit Plans PortfolioStatus.SCOPING PortfolioGoal.NONE 2026-02-12 21:23:37.862000+00:00 2026-02-12 21:23:37.862000+00:00

View file

@ -0,0 +1,11 @@
,id,portfolio_id,creation_status,uprn,landlord_property_id,building_reference_number,status,address,postcode,has_pre_condition_report,has_recommendations,created_at,updated_at,property_type,built_form,local_authority,constituency,number_of_rooms,year_built,tenure,current_epc_rating,current_sap_points,current_valuation,installed_measures_sap_point_adjustment,is_sap_points_adjusted_for_installed_measures,original_sap_points
0,660478,569,PropertyCreationStatus.READY,100090438731.0,BARR052,3460742868.0,PortfolioStatus.ASSESSMENT,"52, Barrack Street",CO1 2LR,True,True,2026-02-12 21:59:02.744427,2026-02-19 16:18:57.941443,House,End-Terrace,Colchester,Colchester,4.0,1900.0,rental (private),Epc.E,53.0,0.0,0.0,False,53.0
1,660448,569,PropertyCreationStatus.READY,100090678548.0,BOUR110A,10002385993.0,PortfolioStatus.ASSESSMENT,Upper 110a Bournemouth Park Road,SS2 5LS,True,True,2026-02-12 21:59:02.388473,2026-02-19 16:18:57.578330,Flat,Detached,Southend-on-Sea,Rochford and Southend East,2.0,1950.0,Rented (private),Epc.C,71.0,0.0,0.0,False,71.0
2,660538,569,PropertyCreationStatus.READY,10033423541.0,CHUR099,8188570968.0,PortfolioStatus.ASSESSMENT,"99, Church Road",RM3 0SH,True,True,2026-02-12 21:59:03.203854,2026-02-19 16:19:03.748571,House,Mid-Terrace,Havering,Hornchurch and Upminster,5.0,1900.0,rental (private),Epc.D,58.0,0.0,0.0,False,58.0
3,660529,569,PropertyCreationStatus.READY,100091596678.0,CHER003,8961772668.0,PortfolioStatus.ASSESSMENT,"3, Brickfield Cottages",SS4 1PP,True,True,2026-02-12 21:59:02.935502,2026-02-19 16:18:55.971569,House,Mid-Terrace,Rochford,Rochford and Southend East,4.0,1900.0,rental (private),Epc.E,41.0,0.0,0.0,False,41.0
4,660598,569,PropertyCreationStatus.READY,100090663644.0,FLEM049B,10006705876.0,PortfolioStatus.ASSESSMENT,49b Flemming Crescent,SS9 4HR,True,True,2026-02-12 21:59:04.732965,2026-02-19 16:18:57.601893,Flat,Semi-Detached,Southend-on-Sea,,2.0,1930.0,Rented (social),Epc.C,70.0,0.0,0.0,False,70.0
5,660599,569,PropertyCreationStatus.READY,10012149765.0,FORE003A,9740118668.0,PortfolioStatus.ASSESSMENT,"3a, Forest Avenue",SS1 2HU,True,True,2026-02-12 21:59:04.732965,2026-02-19 16:18:57.601893,Flat,End-Terrace,Southend-on-Sea,Rochford and Southend East,2.0,1930.0,rental (private),Epc.D,56.0,0.0,0.0,False,56.0
6,660600,569,PropertyCreationStatus.READY,10012149797.0,FORE003GFF,1436818568.0,PortfolioStatus.ASSESSMENT,"3, Forest Avenue",SS1 2HU,True,True,2026-02-12 21:59:04.732965,2026-02-19 16:18:57.601893,Flat,End-Terrace,Southend-on-Sea,Rochford and Southend East,2.0,1900.0,rental (private),Epc.D,59.0,0.0,0.0,False,59.0
7,660652,569,PropertyCreationStatus.READY,100022668838.0,MANT061,10000429573.0,PortfolioStatus.ASSESSMENT,61 MANTILLA ROAD,SW17 8DY,True,True,2026-02-12 21:59:04.711717,2026-02-19 16:19:28.379512,Flat,Mid-Terrace,Wandsworth,Tooting,4.0,1900.0,Owner-occupied,Epc.C,71.0,0.0,0.0,False,71.0
8,660690,569,PropertyCreationStatus.READY,100021987220.0,MERR008,9050743578.0,PortfolioStatus.ASSESSMENT,"8, Merritt Road",SE4 1DY,True,True,2026-02-12 21:59:09.459245,2026-02-19 16:19:32.826638,House,Mid-Terrace,Lewisham,"Lewisham, Deptford",6.0,1900.0,owner-occupied,Epc.D,58.0,0.0,0.0,False,58.0
9,660688,569,PropertyCreationStatus.READY,207158120.0,MEDC048,208210678.0,PortfolioStatus.ASSESSMENT,"48, Medcalf Road",EN3 6HL,True,True,2026-02-12 21:59:09.459245,2026-02-19 16:19:32.826638,House,Mid-Terrace,Enfield,Enfield North,4.0,1900.0,rental (private),Epc.D,61.0,0.0,0.0,False,61.0
1 id portfolio_id creation_status uprn landlord_property_id building_reference_number status address postcode has_pre_condition_report has_recommendations created_at updated_at property_type built_form local_authority constituency number_of_rooms year_built tenure current_epc_rating current_sap_points current_valuation installed_measures_sap_point_adjustment is_sap_points_adjusted_for_installed_measures original_sap_points
2 0 660478 569 PropertyCreationStatus.READY 100090438731.0 BARR052 3460742868.0 PortfolioStatus.ASSESSMENT 52, Barrack Street CO1 2LR True True 2026-02-12 21:59:02.744427 2026-02-19 16:18:57.941443 House End-Terrace Colchester Colchester 4.0 1900.0 rental (private) Epc.E 53.0 0.0 0.0 False 53.0
3 1 660448 569 PropertyCreationStatus.READY 100090678548.0 BOUR110A 10002385993.0 PortfolioStatus.ASSESSMENT Upper 110a Bournemouth Park Road SS2 5LS True True 2026-02-12 21:59:02.388473 2026-02-19 16:18:57.578330 Flat Detached Southend-on-Sea Rochford and Southend East 2.0 1950.0 Rented (private) Epc.C 71.0 0.0 0.0 False 71.0
4 2 660538 569 PropertyCreationStatus.READY 10033423541.0 CHUR099 8188570968.0 PortfolioStatus.ASSESSMENT 99, Church Road RM3 0SH True True 2026-02-12 21:59:03.203854 2026-02-19 16:19:03.748571 House Mid-Terrace Havering Hornchurch and Upminster 5.0 1900.0 rental (private) Epc.D 58.0 0.0 0.0 False 58.0
5 3 660529 569 PropertyCreationStatus.READY 100091596678.0 CHER003 8961772668.0 PortfolioStatus.ASSESSMENT 3, Brickfield Cottages SS4 1PP True True 2026-02-12 21:59:02.935502 2026-02-19 16:18:55.971569 House Mid-Terrace Rochford Rochford and Southend East 4.0 1900.0 rental (private) Epc.E 41.0 0.0 0.0 False 41.0
6 4 660598 569 PropertyCreationStatus.READY 100090663644.0 FLEM049B 10006705876.0 PortfolioStatus.ASSESSMENT 49b Flemming Crescent SS9 4HR True True 2026-02-12 21:59:04.732965 2026-02-19 16:18:57.601893 Flat Semi-Detached Southend-on-Sea 2.0 1930.0 Rented (social) Epc.C 70.0 0.0 0.0 False 70.0
7 5 660599 569 PropertyCreationStatus.READY 10012149765.0 FORE003A 9740118668.0 PortfolioStatus.ASSESSMENT 3a, Forest Avenue SS1 2HU True True 2026-02-12 21:59:04.732965 2026-02-19 16:18:57.601893 Flat End-Terrace Southend-on-Sea Rochford and Southend East 2.0 1930.0 rental (private) Epc.D 56.0 0.0 0.0 False 56.0
8 6 660600 569 PropertyCreationStatus.READY 10012149797.0 FORE003GFF 1436818568.0 PortfolioStatus.ASSESSMENT 3, Forest Avenue SS1 2HU True True 2026-02-12 21:59:04.732965 2026-02-19 16:18:57.601893 Flat End-Terrace Southend-on-Sea Rochford and Southend East 2.0 1900.0 rental (private) Epc.D 59.0 0.0 0.0 False 59.0
9 7 660652 569 PropertyCreationStatus.READY 100022668838.0 MANT061 10000429573.0 PortfolioStatus.ASSESSMENT 61 MANTILLA ROAD SW17 8DY True True 2026-02-12 21:59:04.711717 2026-02-19 16:19:28.379512 Flat Mid-Terrace Wandsworth Tooting 4.0 1900.0 Owner-occupied Epc.C 71.0 0.0 0.0 False 71.0
10 8 660690 569 PropertyCreationStatus.READY 100021987220.0 MERR008 9050743578.0 PortfolioStatus.ASSESSMENT 8, Merritt Road SE4 1DY True True 2026-02-12 21:59:09.459245 2026-02-19 16:19:32.826638 House Mid-Terrace Lewisham Lewisham, Deptford 6.0 1900.0 owner-occupied Epc.D 58.0 0.0 0.0 False 58.0
11 9 660688 569 PropertyCreationStatus.READY 207158120.0 MEDC048 208210678.0 PortfolioStatus.ASSESSMENT 48, Medcalf Road EN3 6HL True True 2026-02-12 21:59:09.459245 2026-02-19 16:19:32.826638 House Mid-Terrace Enfield Enfield North 4.0 1900.0 rental (private) Epc.D 61.0 0.0 0.0 False 61.0

View file

@ -0,0 +1,11 @@
,id,property_id,portfolio_id,full_address,lodgement_date,is_expired,total_floor_area,walls,walls_rating,roof,roof_rating,floor,floor_rating,windows,windows_rating,heating,heating_rating,heating_controls,heating_controls_rating,hot_water,hot_water_rating,lighting,lighting_rating,mainfuel,ventilation,solar_pv,solar_hot_water,wind_turbine,floor_height,number_heated_rooms,heat_loss_corridor,unheated_corridor_length,number_of_open_fireplaces,number_of_extensions,number_of_storeys,mains_gas,energy_tariff,primary_energy_consumption,co2_emissions,current_energy_demand,current_energy_demand_heating_hotwater,estimated,sap_05_overwritten,sap_05_score,sap_05_epc_rating,heating_cost_current,hot_water_cost_current,lighting_cost_current,appliances_cost_current,gas_standing_charge,electricity_standing_charge,original_co2_emissions,original_primary_energy_consumption,original_current_energy_demand,original_current_energy_demand_heating_hotwater,installed_measures_co2_adjustment,installed_measures_energy_demand_adjustment,installed_measures_total_energy_bill_adjustment,installed_measures_heat_demand_adjustment,is_epc_adjusted_for_installed_measures
44,1534934,660688,569,"48, Medcalf Road",2018-09-05,False,68.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Solid, no insulation",,Fully double glazed,4,"Boiler and radiators, mains gas",4,"Programmer, room thermostat and trvs",4,From main system,4,Low energy lighting in all fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.55,,False,,0,0,,True,Single,278.0,3.81,14643.366,12185.6,False,False,,,711.0628,139.06198,70.770935,609.7844,128.0785,199.8375,3.81,278.0,14643.366,12185.6,0.0,0.0,0.0,0.0,False
53,1534816,660600,569,"3, Forest Avenue",2020-02-27,False,35.0,"Solid brick, as built, no insulation",1,(another dwelling above),,"Suspended, no insulation",,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 83% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.64,,False,,0,0,,True,Single,389.0,2.69,9707.762,8267.8,False,False,,,466.75378,110.046844,53.1057,345.6198,128.0785,199.8375,2.69,389.0,9707.762,8267.8,0.0,0.0,0.0,0.0,False
292,1534754,660478,569,"52, Barrack Street",2019-09-11,False,67.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Solid, no insulation",,Partial double glazing,2,"Boiler and radiators, mains gas",4,"Programmer, room thermostat and trvs",4,From main system,4,Low energy lighting in 78% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.36,,False,,0,1,,True,Single,374.0,4.9,18580.451,16094.1,False,False,,,980.4243,142.37581,86.25319,602.2173,128.0785,199.8375,4.9,374.0,18580.451,16094.1,0.0,0.0,0.0,0.0,False
295,1534868,660652,569,"61 MANTILLA ROAD, LONDON",2020-12-10,False,79.0,"Solid brick, as built, no insulation",1,(another dwelling above),,"Solid, no insulation",,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in all fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.63,,False,,0,0,,True,off-peak 7 hour,184.0,3.18,11929.814,9046.1,False,False,,,487.25763,143.84087,110.2875,688.2131,128.0785,199.8375,3.18,184.0,11929.814,9046.1,0.0,0.0,0.0,0.0,False
310,1534964,660448,569,Upper 110a Bournemouth Park Road,2022-02-22,False,35.0,"Solid brick, as built, no insulation",1,"Pitched, 100 mm loft insulation",3.0,(another dwelling below),,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 80% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.41,,False,,0,0,,True,Unknown,238.0,1.79,6821.7285,5382.4,False,False,,,272.55676,102.9448,52.930252,345.6198,128.0785,199.8375,1.79,238.0,6821.7285,5382.4,0.0,0.0,0.0,0.0,False
344,1534936,660690,569,"8, Merritt Road",2017-08-15,False,101.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Suspended, no insulation",,Fully double glazed,3,"Boiler and radiators, mains gas",4,"Programmer, room thermostat and trvs",4,From main system,4,No low energy lighting,1,Mains gas not community,natural,0.0,False,0.0,2.6,,False,,0,1,,True,Unknown,260.0,5.28,21257.838,17606.3,False,False,,,1074.1602,154.13814,194.25749,816.8532,128.0785,199.8375,5.28,260.0,21257.838,17606.3,0.0,0.0,0.0,0.0,False
460,1535385,660529,569,"3, Brickfield Cottages, Cherry Orchard Lane",2020-04-09,False,85.0,"Solid brick, as built, no insulation",2,"Pitched, 200 mm loft insulation",4.0,"Suspended, no insulation",,Fully double glazed,3,Electric storage heaters,3,Manual charge control,2,"Electric immersion, off-peak",3,Low energy lighting in 58% of fixed outlets,4,Electricity not community,natural,0.0,False,0.0,2.45,,False,,0,1,,True,dual,577.0,8.91,18395.31,15230.1,False,False,,,3550.6333,666.58136,149.46556,726.9812,0.0,199.8375,8.91,577.0,18395.31,15230.1,0.0,0.0,0.0,0.0,False
485,1534784,660538,569,"99, Church Road, Harold Wood",2019-09-03,False,92.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Suspended, no insulation",,Fully double glazed,4,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 80% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.52,,False,,0,1,,True,Single,297.0,5.44,21073.996,17904.0,False,False,,,1092.4246,156.6427,109.16419,768.6077,128.0785,199.8375,5.44,297.0,21073.996,17904.0,0.0,0.0,0.0,0.0,False
494,1534814,660598,569,49b Flemming Crescent,2024-10-03,False,35.0,"Solid brick, as built, no insulation",1,(another dwelling above),,"Suspended, no insulation",,Fully double glazed,4,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in all fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.42,,False,,0,0,,True,Single,261.0,1.89,7268.866,5865.4,False,False,,,304.39737,104.800545,43.0,345.6198,128.0785,199.8375,1.89,261.0,7268.866,5865.4,0.0,0.0,0.0,0.0,False
741,1534815,660599,569,"3a, Forest Avenue",2020-06-05,False,40.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,(another dwelling below),,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 38% of fixed outlets,3,Mains gas not community,natural,0.0,False,0.0,2.58,,False,,0,0,,True,Unknown,396.0,3.12,11673.133,9974.6,False,False,,,587.73975,108.13529,85.62337,384.70035,128.0785,199.8375,3.12,396.0,11673.133,9974.6,0.0,0.0,0.0,0.0,False
1 id property_id portfolio_id full_address lodgement_date is_expired total_floor_area walls walls_rating roof roof_rating floor floor_rating windows windows_rating heating heating_rating heating_controls heating_controls_rating hot_water hot_water_rating lighting lighting_rating mainfuel ventilation solar_pv solar_hot_water wind_turbine floor_height number_heated_rooms heat_loss_corridor unheated_corridor_length number_of_open_fireplaces number_of_extensions number_of_storeys mains_gas energy_tariff primary_energy_consumption co2_emissions current_energy_demand current_energy_demand_heating_hotwater estimated sap_05_overwritten sap_05_score sap_05_epc_rating heating_cost_current hot_water_cost_current lighting_cost_current appliances_cost_current gas_standing_charge electricity_standing_charge original_co2_emissions original_primary_energy_consumption original_current_energy_demand original_current_energy_demand_heating_hotwater installed_measures_co2_adjustment installed_measures_energy_demand_adjustment installed_measures_total_energy_bill_adjustment installed_measures_heat_demand_adjustment is_epc_adjusted_for_installed_measures
2 44 1534934 660688 569 48, Medcalf Road 2018-09-05 False 68.0 Solid brick, as built, no insulation 1 Pitched, no insulation 1.0 Solid, no insulation Fully double glazed 4 Boiler and radiators, mains gas 4 Programmer, room thermostat and trvs 4 From main system 4 Low energy lighting in all fixed outlets 5 Mains gas not community natural 0.0 False 0.0 2.55 False 0 0 True Single 278.0 3.81 14643.366 12185.6 False False 711.0628 139.06198 70.770935 609.7844 128.0785 199.8375 3.81 278.0 14643.366 12185.6 0.0 0.0 0.0 0.0 False
3 53 1534816 660600 569 3, Forest Avenue 2020-02-27 False 35.0 Solid brick, as built, no insulation 1 (another dwelling above) Suspended, no insulation Fully double glazed 3 Boiler and radiators, mains gas 4 Programmer and room thermostat 3 From main system 4 Low energy lighting in 83% of fixed outlets 5 Mains gas not community natural 0.0 False 0.0 2.64 False 0 0 True Single 389.0 2.69 9707.762 8267.8 False False 466.75378 110.046844 53.1057 345.6198 128.0785 199.8375 2.69 389.0 9707.762 8267.8 0.0 0.0 0.0 0.0 False
4 292 1534754 660478 569 52, Barrack Street 2019-09-11 False 67.0 Solid brick, as built, no insulation 1 Pitched, no insulation 1.0 Solid, no insulation Partial double glazing 2 Boiler and radiators, mains gas 4 Programmer, room thermostat and trvs 4 From main system 4 Low energy lighting in 78% of fixed outlets 5 Mains gas not community natural 0.0 False 0.0 2.36 False 0 1 True Single 374.0 4.9 18580.451 16094.1 False False 980.4243 142.37581 86.25319 602.2173 128.0785 199.8375 4.9 374.0 18580.451 16094.1 0.0 0.0 0.0 0.0 False
5 295 1534868 660652 569 61 MANTILLA ROAD, LONDON 2020-12-10 False 79.0 Solid brick, as built, no insulation 1 (another dwelling above) Solid, no insulation Fully double glazed 3 Boiler and radiators, mains gas 4 Programmer and room thermostat 3 From main system 4 Low energy lighting in all fixed outlets 5 Mains gas not community natural 0.0 False 0.0 2.63 False 0 0 True off-peak 7 hour 184.0 3.18 11929.814 9046.1 False False 487.25763 143.84087 110.2875 688.2131 128.0785 199.8375 3.18 184.0 11929.814 9046.1 0.0 0.0 0.0 0.0 False
6 310 1534964 660448 569 Upper 110a Bournemouth Park Road 2022-02-22 False 35.0 Solid brick, as built, no insulation 1 Pitched, 100 mm loft insulation 3.0 (another dwelling below) Fully double glazed 3 Boiler and radiators, mains gas 4 Programmer and room thermostat 3 From main system 4 Low energy lighting in 80% of fixed outlets 5 Mains gas not community natural 0.0 False 0.0 2.41 False 0 0 True Unknown 238.0 1.79 6821.7285 5382.4 False False 272.55676 102.9448 52.930252 345.6198 128.0785 199.8375 1.79 238.0 6821.7285 5382.4 0.0 0.0 0.0 0.0 False
7 344 1534936 660690 569 8, Merritt Road 2017-08-15 False 101.0 Solid brick, as built, no insulation 1 Pitched, no insulation 1.0 Suspended, no insulation Fully double glazed 3 Boiler and radiators, mains gas 4 Programmer, room thermostat and trvs 4 From main system 4 No low energy lighting 1 Mains gas not community natural 0.0 False 0.0 2.6 False 0 1 True Unknown 260.0 5.28 21257.838 17606.3 False False 1074.1602 154.13814 194.25749 816.8532 128.0785 199.8375 5.28 260.0 21257.838 17606.3 0.0 0.0 0.0 0.0 False
8 460 1535385 660529 569 3, Brickfield Cottages, Cherry Orchard Lane 2020-04-09 False 85.0 Solid brick, as built, no insulation 2 Pitched, 200 mm loft insulation 4.0 Suspended, no insulation Fully double glazed 3 Electric storage heaters 3 Manual charge control 2 Electric immersion, off-peak 3 Low energy lighting in 58% of fixed outlets 4 Electricity not community natural 0.0 False 0.0 2.45 False 0 1 True dual 577.0 8.91 18395.31 15230.1 False False 3550.6333 666.58136 149.46556 726.9812 0.0 199.8375 8.91 577.0 18395.31 15230.1 0.0 0.0 0.0 0.0 False
9 485 1534784 660538 569 99, Church Road, Harold Wood 2019-09-03 False 92.0 Solid brick, as built, no insulation 1 Pitched, no insulation 1.0 Suspended, no insulation Fully double glazed 4 Boiler and radiators, mains gas 4 Programmer and room thermostat 3 From main system 4 Low energy lighting in 80% of fixed outlets 5 Mains gas not community natural 0.0 False 0.0 2.52 False 0 1 True Single 297.0 5.44 21073.996 17904.0 False False 1092.4246 156.6427 109.16419 768.6077 128.0785 199.8375 5.44 297.0 21073.996 17904.0 0.0 0.0 0.0 0.0 False
10 494 1534814 660598 569 49b Flemming Crescent 2024-10-03 False 35.0 Solid brick, as built, no insulation 1 (another dwelling above) Suspended, no insulation Fully double glazed 4 Boiler and radiators, mains gas 4 Programmer and room thermostat 3 From main system 4 Low energy lighting in all fixed outlets 5 Mains gas not community natural 0.0 False 0.0 2.42 False 0 0 True Single 261.0 1.89 7268.866 5865.4 False False 304.39737 104.800545 43.0 345.6198 128.0785 199.8375 1.89 261.0 7268.866 5865.4 0.0 0.0 0.0 0.0 False
11 741 1534815 660599 569 3a, Forest Avenue 2020-06-05 False 40.0 Solid brick, as built, no insulation 1 Pitched, no insulation 1.0 (another dwelling below) Fully double glazed 3 Boiler and radiators, mains gas 4 Programmer and room thermostat 3 From main system 4 Low energy lighting in 38% of fixed outlets 3 Mains gas not community natural 0.0 False 0.0 2.58 False 0 0 True Unknown 396.0 3.12 11673.133 9974.6 False False 587.73975 108.13529 85.62337 384.70035 128.0785 199.8375 3.12 396.0 11673.133 9974.6 0.0 0.0 0.0 0.0 False

View file

@ -0,0 +1,14 @@
Unnamed: 0,id,property_id,created_at,type,measure_type,description,estimated_cost,default,starting_u_value,new_u_value,sap_points,heat_demand,kwh_savings,co2_equivalent_savings,energy_savings,energy_cost_savings,property_valuation_increase,rental_yield_increase,total_work_hours,labour_days,already_installed,plan_name
49705,24798968,660478,2026-02-19 16:14:45.560816,heating,time_temperature_zone_control,"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control)",874.568,True,,,1.5,14.9,1041.2,0.2,14.9,72.639015,,,4.16,1.0,False,whatever
49709,24798972,660478,2026-02-19 16:14:45.560816,solar_pv,solar_pv,"8 panel system, 400W solar panels, 5.8kw Growatt battery - 3.2 kWp system",6110.0,True,,,17.0,79.1,2235.5623,0.5186504,79.1,619.02716,,,48.0,2.0,False,whatever
51133,24800396,660538,2026-02-19 16:14:48.517937,solar_pv,solar_pv,"10 panel system, 400W solar panels - 4.0 kWp system",5520.0,True,,,13.0,58.5,1883.4657,0.43696404,58.5,521.5316,,,48.0,2.0,False,whatever
52686,24801949,660529,2026-02-19 16:14:52.052740,heating,boiler_upgrade,"Upgrade to a new condensing boiler. Upgrade heating controls to Room thermostat, programmer and TRVs",8008.6,True,,,12.9,132.9,0.0,1.1556525,132.9,1806.0955,,,26.5,4.0,False,whatever
52707,24801970,660529,2026-02-19 16:14:52.052740,solar_pv,solar_pv,"10 panel system, 400W solar panels - 4.0 kWp system",5520.0,True,,,16.1,68.8,1837.0155,0.4261876,68.8,508.6696,,,48.0,2.0,False,whatever
55310,24804573,660688,2026-02-19 16:15:04.461456,solar_pv,solar_pv,"5 panel system, 400W solar panels - 2.0 kWp system",5100.0,True,,,9.0,41.4,896.6345,0.20801921,41.4,248.27809,,,48.0,2.0,False,whatever
55380,24804643,660690,2026-02-19 16:15:04.461456,low_energy_lighting,low_energy_lighting,Install low energy lighting in 14 outlets,49.0,True,,,2.0,18.2,766.5,0.124173,18.2,212.24385,,,1.0,0.125,False,whatever
55384,24804647,660690,2026-02-19 16:15:04.461456,solar_pv,solar_pv,"9 panel system, 400W solar panels - 3.6 kWp system",5420.0,True,,,10.0,43.9,1760.723,0.40848774,43.9,487.54422,,,48.0,2.0,False,whatever
62983,24812246,660599,2026-02-19 16:18:57.606337,loft_insulation,loft_insulation,Install 300mm of Knauf Loft Roll 44 glass fibre roll in your loft,600.0,True,2.3,2.3,8.4,102.8,3178.2,0.9,102.8,221.72618,,,8.0,1.0,False,whatever
62985,24812248,660599,2026-02-19 16:18:57.606337,low_energy_lighting,low_energy_lighting,Install low energy lighting in 4 outlets,14.0,True,,,1.0,14.2,219.0,0.0,14.2,60.6411,,,1.0,0.125,False,whatever
62987,24812250,660599,2026-02-19 16:18:57.606337,heating,time_temperature_zone_control,"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control)",604.584,True,,,3.3,18.4,527.7,0.2,18.4,36.814835,,,3.08,1.0,False,whatever
62989,24812252,660600,2026-02-19 16:18:57.606337,suspended_floor_insulation,suspended_floor_insulation,Install 75mm Q-bot underfloor insulation insulation in suspended floor,3281.25,True,0.87,0.22,4.0,99.2,1816.6,0.6,99.2,126.734566,,,57.05,2.3770833,False,whatever
62992,24812255,660600,2026-02-19 16:18:57.606337,heating,time_temperature_zone_control,"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control)",604.584,True,,,1.9,17.7,650.1,0.2,17.7,45.354034,,,3.08,1.0,False,whatever
1 Unnamed: 0 id property_id created_at type measure_type description estimated_cost default starting_u_value new_u_value sap_points heat_demand kwh_savings co2_equivalent_savings energy_savings energy_cost_savings property_valuation_increase rental_yield_increase total_work_hours labour_days already_installed plan_name
2 49705 24798968 660478 2026-02-19 16:14:45.560816 heating time_temperature_zone_control Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control) 874.568 True 1.5 14.9 1041.2 0.2 14.9 72.639015 4.16 1.0 False whatever
3 49709 24798972 660478 2026-02-19 16:14:45.560816 solar_pv solar_pv 8 panel system, 400W solar panels, 5.8kw Growatt battery - 3.2 kWp system 6110.0 True 17.0 79.1 2235.5623 0.5186504 79.1 619.02716 48.0 2.0 False whatever
4 51133 24800396 660538 2026-02-19 16:14:48.517937 solar_pv solar_pv 10 panel system, 400W solar panels - 4.0 kWp system 5520.0 True 13.0 58.5 1883.4657 0.43696404 58.5 521.5316 48.0 2.0 False whatever
5 52686 24801949 660529 2026-02-19 16:14:52.052740 heating boiler_upgrade Upgrade to a new condensing boiler. Upgrade heating controls to Room thermostat, programmer and TRVs 8008.6 True 12.9 132.9 0.0 1.1556525 132.9 1806.0955 26.5 4.0 False whatever
6 52707 24801970 660529 2026-02-19 16:14:52.052740 solar_pv solar_pv 10 panel system, 400W solar panels - 4.0 kWp system 5520.0 True 16.1 68.8 1837.0155 0.4261876 68.8 508.6696 48.0 2.0 False whatever
7 55310 24804573 660688 2026-02-19 16:15:04.461456 solar_pv solar_pv 5 panel system, 400W solar panels - 2.0 kWp system 5100.0 True 9.0 41.4 896.6345 0.20801921 41.4 248.27809 48.0 2.0 False whatever
8 55380 24804643 660690 2026-02-19 16:15:04.461456 low_energy_lighting low_energy_lighting Install low energy lighting in 14 outlets 49.0 True 2.0 18.2 766.5 0.124173 18.2 212.24385 1.0 0.125 False whatever
9 55384 24804647 660690 2026-02-19 16:15:04.461456 solar_pv solar_pv 9 panel system, 400W solar panels - 3.6 kWp system 5420.0 True 10.0 43.9 1760.723 0.40848774 43.9 487.54422 48.0 2.0 False whatever
10 62983 24812246 660599 2026-02-19 16:18:57.606337 loft_insulation loft_insulation Install 300mm of Knauf Loft Roll 44 glass fibre roll in your loft 600.0 True 2.3 2.3 8.4 102.8 3178.2 0.9 102.8 221.72618 8.0 1.0 False whatever
11 62985 24812248 660599 2026-02-19 16:18:57.606337 low_energy_lighting low_energy_lighting Install low energy lighting in 4 outlets 14.0 True 1.0 14.2 219.0 0.0 14.2 60.6411 1.0 0.125 False whatever
12 62987 24812250 660599 2026-02-19 16:18:57.606337 heating time_temperature_zone_control Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control) 604.584 True 3.3 18.4 527.7 0.2 18.4 36.814835 3.08 1.0 False whatever
13 62989 24812252 660600 2026-02-19 16:18:57.606337 suspended_floor_insulation suspended_floor_insulation Install 75mm Q-bot underfloor insulation insulation in suspended floor 3281.25 True 0.87 0.22 4.0 99.2 1816.6 0.6 99.2 126.734566 57.05 2.3770833 False whatever
14 62992 24812255 660600 2026-02-19 16:18:57.606337 heating time_temperature_zone_control Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control) 604.584 True 1.9 17.7 650.1 0.2 17.7 45.354034 3.08 1.0 False whatever

View file

@ -0,0 +1,540 @@
import pandas as pd
import numpy as np
from pathlib import Path
import time
from backend.export.property_scenarios.main import process_export
from backend.export.property_scenarios.input_schema import ExportRequest
from backend.app.db.models.portfolio import PropertyModel, Epc, Portfolio, PortfolioStatus, PortfolioGoal, \
PropertyCreationStatus, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import PlanModel, Recommendation, PlanRecommendations, \
RecommendationMaterials
from backend.app.db.models.materials import Material
from utils.logger import setup_logger
# Directory holding the CSV fixtures that load_csv() reads. The path is
# relative -- presumably the tests are run from the repository root; confirm.
FIXTURE_PATH = Path("backend/export/tests/fixtures")
# Module-level logger used by the tests in this file for progress and timing messages.
logger = setup_logger()
def load_csv(name: str) -> pd.DataFrame:
    """Read the fixture CSV called ``name`` from ``FIXTURE_PATH``.

    Every NaN cell in the parsed frame is replaced with ``None`` before the
    frame is returned.
    """
    csv_path = FIXTURE_PATH / name
    frame = pd.read_csv(csv_path)
    return frame.replace({np.nan: None})
def test_default_export_integration(db_session):
    """Integration test of ``process_export`` for portfolio 569, default plans only.

    The test loads the ``*_569.csv`` fixtures, inserts them through
    ``db_session`` (portfolio, properties, EPC details, plans, recommendations
    and plan/recommendation links), commits, runs ``process_export`` with
    ``default_plans_only=True`` and then checks the ``default_plans`` frame in
    the result: row count, overall shape, the number of flats predicted below
    SAP 69, and the summed cost / SAP columns.

    Args:
        db_session: database session fixture. It is used via
            ``bulk_save_objects``/``flush``/``commit``/``query`` -- presumably a
            SQLAlchemy ``Session`` supplied by a conftest fixture; confirm.
    """
    # ----------------------------------------
    # 1) Load csvs
    # ----------------------------------------
    t0 = time.perf_counter()
    portfolio_df = load_csv("portfolio_569.csv")
    properties_df = load_csv("properties_569.csv")
    property_details_epc_df = load_csv("property_details_epc_569.csv")
    plans_df = load_csv("plans_569.csv")
    plan_recs_df = load_csv("plan_recs_569.csv")
    recommendations_df = load_csv("recommendations_569.csv")
    logger.info(
        "Loaded CSVs in %.2f seconds | properties=%s plans=%s recs=%s",
        time.perf_counter() - t0,
        len(properties_df),
        len(plans_df),
        len(recommendations_df),
    )
    logger.info("Starting database load")
    db_load_t0 = time.perf_counter()

    # ----------------------------------------
    # 2) Insert test portfolio
    # ----------------------------------------
    portfolios = []
    for row in portfolio_df.itertuples(index=False):
        portfolios.append(
            Portfolio(
                id=row.id,
                name=row.name,
                # The CSV stores enums as "EnumName.MEMBER"; look up by member name.
                status=PortfolioStatus[row.status.split(".")[-1]],
                goal=PortfolioGoal[row.goal.split(".")[-1]] if row.goal else None,
            )
        )
    db_session.bulk_save_objects(portfolios)
    db_session.flush()

    # ----------------------------------------
    # 3) Insert test property
    # ----------------------------------------
    properties = []
    for row in properties_df.itertuples(index=False):
        row_dict = row._asdict()
        # These identifiers are written as floats in the CSV (e.g. "100090438731.0").
        row_dict["uprn"] = int(row_dict["uprn"]) if row_dict.get("uprn") else None
        row_dict["building_reference_number"] = (
            int(row_dict["building_reference_number"])
            if row_dict.get("building_reference_number")
            else None
        )
        # Pass only the CSV columns that exist on the model's table.
        prop = PropertyModel(**{
            col: row_dict[col]
            for col in PropertyModel.__table__.columns.keys()
            if col in row_dict
        })
        # Replace the "EnumName.MEMBER" strings with the actual enum members.
        prop.creation_status = PropertyCreationStatus[
            row_dict["creation_status"].split(".")[-1]
        ]
        prop.status = PortfolioStatus[row_dict["status"].split(".")[-1]]
        if row_dict.get("current_epc_rating"):
            prop.current_epc_rating = Epc[
                row_dict["current_epc_rating"].split(".")[-1]
            ]
        properties.append(prop)
    db_session.bulk_save_objects(properties)
    db_session.flush()

    # ----------------------------------------
    # 4) Insert property details - EPC
    # ----------------------------------------
    epc_rows = []
    for row in property_details_epc_df.itertuples(index=False):
        row_dict = row._asdict()
        # Build only fields that exist on the model; the id and the two foreign
        # keys are excluded here (the foreign keys are passed explicitly below).
        epc_data = {
            col.name: row_dict[col.name]
            for col in PropertyDetailsEpcModel.__table__.columns.values()
            if col.name in row_dict and col.name not in ["id", "property_id", "portfolio_id"]
        }
        epc = PropertyDetailsEpcModel(
            property_id=row.property_id,
            portfolio_id=row.portfolio_id,
            **epc_data,
        )
        epc_rows.append(epc)
    db_session.bulk_save_objects(epc_rows)
    db_session.flush()

    # ----------------------------------------
    # 5) Insert default plan
    # ----------------------------------------
    plans = []
    for row in plans_df.itertuples(index=False):
        row_dict = row._asdict()
        if row_dict.get("post_epc_rating"):
            row_dict["post_epc_rating"] = Epc[
                row_dict["post_epc_rating"].split(".")[-1]
            ]
        # scenario_id is cleared for every fixture plan.
        # NOTE(review): presumably because default plans have no scenario -- confirm.
        row_dict["scenario_id"] = None
        plan = PlanModel(**{
            col: row_dict[col]
            for col in PlanModel.__table__.columns.keys()
            if col in row_dict
        })
        plans.append(plan)
    db_session.bulk_save_objects(plans)
    db_session.flush()

    # ----------------------------------------
    # 6) Insert recommendation
    # ----------------------------------------
    recs = [
        Recommendation(**{
            col: row[col]
            for col in Recommendation.__table__.columns.keys()
            if col in row
        })
        for _, row in recommendations_df.iterrows()
    ]
    db_session.bulk_save_objects(recs)
    db_session.flush()

    # ----------------------------------------
    # 7) Insert PlanRecommendations
    # ----------------------------------------
    links = [
        PlanRecommendations(
            plan_id=row.plan_id,
            recommendation_id=row.recommendation_id,
        )
        for row in plan_recs_df.itertuples(index=False)
    ]
    db_session.bulk_save_objects(links)
    db_session.commit()
    logger.info("Inserted all data in %.2f seconds", time.perf_counter() - db_load_t0)

    # ----------------------------------------
    # 8) Build payload
    # ----------------------------------------
    body_dict = {
        "task_id": "test",
        "subtask_id": "test",
        "portfolio_id": 569,
        "scenario_ids": [],
        "default_plans_only": True,
    }
    payload = ExportRequest.model_validate(body_dict)

    # ----------------------------------------
    # 9) Call process_export
    # ----------------------------------------
    logger.info(
        "Recommendation count in DB: %s",
        db_session.query(Recommendation).count()
    )
    logger.info(
        "Property count in DB: %s",
        db_session.query(PropertyModel).count()
    )
    logger.info(
        "Property EPC in DB: %s",
        db_session.query(PropertyDetailsEpcModel).count()
    )
    logger.info(
        "Plan count in DB: %s",
        db_session.query(PlanModel).count()
    )
    # Count the link table itself (previously this line re-counted PlanModel).
    logger.info(
        "PlanRecommendations count in DB: %s",
        db_session.query(PlanRecommendations).count()
    )
    logger.info("Starting process_export")
    process_t0 = time.perf_counter()
    result = process_export(payload, session=db_session)
    logger.info("process_export finished in %.2f seconds", time.perf_counter() - process_t0)

    # ----------------------------------------
    # 10) Assertions
    # ----------------------------------------
    assert "default_plans" in result, "Expected 'default_plans' in export result, got {}".format(result.keys())
    df = result["default_plans"]
    assert df.shape[0] == 10, "Expected 10 properties in the export, got {}".format(df.shape[0])

    # Properties whose predicted post-works SAP is below 69.
    failed = df[df["predicted_post_works_sap"] < 69]
    failed_property_types = failed["property_type"].value_counts().to_dict()
    # .get() so a missing "Flat" key surfaces as an assertion failure with
    # context rather than a bare KeyError.
    assert failed_property_types.get("Flat", 0) == 2, (
        "Expected 2 flats below SAP 69, got {}".format(failed_property_types)
    )
    # Check the houses
    assert failed.shape[0], "Expected at least one property below SAP 69"

    # Float sums are compared with a tight relative tolerance instead of exact
    # equality, so a last-bit difference in summation does not fail the test.
    total_retrofit_cost = df["total_retrofit_cost"].sum()
    assert np.isclose(total_retrofit_cost, 41706.585999999996, rtol=1e-9, atol=0.0), (
        "Expected total retrofit cost to be 41706.585999999996, got {}".format(total_retrofit_cost)
    )
    total_post_works_sap = df["predicted_post_works_sap"].sum()
    assert np.isclose(total_post_works_sap, 698.1, rtol=1e-9, atol=0.0), (
        "Expected total predicted post works SAP to be 698.1, got {}".format(total_post_works_sap)
    )
    total_sap_points = df["sap_points"].sum()
    assert np.isclose(total_sap_points, 100.10000000000001, rtol=1e-9, atol=0.0), (
        "Expected total SAP points increase to be 100.10000000000001, got {}".format(total_sap_points)
    )
    assert df.shape == (10, 95), "Expected dataframe shape to be (10, 95), got {}".format(df.shape)
def test_solar_with_battery_example(db_session):
    """
    Check that a solar PV recommendation backed by a battery-inclusive material
    is exported under the ``solar_pv_with_battery`` column.

    The test seeds ``db_session`` with one portfolio, one property (plus its EPC
    details), one default plan, one ``solar_pv`` recommendation linked to that
    plan, and one material flagged ``includes_battery=True`` linked to the
    recommendation. It then runs ``process_export`` in default-plans-only mode
    and asserts on the resulting ``default_plans`` dataframe.

    The fixture dataframes carry more columns than are inserted: only the
    fields passed explicitly to each model constructor below reach the database.

    Args:
        db_session: Database session fixture; rows are added, flushed and
            finally committed on it, and it is handed to ``process_export``.
    """
    test_portfolio_id = 1
    test_property_id = 1

    portfolio_df = pd.DataFrame(
        [{'id': test_portfolio_id, 'name': 'Example', 'budget': None,
          'status': 'PortfolioStatus.SCOPING', 'goal': 'PortfolioGoal.NONE', 'cost': None,
          'number_of_properties': None,
          'co2_equivalent_savings': None, 'energy_savings': None, 'energy_cost_savings': None,
          'property_valuation_increase': None, 'rental_yield_increase': None, 'total_work_hours': None,
          'labour_days': None, 'created_at': '2026-02-12 21:23:37.862000+00:00',
          'updated_at': '2026-02-12 21:23:37.862000+00:00', 'epc_breakdown_pre_retrofit': None,
          'epc_breakdown_post_retrofit': None, 'n_units_to_retrofit': None, 'co2_per_unit_pre_retrofit': None,
          'co2_per_unit_post_retrofit': None, 'energy_bill_per_unit_pre_retrofit': None,
          'energy_bill_per_unit_post_retrofit': None, 'energy_consumption_per_unit_pre_retrofit': None,
          'energy_consumption_per_unit_post_retrofit': None, 'valuation_improvement_per_unit': None,
          'cost_per_unit': None, 'cost_per_co2_saved': None, 'cost_per_sap_point': None,
          'valuation_return_on_investment': None}]
    )

    properties_df = pd.DataFrame(
        [{'id': test_property_id, 'portfolio_id': test_portfolio_id,
          'creation_status': 'PropertyCreationStatus.READY',
          'uprn': 100090438731, 'landlord_property_id': 'BARR052', 'building_reference_number': 3460742868.0,
          'status': 'PortfolioStatus.ASSESSMENT', 'address': '52, Barrack Street', 'postcode': 'CO1 2LR',
          'has_pre_condition_report': True, 'has_recommendations': True, 'created_at': '2026-02-12 21:59:02.744427',
          'updated_at': '2026-02-19 16:18:57.941443', 'property_type': 'House', 'built_form': 'End-Terrace',
          'local_authority': 'Colchester', 'constituency': 'Colchester', 'number_of_rooms': 4.0, 'year_built': 1900.0,
          'tenure': 'rental (private)', 'current_epc_rating': 'Epc.E', 'current_sap_points': 53.0,
          'current_valuation': 0.0, 'installed_measures_sap_point_adjustment': 0.0,
          'is_sap_points_adjusted_for_installed_measures': False, 'original_sap_points': 53.0}]
    )

    property_details_epc_df = pd.DataFrame(
        [
            {'id': 1534934, 'property_id': test_property_id, 'portfolio_id': test_portfolio_id,
             'full_address': '48, Medcalf Road', 'lodgement_date': '2018-09-05', 'is_expired': False,
             'total_floor_area': 68.0, 'walls': 'Solid brick, as built, no insulation', 'walls_rating': 1,
             'roof': 'Pitched, no insulation', 'roof_rating': 1.0, 'floor': 'Solid, no insulation',
             'floor_rating': None,
             'windows': 'Fully double glazed', 'windows_rating': 4, 'heating': 'Boiler and radiators, mains gas',
             'heating_rating': 4, 'heating_controls': 'Programmer, room thermostat and trvs',
             'heating_controls_rating': 4,
             'hot_water': 'From main system', 'hot_water_rating': 4,
             'lighting': 'Low energy lighting in all fixed outlets', 'lighting_rating': 5,
             'mainfuel': 'Mains gas not community', 'ventilation': 'natural', 'solar_pv': 0.0,
             'solar_hot_water': False,
             'wind_turbine': 0.0, 'floor_height': 2.55, 'number_heated_rooms': None, 'heat_loss_corridor': False,
             'unheated_corridor_length': None, 'number_of_open_fireplaces': 0, 'number_of_extensions': 0,
             'number_of_storeys': None, 'mains_gas': True, 'energy_tariff': 'Single',
             'primary_energy_consumption': 278.0,
             'co2_emissions': 3.81, 'current_energy_demand': 14643.366,
             'current_energy_demand_heating_hotwater': 12185.6,
             'estimated': False, 'sap_05_overwritten': False, 'sap_05_score': None, 'sap_05_epc_rating': None,
             'heating_cost_current': 711.0628, 'hot_water_cost_current': 139.06198,
             'lighting_cost_current': 70.770935,
             'appliances_cost_current': 609.7844, 'gas_standing_charge': 128.0785,
             'electricity_standing_charge': 199.8375,
             'original_co2_emissions': 3.81, 'original_primary_energy_consumption': 278.0,
             'original_current_energy_demand': 14643.366,
             'original_current_energy_demand_heating_hotwater': 12185.6,
             'installed_measures_co2_adjustment': 0.0, 'installed_measures_energy_demand_adjustment': 0.0,
             'installed_measures_total_energy_bill_adjustment': 0.0,
             'installed_measures_heat_demand_adjustment': 0.0,
             'is_epc_adjusted_for_installed_measures': False}
        ]
    )

    plans_df = pd.DataFrame(
        [
            {'id': 0, 'name': None, 'portfolio_id': test_portfolio_id, 'property_id': test_property_id,
             'scenario_id': 1060, 'created_at': '2026-02-19 16:14:45.560816', 'is_default': True,
             'valuation_increase_lower_bound': 0.0302,
             'valuation_increase_upper_bound': 0.07, 'valuation_increase_average': 0.048226666, 'plan_type': None,
             'post_sap_points': 71.5, 'post_epc_rating': 'Epc.C', 'post_co2_emissions': 4.1813498,
             'co2_savings': 0.71865046, 'post_energy_bill': 1447.5204, 'energy_bill_savings': 691.6662,
             'post_energy_consumption': 15303.688, 'energy_consumption_savings': 3276.7622,
             'valuation_post_retrofit': None, 'valuation_increase': None, 'cost_of_works': 6984.568,
             'contingency_cost': 1003.9568}
        ]
    )

    plan_recs_df = pd.DataFrame(
        [{'id': 0, 'plan_id': 0, 'recommendation_id': 0}]
    )

    recommendations_df = pd.DataFrame(
        [{'id': 0, 'property_id': test_property_id, 'created_at': '2026-02-19 16:14:45.560816',
          'type': 'solar_pv', 'measure_type': 'solar_pv',
          'description': 'Fit solar',
          'estimated_cost': 10000, 'default': True, 'starting_u_value': None, 'new_u_value': None,
          'sap_points': 1.5,
          'heat_demand': 14.9, 'kwh_savings': 1041.2, 'co2_equivalent_savings': 0.2, 'energy_savings': 14.9,
          'energy_cost_savings': 72.639015, 'property_valuation_increase': None, 'rental_yield_increase': None,
          'total_work_hours': 4.16, 'labour_days': 1.0, 'already_installed': False, 'plan_name': 'whatever'}
         ]
    )

    recommendations_materials_df = pd.DataFrame(
        [
            {
                "id": 0, "recommendation_id": 0, "material_id": 0, "depth": None, "quantity": 1.0,
                "quantity_unit": "part",
                "estimated_cost": 10000, "created_at": '2026-02-19 16:14:45.560816',
                "updated_at": '2026-02-19 16:14:45.560816',
            }
        ]
    )

    materials_df = pd.DataFrame(
        [
            {'id': 0, 'type': 'solar_pv', 'description': 'Some solar product',
             'depth': 75.0,
             'depth_unit': 'mm', 'cost': None, 'cost_unit': 'gbp_per_m2', 'r_value_per_mm': 0.030303031,
             'r_value_unit': 'square_meter_kelvin_per_watt', 'thermal_conductivity': 0.033,
             'thermal_conductivity_unit': 'watt_per_meter_kelvin', 'link': 'Test',
             # Fixed: this literal previously started with a stray apostrophe
             'created_at': '2026-02-19 16:14:45.560816', 'is_active': True,
             'prime_material_cost': None,
             'material_cost': 0.0, 'labour_cost': 0.0, 'labour_hours_per_unit': 0.0, 'plant_cost': 0.0,
             'total_cost': 10000,
             'notes': None, 'is_installer_quote': True, 'innovation_rate': 0.25, 'size': None, 'size_unit': None,
             'includes_scaffolding': True, 'includes_battery': True, 'battery_size': 5.8}
        ]
    )

    # Load into db
    # -------------------------------------------------
    # Insert Portfolio
    # -------------------------------------------------
    for row in portfolio_df.itertuples(index=False):
        db_session.add(
            Portfolio(
                id=row.id,
                name=row.name,
                # Fixture strings look like "PortfolioStatus.SCOPING"; keep the member name only
                status=PortfolioStatus[row.status.split(".")[-1]],
                goal=PortfolioGoal[row.goal.split(".")[-1]],
            )
        )
    db_session.flush()

    # -------------------------------------------------
    # Insert Property
    # -------------------------------------------------
    for row in properties_df.itertuples(index=False):
        prop = PropertyModel(
            id=row.id,
            portfolio_id=row.portfolio_id,
            creation_status=PropertyCreationStatus[row.creation_status.split(".")[-1]],
            status=PortfolioStatus[row.status.split(".")[-1]],
            uprn=row.uprn,
            property_type=row.property_type,
            current_sap_points=row.current_sap_points,
            current_epc_rating=Epc[row.current_epc_rating.split(".")[-1]],
        )
        db_session.add(prop)
    db_session.flush()

    # -------------------------------------------------
    # Insert EPC Details
    # -------------------------------------------------
    for row in property_details_epc_df.itertuples(index=False):
        epc = PropertyDetailsEpcModel(
            property_id=row.property_id,
            portfolio_id=row.portfolio_id,
            full_address=row.full_address,
            total_floor_area=row.total_floor_area,
            walls=row.walls,
            roof=row.roof,
            windows=row.windows,
            heating=row.heating,
            solar_pv=row.solar_pv,
        )
        db_session.add(epc)
    db_session.flush()

    # -------------------------------------------------
    # Insert Plan (default)
    # -------------------------------------------------
    for row in plans_df.itertuples(index=False):
        plan = PlanModel(
            id=row.id,
            portfolio_id=row.portfolio_id,
            property_id=row.property_id,
            scenario_id=None,  # default mode: the fixture's scenario_id is deliberately not used
            is_default=row.is_default,
        )
        db_session.add(plan)
    db_session.flush()

    # -------------------------------------------------
    # IMPORTANT: Force recommendation to be solar_pv
    # -------------------------------------------------
    recommendations_df.loc[0, "measure_type"] = "solar_pv"
    for row in recommendations_df.itertuples(index=False):
        rec = Recommendation(
            id=row.id,
            property_id=row.property_id,
            measure_type=row.measure_type,
            estimated_cost=row.estimated_cost,
            default=row.default,
            already_installed=row.already_installed,
            sap_points=row.sap_points,
            type=row.type,
            description=row.description
        )
        db_session.add(rec)
    db_session.flush()

    # -------------------------------------------------
    # Link Plan -> Recommendation
    # -------------------------------------------------
    for row in plan_recs_df.itertuples(index=False):
        db_session.add(
            PlanRecommendations(
                plan_id=row.plan_id,
                recommendation_id=row.recommendation_id,
            )
        )
    db_session.flush()

    # -------------------------------------------------
    # Insert Material (includes_battery=True)
    # -------------------------------------------------
    for row in materials_df.itertuples(index=False):
        material = Material(
            id=row.id,
            type=row.type,
            description=row.description,
            depth_unit=row.depth_unit,
            cost_unit=row.cost_unit,
            r_value_unit=row.r_value_unit,
            thermal_conductivity_unit=row.thermal_conductivity_unit,
            includes_battery=row.includes_battery,
            is_active=row.is_active,
        )
        db_session.add(material)
    db_session.flush()

    # -------------------------------------------------
    # Link Recommendation -> Material
    # -------------------------------------------------
    for row in recommendations_materials_df.itertuples(index=False):
        db_session.add(
            RecommendationMaterials(
                recommendation_id=row.recommendation_id,
                material_id=row.material_id,
                # The fixture depth is None; fall back to 0.0 for the insert
                depth=row.depth or 0.0,
                quantity=row.quantity,
                quantity_unit=row.quantity_unit,
                estimated_cost=row.estimated_cost,
            )
        )
    db_session.commit()

    payload = ExportRequest.model_validate({
        "task_id": "test",
        "subtask_id": "test",
        "portfolio_id": test_portfolio_id,
        "scenario_ids": [],
        "default_plans_only": True,
    })
    result = process_export(payload, session=db_session)

    assert "default_plans" in result, (
        "Expected 'default_plans' in export result, got {}".format(list(result.keys()))
    )
    df = result["default_plans"]

    assert "solar_pv_with_battery" in df.columns, (
        "Expected a 'solar_pv_with_battery' column, got {}".format(list(df.columns))
    )
    # solar_pv should NOT exist
    assert "solar_pv" not in df.columns, "Did not expect a plain 'solar_pv' column in the export"
    assert df.shape[0] == 1, "Expected 1 property in the export, got {}".format(df.shape[0])
    # Cost should land in correct column
    assert df["solar_pv_with_battery"].iloc[0] == 10000, (
        "Expected solar_pv_with_battery cost of 10000, got {}".format(df["solar_pv_with_battery"].iloc[0])
    )

View file

@ -1,9 +1,28 @@
FROM public.ecr.aws/lambda/python:3.10
FROM public.ecr.aws/lambda/python:3.11
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
ENV DB_HOST=${DEV_DB_HOST}
ENV DB_PORT=${DEV_DB_PORT}
ENV DB_NAME=${DEV_DB_NAME}
# Set working directory (Lambda task root)
WORKDIR /var/task
# -----------------------------
COPY backend/postcode_splitter/handler/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy necessary files for database and utility imports
COPY utils/ utils/
COPY backend/ backend/
COPY datatypes/ datatypes/
# Copy the handler
COPY backend/postcode_splitter/main.py .
# Lambda handler
# -----------------------------
CMD ["main.handler"]

View file

@ -0,0 +1,11 @@
pandas==2.2.2
numpy<2.0
requests
tqdm
openpyxl
epc-api-python==1.0.2
boto3==1.35.44
sqlmodel
sqlalchemy==2.0.36
psycopg2-binary==2.9.10
pydantic-settings==2.6.0

View file

@ -1,127 +1,278 @@
import os
import sys
import json
import pandas as pd
import requests
from backend.address2UPRN.main import (
resolve_uprns_for_postcode_group,
get_epc_data_with_postcode,
import boto3
from uuid import UUID, uuid4
from utils.s3 import (
read_csv_from_s3 as read_csv_from_s3_dict,
save_csv_to_s3,
parse_s3_uri,
)
from utils.logger import setup_logger
from tqdm import tqdm
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
from datetime import datetime
logger = setup_logger()
def sanitise_postcode(postcode: str) -> str | None:
def upload_batch_to_s3(
batch_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
) -> str:
"""
Normalise postcode for grouping.
- Uppercase
- Remove all whitespace
Upload batch DataFrame to S3 as CSV.
"""
if pd.isna(postcode):
return None
if bucket_name is None:
bucket_name = os.getenv("S3_BUCKET_NAME")
return postcode.upper().replace(" ", "")
def is_valid_postcode(postcode_clean: str) -> bool:
"""
Validate postcode using postcodes.io.
Expects a sanitised postcode (e.g. E84SQ).
Returns True if valid, False otherwise.
"""
POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
if not postcode_clean:
return False
if not bucket_name:
logger.error(
"S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
)
raise ValueError("S3_BUCKET_NAME not configured")
try:
resp = requests.get(
POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
timeout=5,
file_name = f"{datetime.now().isoformat()}_{str(uuid4())[:8]}"
file_key = (
f"ara_postcode_splitter_batches/{task_id}/{sub_task_id}/{file_name}.csv"
)
resp.raise_for_status()
return resp.json().get("result", False)
except requests.RequestException:
# Network issues, rate limits, etc.
return False
success = save_csv_to_s3(batch_df, bucket_name, file_key)
if success:
s3_uri = f"s3://{bucket_name}/{file_key}"
logger.info(f"Successfully uploaded batch to {s3_uri}")
return s3_uri
else:
logger.error(f"Failed to upload batch to S3")
raise ValueError("Failed to save CSV to S3")
except Exception as e:
logger.error(f"Error uploading batch to S3: {str(e)}")
raise
def main():
df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability")
df = df.head(500)
def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> str:
"""
Send a batch to the address2UPRN SQS queue with S3 reference.
# Sanitise postcodes
df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)
Args:
task_id: The parent task ID
sub_task_id: The new subtask ID for this batch
s3_uri: S3 URI pointing to the batch CSV file
# --- validate AFTER grouping (save API calls) ---
Returns:
Message ID from SQS
"""
sqs_client = boto3.client("sqs")
queue_url = os.getenv("ADDRESS2UPRN_QUEUE_URL")
# Get unique, non-null postcodes
unique_postcodes = df["postcode_clean"].dropna().unique()
if not queue_url:
raise ValueError("ADDRESS2UPRN_QUEUE_URL environment variable not set")
# Validate each postcode once, TODOadd a progress bar
postcode_validity = {
pc: is_valid_postcode(pc)
for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
message_body = {
"task_id": task_id,
"sub_task_id": sub_task_id,
"s3_uri": s3_uri,
}
# Map validity back onto dataframe
df["postcode_valid"] = df["postcode_clean"].map(postcode_validity)
response = sqs_client.send_message(
QueueUrl=queue_url,
MessageBody=json.dumps(message_body),
)
logger.info(
f"Sent message to address2UPRN queue. "
f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {response['MessageId']}"
)
return response["MessageId"]
def create_batch_and_send_to_address2uprn(
    batch_df: pd.DataFrame,
    task_id: str,
    sub_task_id: str,
    subtask_interface: SubTaskInterface,
    bucket_name: str,
) -> str:
    """
    Store one batch in S3, register a subtask for it, and queue it for address2UPRN.

    Args:
        batch_df: Rows that make up this batch.
        task_id: Parent task identifier. Passed as-is to ``create_subtask`` and
            stringified for the S3 upload, the subtask inputs and the queue message.
        sub_task_id: Identifier forwarded (stringified) to ``upload_batch_to_s3``
            only; the queue message carries the newly created subtask id instead.
        subtask_interface: Object whose ``create_subtask`` registers the batch.
        bucket_name: Bucket forwarded to ``upload_batch_to_s3``.

    Returns:
        The value returned by ``create_subtask`` for the new batch subtask.
        NOTE(review): annotated as ``str`` but returned unconverted - confirm
        what ``create_subtask`` actually returns.
    """
    task_id_text = str(task_id)

    # The upload comes first: its URI is recorded in the subtask and in the message
    s3_uri = upload_batch_to_s3(
        batch_df=batch_df,
        task_id=task_id_text,
        sub_task_id=str(sub_task_id),
        bucket_name=bucket_name,
    )

    # Register a dedicated subtask carrying everything needed to process this batch
    subtask_inputs = {"task_id": task_id_text, "s3_uri": s3_uri}
    created_batch_sub_task_id = subtask_interface.create_subtask(
        task_id=task_id, inputs=subtask_inputs
    )
    logger.info(f"Created batch subtask {created_batch_sub_task_id}")

    # Queue the batch under the new subtask id, pointing at the uploaded CSV
    send_to_address2uprn_queue(
        task_id_text, str(created_batch_sub_task_id), s3_uri
    )
    return created_batch_sub_task_id
def handler(event, context, local=False):
print(f"Function: {context.function_name}")
print(f"Request ID: {context.aws_request_id}")
# Example SQS message for testing (copy and paste into SQS):
if local is True:
event = {
"Records": [
{
"body": json.dumps(
{
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
"sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv",
}
)
}
]
}
# Handle both single event and batch events (SQS, etc.)
records = event.get("Records", [event])
results = []
errors = []
subtask_interface = SubTaskInterface()
bucket_name = os.getenv("S3_BUCKET_NAME")
if local:
bucket_name = "retrofit-data-dev"
for postcode, group_df in tqdm(
df[df["postcode_valid"]].groupby("postcode_clean"),
desc="Resolving UPRNs by postcode",
):
try:
epc_df = get_epc_data_with_postcode(postcode)
for record in records:
if local:
record = records[0]
task_id = None
subtask_id = None
# Parse body (inputs)
if epc_df.empty:
tmp = group_df.copy()
tmp["found_uprn"] = None
tmp["status"] = "no_epc_results"
results.append(tmp)
continue
if isinstance(record.get("body"), str):
body = json.loads(record["body"])
else:
body = record.get("body", {})
resolved = resolve_uprns_for_postcode_group(
group_df=group_df,
epc_df=epc_df,
# Validate required fields
task_id = body.get("task_id")
subtask_id = body.get("sub_task_id")
s3_uri = body.get("s3_uri")
# Convert task_id to UUID
task_id = UUID(task_id) if isinstance(task_id, str) else task_id
subtask_id = UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id
# Mark subtask as in progress
subtask_interface.update_subtask_status(subtask_id, "in progress")
logger.info(f"Marked subtask {subtask_id} as in progress")
# Read CSV from S3
bucket, key = parse_s3_uri(s3_uri)
logger.info(f"S3 Bucket: {bucket}, Key: {key}")
csv_data = read_csv_from_s3_dict(bucket, key)
df = pd.DataFrame(csv_data)
logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns")
# Sanitise postcodes
df["postcode_clean"] = df["postcode"].str.upper().str.replace(" ", "")
df = df.dropna(subset=["postcode_clean"])
batch_size = 500
if df.shape[0] < batch_size:
create_batch_and_send_to_address2uprn(
batch_df=df,
task_id=task_id,
sub_task_id=subtask_id,
subtask_interface=subtask_interface,
bucket_name=bucket_name,
)
else:
postcode_to_addresses = {
postcode: group
for postcode, group in df.groupby("postcode_clean", sort=False)
}
results.append(resolved)
count = 0
buffer = []
except Exception as e:
tmp = group_df.copy()
tmp["found_uprn"] = None
tmp["status"] = "exception"
tmp["error"] = str(e)
results.append(tmp)
for postcode, group_df in postcode_to_addresses.items():
group_len = len(group_df)
final_df = pd.concat(results, ignore_index=True)
a = final_df[
[
"best_match_lexiscore",
"Address 1",
"best_match_address",
"Postcode",
"UPRN",
"best_match_uprn",
]
] # add levi score to viewing
b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing
b = b[
[
"best_match_lexiscore",
"Address 1",
"best_match_address",
"Postcode",
"UPRN",
"best_match_uprn",
]
]
# If single postcode is bigger than batch_size → send directly
if group_len >= batch_size:
if buffer:
create_batch_and_send_to_address2uprn(
batch_df=pd.concat(buffer, ignore_index=True),
task_id=task_id,
sub_task_id=subtask_id,
subtask_interface=subtask_interface,
bucket_name=bucket_name,
)
buffer = []
count = 0
create_batch_and_send_to_address2uprn(
batch_df=group_df,
task_id=task_id,
sub_task_id=subtask_id,
subtask_interface=subtask_interface,
bucket_name=bucket_name,
)
continue
def handler(event, context):
print("hello Postcode splitter world")
return {"statusCode": 200, "body": "hello world"}
# If adding would exceed batch → flush first
if count + group_len > batch_size:
create_batch_and_send_to_address2uprn(
batch_df=pd.concat(buffer, ignore_index=True),
task_id=task_id,
sub_task_id=subtask_id,
subtask_interface=subtask_interface,
bucket_name=bucket_name,
)
buffer = []
count = 0
# Add group
buffer.append(group_df)
count += group_len
if __name__ == "__main__":
main()
# Final flush
if buffer:
create_batch_and_send_to_address2uprn(
batch_df=pd.concat(buffer, ignore_index=True),
task_id=task_id,
sub_task_id=subtask_id,
subtask_interface=subtask_interface,
bucket_name=bucket_name,
)
# Mark subtask as completed
subtask_interface.update_subtask_status(
subtask_id,
"completed",
outputs={"rows_processed": "completed"},
)
return {
"statusCode": 200,
"body": json.dumps(
{"processed": results, "errors": errors if errors else None}
),
}

View file

@ -41,7 +41,10 @@ epc_data = pd.read_csv(
# Classify floor area in <73m2, 73-98, 99-200, 200+
epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(
lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+")
lambda x: (
"<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+"
)
)
# 73-98 185
# <73 156
@ -65,7 +68,11 @@ import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids):
session.begin()
# Get properties and their details for a specific portfolio
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID
.all()
)
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
{
**{
col.name: getattr(prop.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for prop in properties_query
]
# Get property IDs from fetched properties
# Get plans linked to the fetched properties
plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
plans_query = (
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
)
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
# Extract plan IDs for filtering recommendations through PlanRecommendations
plan_ids = [plan['id'] for plan in plans_data]
plan_ids = [plan["id"] for plan in plans_data]
# Get recommendations through PlanRecommendations for those plans and that are default
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True # Filtering for default recommendations
).all()
recommendations_query = (
session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(
PlanModel,
PlanModel.id
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True, # Filtering for default recommendations
)
.all()
)
# Transform recommendations data to include all fields dynamically and include scenario_id
recommendations_data = [
{**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
col.name) for
col in Recommendation.__table__.columns},
"Scenario ID": rec.scenario_id}
{
**{
col.name: (
getattr(rec.Recommendation, col.name)
if hasattr(rec, "Recommendation")
else getattr(rec, col.name)
)
for col in Recommendation.__table__.columns
},
"Scenario ID": rec.scenario_id,
}
for rec in recommendations_query
]
@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids):
return properties_data, plans_data, recommendations_data
properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
properties_data, plans_data, recommendations_data = get_data(
portfolio_id=124, scenario_ids=[205]
)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
@ -147,12 +179,12 @@ recommended_measures_df = recommended_measures_df.drop(columns=["default"])
post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
post_install_sap = (
post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
index="property_id", columns="measure_type", values="estimated_cost"
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename(
"double_glazing": "Cost: Double Glazing",
"loft_insulation": "Cost: Loft Insulation",
"mechanical_ventilation": "Cost: Ventilation",
"solar_pv": "Cost: Solar PV"
"solar_pv": "Cost: Solar PV",
}
)
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = (
recommendations_measures_pivot["Cost: Solar PV"] > 0
)
df = properties_df[
[
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
"current_epc_rating",
"current_sap_points", "total_floor_area", "number_of_rooms",
df = (
properties_df[
[
"property_id",
"uprn",
"address",
"postcode",
"property_type",
"walls",
"roof",
"heating",
"windows",
"current_epc_rating",
"current_sap_points",
"total_floor_area",
"number_of_rooms",
]
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
post_install_sap, how="left", on="property_id"
.merge(recommendations_measures_pivot, how="left", on="property_id")
.merge(post_install_sap, how="left", on="property_id")
)
df = df.drop(columns=["property_id"])
@ -222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"])
# We fill missings:
for col in [
"Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation",
"Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation",
"Recommendation: Solar PV"
"Recommendation: Air Source Heat Pump",
"Recommendation: Cavity Wall Insulation",
"Recommendation: Double Glazing",
"Recommendation: Loft Insulation",
"Recommendation: Ventilation",
"Recommendation: Solar PV",
]:
df[col] = df[col].fillna(False)
for col in [
"Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation",
"Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation",
"Cost: Solar PV"
"Cost: Air Source Heat Pump",
"Cost: Cavity Wall Insulation",
"Cost: Double Glazing",
"Cost: Loft Insulation",
"Cost: Ventilation",
"Cost: Solar PV",
]:
df[col] = df[col].fillna(0)
# Calculate post SAP
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
lambda x: sap_to_epc(x)
)
df["Recommendation: Air Source Heat Pump"].sum()
df["Cost: Air Source Heat Pump"].sum()
df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)
df.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv",
index=False,
)

View file

@ -4,7 +4,11 @@ import numpy as np
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids):
session.begin()
# Get properties and their details for a specific portfolio
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID
.all()
)
# Transform properties data to include all fields dynamically
properties_data = [
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
PropertyDetailsEpcModel.__table__.columns}}
{
**{
col.name: getattr(prop.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for prop in properties_query
]
# Get property IDs from fetched properties
# Get plans linked to the fetched properties
plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
plans_query = (
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
)
# Transform plans data to include all fields dynamically
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
# Extract plan IDs for filtering recommendations through PlanRecommendations
plan_ids = [plan['id'] for plan in plans_data]
plan_ids = [plan["id"] for plan in plans_data]
# Get recommendations through PlanRecommendations for those plans and that are default
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True # Filtering for default recommendations
).all()
recommendations_query = (
session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(
PlanModel,
PlanModel.id
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default == True, # Filtering for default recommendations
)
.all()
)
# Transform recommendations data to include all fields dynamically and include scenario_id
recommendations_data = [
{**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation')
else getattr(rec, col.name) for
col in Recommendation.__table__.columns},
"Scenario ID": rec.scenario_id}
{
**{
col.name: (
getattr(rec.Recommendation, col.name)
if hasattr(rec, "Recommendation")
else getattr(rec, col.name)
)
for col in Recommendation.__table__.columns
},
"Scenario ID": rec.scenario_id,
}
for rec in recommendations_query
]
@ -94,16 +121,34 @@ def app():
)
property_asset_data = properties_df.merge(
mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn"
mod_property_data.drop(columns=["address", "postcode", "tenure"]),
how="left",
on="uprn",
)
property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False)
property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains(
"pitched", case=False
)
property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970
property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip()
property_asset_data["is_insulated"] = (
property_asset_data["walls"].str.split(",").str[1].str.strip().isin(
["filled cavity", "with external insulation", "filled cavity and external insulation"]
) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"])
property_asset_data["wall_type"] = (
property_asset_data["walls"].str.split(" ").str[0].str.strip()
)
property_asset_data["is_insulated"] = property_asset_data["walls"].str.split(
","
).str[1].str.strip().isin(
[
"filled cavity",
"with external insulation",
"filled cavity and external insulation",
]
) | property_asset_data[
"walls"
].str.split(
","
).str[
2
].str.strip().isin(
["insulated"]
)
property_asset_data["is_insulated"] = np.where(
property_asset_data["is_insulated"], "Insulated", "Uninsulated"
@ -115,18 +160,26 @@ def app():
property_asset_data["pre_1970"], "Pre 1970", "Post 1970"
)
archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"]
archetype_variables = [
"property_type",
"wall_type",
"is_insulated",
"is_pitched",
"pre_1970",
]
assigned_archetypes = (
property_asset_data.groupby(
archetype_variables
).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False)
property_asset_data.groupby(archetype_variables)
.size()
.reset_index()
.rename(columns={0: "n_properties"})
.sort_values("n_properties", ascending=False)
)
# Make the archetype ID a concatenation of the variables
assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply(
lambda x: "_".join(x.astype(str)), axis=1
)
assigned_archetypes["archetype_id"] = assigned_archetypes[
archetype_variables
].apply(lambda x: "_".join(x.astype(str)), axis=1)
# Most prominent archetypes
prominent_archetypes = assigned_archetypes.head(6)
@ -136,7 +189,7 @@ def app():
property_asset_data = property_asset_data.merge(
assigned_archetypes[archetype_variables + ["archetype_id"]],
how="left",
on=archetype_variables
on=archetype_variables,
)
# Create age bands:
@ -148,7 +201,7 @@ def app():
property_asset_data["age_band"] = pd.cut(
property_asset_data["BUILD_YEAR"],
bins=[1959, 1969, 1979, 1989, 1999, 2022],
labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"]
labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"],
)
# Create floor area bands
@ -159,47 +212,59 @@ def app():
property_asset_data["floor_area_band"] = pd.cut(
property_asset_data["total_floor_area"],
bins=[0, 73, 97, 199, 10000],
labels=["0-73", "74-97", "98-199", "200+"]
labels=["0-73", "74-97", "98-199", "200+"],
)
property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy()
property_asset_data["archetype_group"] = np.where(
property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values),
property_asset_data["archetype_id"].isin(
other_archetypes["archetype_id"].values
),
"other",
property_asset_data["archetype_group"]
property_asset_data["archetype_group"],
)
# For colour
wall_types = (
property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename(
columns={"wall_type": "Wall Type"}
)
property_asset_data[["wall_type"]]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"wall_type": "Wall Type"})
)
# Group into age bands
ages = (
property_asset_data[["age_band"]].value_counts()
property_asset_data[["age_band"]]
.value_counts()
.to_frame()
.reset_index().sort_values("age_band", ascending=True)
.reset_index()
.sort_values("age_band", ascending=True)
.rename(columns={"age_band": "Age Band"})
)
floor_area_bands = (
property_asset_data[["floor_area_band"]].value_counts()
property_asset_data[["floor_area_band"]]
.value_counts()
.to_frame()
.reset_index().sort_values("floor_area_band", ascending=True)
.reset_index()
.sort_values("floor_area_band", ascending=True)
.rename(columns={"floor_area_band": "Floor Area Band"})
)
archetype_counts = (
property_asset_data[["archetype_group"]].
value_counts().
to_frame().
reset_index()
property_asset_data[["archetype_group"]]
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"archetype_group": "Archetype"})
)
property_types = (
(property_asset_data["property_type"] + ": " + property_asset_data["built_form"]).
value_counts().
to_frame().
reset_index()
(
property_asset_data["property_type"]
+ ": "
+ property_asset_data["built_form"]
)
.value_counts()
.to_frame()
.reset_index()
.rename(columns={"index": "Property Type", 0: "Count"})
)
@ -217,18 +282,24 @@ def app():
totals = property_asset_data[
[
"Total_household_members",
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
"co2_emissions",
"current_energy_demand",
"current_energy_demand_heating_hotwater",
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"appliances_cost_current",
"gas_standing_charge",
"electricity_standing_charge",
]
].copy()
totals["total_cost"] = (
totals["heating_cost_current"] +
totals["hot_water_cost_current"] +
totals["lighting_cost_current"] +
totals["appliances_cost_current"] +
totals["gas_standing_charge"] +
totals["electricity_standing_charge"]
totals["heating_cost_current"]
+ totals["hot_water_cost_current"]
+ totals["lighting_cost_current"]
+ totals["appliances_cost_current"]
+ totals["gas_standing_charge"]
+ totals["electricity_standing_charge"]
)
print(
totals[
@ -259,38 +330,59 @@ def app():
scenario_recommendations_df = recommendations_df[
recommendations_df["Scenario ID"] == scenario
].copy()
].copy()
scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"]
scenario_recommendations_df["contingency"] = (
contingency * scenario_recommendations_df["estimated_cost"]
)
scenario_recommendations_df["total_cost"] = (
scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"]
scenario_recommendations_df["estimated_cost"]
+ scenario_recommendations_df["contingency"]
)
recommended_measures_df = scenario_recommendations_df[
["property_id", "measure_type", "estimated_cost", "default"]
]
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df[
recommended_measures_df["default"]
]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
# Metrics by property ID
aggregated_metrics = scenario_recommendations_df[
[
"property_id", "type", "default", "sap_points",
"energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency",
"total_cost"
"property_id",
"type",
"default",
"sap_points",
"energy_cost_savings",
"kwh_savings",
"co2_equivalent_savings",
"estimated_cost",
"contingency",
"total_cost",
]
]
aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
aggregated_metrics = aggregated_metrics.groupby("property_id")[
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
"total_cost", "contingency"]
].sum().reset_index()
aggregated_metrics = (
aggregated_metrics.groupby("property_id")[
[
"sap_points",
"co2_equivalent_savings",
"energy_cost_savings",
"kwh_savings",
"estimated_cost",
"total_cost",
"contingency",
]
]
.sum()
.reset_index()
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
index="property_id", columns="measure_type", values="estimated_cost"
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -299,30 +391,58 @@ def app():
for c in recommendations_measures_pivot.columns:
if c == "property_id":
continue
recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0
recommendations_measures_pivot["Recommendation: " + c] = (
recommendations_measures_pivot[c] > 0
)
# We now create a final output
df = properties_df[
[
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
"current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
df = (
properties_df[
[
"property_id",
"uprn",
"address",
"postcode",
"property_type",
"walls",
"roof",
"heating",
"windows",
"current_epc_rating",
"current_sap_points",
"total_floor_area",
"number_of_rooms",
"co2_emissions",
"current_energy_demand",
"current_energy_demand_heating_hotwater",
"heating_cost_current",
"hot_water_cost_current",
"lighting_cost_current",
"appliances_cost_current",
"gas_standing_charge",
"electricity_standing_charge",
]
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
aggregated_metrics, how="left", on="property_id"
.merge(recommendations_measures_pivot, how="left", on="property_id")
.merge(aggregated_metrics, how="left", on="property_id")
)
df["bills_total_cost"] = (
df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] +
df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"]
df["heating_cost_current"]
+ df["hot_water_cost_current"]
+ df["lighting_cost_current"]
+ df["appliances_cost_current"]
+ df["gas_standing_charge"]
+ df["electricity_standing_charge"]
)
df = df.drop(columns=["property_id"])
for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
for c in [
"sap_points",
"co2_equivalent_savings",
"energy_cost_savings",
"kwh_savings",
]:
df[c] = df[c].fillna(0)
df = df.rename(
@ -345,16 +465,23 @@ def app():
# Calculate post SAP
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
lambda x: sap_to_epc(x)
)
# Calculate the relative savings on carbon, kwh, and bills
df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"]
df["relative_carbon_savings"] = (
df["co2_equivalent_savings"] / df["co2_emissions"]
)
df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"]
df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"]
# Add on the archetype
df = df.merge(
property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn"
property_asset_data[["uprn", "archetype_group"]],
how="left",
left_on="UPRN",
right_on="uprn",
)
# For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it
@ -387,7 +514,9 @@ def app():
printing_scenario_id = scenario_ids[0]
# EPC breakdown
print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts())
print(
scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts()
)
# Cost
# Total cost
print(scenario_data[printing_scenario_id]["total_cost"].sum())
@ -408,16 +537,24 @@ def app():
measure_details = {}
for scenario in scenario_ids:
measure_details[scenario] = {}
recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict()
recommendation_cols = [
c for c in scenario_data[scenario].columns if "Recommendation:" in c
]
measure_details[scenario]["count"] = (
scenario_data[scenario][recommendation_cols].sum().to_dict()
)
# Get average cost per measure
measure_columns = [
c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c
c.split("Recommendation: ")[1]
for c in scenario_data[scenario].columns
if "Recommendation:" in c
]
# Take the mean, drop zero columns
measure_costs = {}
for m in measure_columns:
measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean())
measure_costs[m] = float(
scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()
)
measure_details[scenario]["cost_per_measure"] = measure_costs
pprint(measure_details[scenario_ids[0]]["count"])
@ -452,12 +589,27 @@ def app():
for scenario in scenario_ids:
df = scenario_data[scenario].copy()
avg_savings = df[
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
"total_cost", "contingency"]
].mean().to_dict()
avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"]
avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
avg_savings = (
df[
[
"sap_points",
"co2_equivalent_savings",
"energy_cost_savings",
"kwh_savings",
"estimated_cost",
"total_cost",
"contingency",
]
]
.mean()
.to_dict()
)
avg_savings["cost_per_sap_point"] = (
avg_savings["total_cost"] / avg_savings["sap_points"]
)
avg_savings["cost_per_carbon"] = (
avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
)
scenario_metrics[scenario] = avg_savings
pprint(scenario_metrics[scenario_ids[0]])
@ -465,11 +617,11 @@ def app():
scenario_data[scenario_ids[0]]["loft_insulation"][
scenario_data[scenario_ids[0]]["loft_insulation"] > 0
].mean()
].mean()
scenario_data[scenario_ids[0]]["cavity_wall_insulation"][
scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0
].mean()
].mean()
# Testing: checking flood risk
@ -477,11 +629,7 @@ def app():
def get_flood_risk(lat, lon, radius_km=1):
url = "https://environment.data.gov.uk/flood-monitoring/id/floods"
params = {
'lat': lat,
'long': lon,
'dist': radius_km # search radius in km
}
params = {"lat": lat, "long": lon, "dist": radius_km} # search radius in km
response = requests.get(url, params=params)
response.raise_for_status()
@ -495,20 +643,19 @@ def app():
print(f"{len(flood_warnings)} warning(s) found near the location:")
for warning in flood_warnings:
print(f"- Area: {warning.get('description')}")
print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})")
print(
f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})"
)
print(f" Message changed at: {warning.get('timeMessageChanged')}")
print()
return flood_warnings
from shapely.geometry import shape, Point
def get_flood_areas_near_point(lat, lon, radius_km=2):
url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas"
params = {
'lat': lat,
'long': lon,
'dist': radius_km
}
params = {"lat": lat, "long": lon, "dist": radius_km}
response = requests.get(url, params=params)
response.raise_for_status()
@ -531,7 +678,7 @@ def app():
if not features:
continue
flood_polygon = shape(features[0]['geometry'])
flood_polygon = shape(features[0]["geometry"])
try:
is_inside = flood_polygon.contains(point)
@ -539,12 +686,17 @@ def app():
is_inside = False
if is_inside:
print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})")
print(
f"📍 Point is inside flood area: {area['label']} ({area['notation']})"
)
return area
from tqdm import tqdm
floor_warnings_data = []
for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)):
for _, property in tqdm(
property_asset_data.iterrows(), total=len(property_asset_data)
):
# warnings = floor_warnings_data.extend(
# get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1)
# )
@ -556,7 +708,7 @@ def app():
"uprn": property["uprn"],
"address": property["address"],
"postcode": property["postcode"],
"area": resp
"area": resp,
}
)
continue
@ -570,7 +722,7 @@ def app():
"House_Cavity_Uninsulated_Pitched roof_Post 1970",
"other",
"House_System_Uninsulated_Pitched roof_Pre 1970",
"House_Solid_Uninsulated_Not Pitched Roof_Pre 1970"
"House_Solid_Uninsulated_Not Pitched Roof_Pre 1970",
]
values = [62, 36, 21, 16, 16, 4, 2]
@ -582,36 +734,39 @@ def app():
"Cavity wall insulation, ventilation",
"Bespoke retrofit measures",
"External wall insulation, roof insulation",
"Flat roof insulation, internal wall insulation"
"Flat roof insulation, internal wall insulation",
]
fig = go.Figure(go.Treemap(
labels=labels,
parents=[""] * len(labels), # No root
values=values,
hovertext=hovertext,
hoverinfo="text",
textinfo="none",
marker=dict(
line=dict(color="white", width=4),
colors=values,
colorscale="Blues"
fig = go.Figure(
go.Treemap(
labels=labels,
parents=[""] * len(labels), # No root
values=values,
hovertext=hovertext,
hoverinfo="text",
textinfo="none",
marker=dict(
line=dict(color="white", width=4), colors=values, colorscale="Blues"
),
)
))
)
fig.update_layout(
margin=dict(t=10, l=10, r=10, b=10),
plot_bgcolor="white",
paper_bgcolor="white"
margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white"
)
fig.show()
# Get the recommended measures by scenario id
recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c]
measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[
recommendation_cols
].sum().reset_index()
recommendation_cols = [
c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c
]
measure_counts_by_scenario = (
scenario_data[scenario_ids[1]]
.groupby("archetype_group")[recommendation_cols]
.sum()
.reset_index()
)
measure_counts_by_scenario.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv"
@ -630,15 +785,13 @@ def app():
to_append = {"uprn": uprn}
for _id in scenario_ids:
scenario = scenario_data[_id][
scenario_data[_id]["uprn"] == uprn
].squeeze()
scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze()
val = PropertyValuation.estimate_valuation_improvement(
current_value=x["valuation"],
current_epc=scenario["Current EPC Rating"].value,
target_epc=scenario["Predicted Post Works EPC"],
total_cost=None
total_cost=None,
)
to_append[_id] = val["average_increase"]

File diff suppressed because it is too large Load diff

View file

@ -10,6 +10,7 @@ Additionally, we wil find the problematic records and remove them
Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan
or recommendations in case something went wrong
"""
import pandas as pd
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import PropertyModel
@ -19,8 +20,7 @@ from backend.app.db.connection import db_session
def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]:
return [
uprn
for (uprn,) in
session.query(PropertyModel.uprn)
for (uprn,) in session.query(PropertyModel.uprn)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
if uprn is not None
@ -34,7 +34,7 @@ with db_session() as session:
sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)]
@ -44,7 +44,7 @@ missed_properties.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
"d_failed_properties_to_restart_20260102.xlsx",
sheet_name="Standardised Asset List",
index=False
index=False,
)
# Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios:
@ -52,14 +52,14 @@ scenario_id = None
from sqlalchemy import select, func
from sqlalchemy.orm import Session
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
def count_plans_for_scenario(session: Session, scenario_id: int) -> int:
    """Count the plan rows that belong to one scenario.

    Args:
        session: SQLAlchemy session used to execute the count query.
        scenario_id: Value matched against ``PlanModel.scenario_id``.

    Returns:
        The single scalar produced by the ``COUNT`` query.
    """
    # Only the renamed ``PlanModel`` mapping is used; the former ``Plan`` name
    # is no longer imported by this script.
    return session.execute(
        select(func.count())
        .select_from(PlanModel)
        .where(PlanModel.scenario_id == scenario_id)
    ).scalar_one()
@ -69,8 +69,7 @@ with db_session() as session:
def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]:
    """Return the ids of every plan attached to the given scenario.

    Args:
        session: SQLAlchemy session used to execute the select.
        scenario_id: Value matched against ``PlanModel.scenario_id``.

    Returns:
        A list with one ``PlanModel.id`` per matching row (empty when the
        scenario has no plans).
    """
    result = session.execute(
        select(PlanModel.id).where(PlanModel.scenario_id == scenario_id)
    )
    # Each result row exposes the selected column under its name, ``id``.
    return [row.id for row in result]
@ -84,7 +83,7 @@ from sqlalchemy.orm import Session
def chunked(iterable, size):
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items.

    Args:
        iterable: A sized, sliceable sequence (``len()`` and slicing are used,
            so a plain generator will not work).
        size: Maximum number of items per slice; passed to ``range`` as the
            step, so ``0`` raises ``ValueError``.

    Yields:
        Slices of ``iterable`` in order; the last one may be shorter.
    """
    for start in range(0, len(iterable), size):
        # Exactly one yield per window, so each chunk is emitted once.
        yield iterable[start : start + size]
from sqlalchemy import text
@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendation_materials
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING plan_recommendations pr
WHERE rm.recommendation_id = pr.recommendation_id
AND pr.plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -116,10 +117,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plan_recommendations
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendations (only those used by these plans)
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation r
WHERE r.id IN (
SELECT DISTINCT recommendation_id
FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
)
"""),
"""
),
params,
)
@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plans LAST
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE id = ANY(:plan_ids)
"""),
"""
),
params,
)

View file

@ -5,6 +5,7 @@ This includes:
# EPC C, there should be a plan
2) If the plan is fabric first, make sure they are actually fabric first
"""
import pandas as pd
scenario_names = {
@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items():
)
# find properties that are below the scenario sap target, but have no recommended measures
df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id]
df["below_scenario_target"] = (
df["current_sap_points"] < scenario_sap_targets[scenario_id]
)
df["no_recommended_measures"] = df["sap_points"] == 0
df["zero_cost"] = df["total_retrofit_cost"] == 0
df["sap_points_above_zero"] = df["sap_points"] > 0
@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items():
].copy()
if scenario_sap_targets[scenario_id] == 81:
problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"]
problematic_properties = problematic_properties[
problematic_properties["property_type"] != "Flat"
]
zero_cost_above_zero_sap = df[
(df["sap_points_above_zero"] & df["zero_cost"])
@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items():
# pd.set_option('display.width', 1000)
# problematic_properties.head(len(problematic_properties))
print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")
print(
f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})"
)
print(
f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})"
)
problems.append(problematic_properties)
problems.append(zero_cost_above_zero_sap)
@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"])
sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
"UPRNS.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal = pd.concat([sal, sal2])
@ -114,7 +123,7 @@ retry.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
"d_problematic_properties_to_review_20260106.xlsx",
sheet_name="Standardised Asset List",
index=False
index=False,
)
# Delete associated plans
@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist()
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
from sqlalchemy import select, delete
from sqlalchemy.exc import NoResultFound
from sqlalchemy.orm import sessionmaker
def get_property_ids_for_uprns(
    session: Session, portfolio_id: int, uprns: list[int]
) -> list[int]:
    """Look up property ids for a set of UPRNs within one portfolio.

    Args:
        session: SQLAlchemy session used to run the query.
        portfolio_id: Value matched against ``PropertyModel.portfolio_id``.
        uprns: UPRNs matched against ``PropertyModel.uprn``.

    Returns:
        The ``id`` of every ``PropertyModel`` row that satisfies both filters.
    """
    matches = (
        session.query(PropertyModel)
        .filter(
            PropertyModel.portfolio_id == portfolio_id,
            PropertyModel.uprn.in_(uprns),
        )
        .all()
    )
    # Loop variable deliberately not called ``property`` so the builtin is
    # not shadowed.
    return [prop.id for prop in matches]
@ -149,15 +159,21 @@ with db_session() as session:
# Get all and delete plans for these property IDs
def get_all_plans_for_property_ids(
    session: Session, property_ids: list[int]
) -> list[PlanModel]:
    """Fetch every plan whose property is in ``property_ids``.

    Args:
        session: SQLAlchemy session used to run the query.
        property_ids: Values matched against ``PlanModel.property_id``.

    Returns:
        The matching ``PlanModel`` instances.
    """
    return (
        session.query(PlanModel).filter(PlanModel.property_id.in_(property_ids)).all()
    )
def get_ids_of_plans_for_deletion(
    session: Session, property_ids: list[int]
) -> list[int]:
    """Collect the ids of all plans linked to the given properties.

    Args:
        session: SQLAlchemy session used to run the query.
        property_ids: Values matched against ``PlanModel.property_id``.

    Returns:
        One plan id per matching ``PlanModel`` row.
    """
    return [
        plan.id
        for plan in session.query(PlanModel)
        .filter(PlanModel.property_id.in_(property_ids))
        .all()
    ]
@ -168,7 +184,7 @@ with db_session() as session:
def chunked(iterable, size):
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items.

    Args:
        iterable: A sized, sliceable sequence (``len()`` and slicing are used,
            so a plain generator will not work).
        size: Maximum number of items per slice; passed to ``range`` as the
            step, so ``0`` raises ``ValueError``.

    Yields:
        Slices of ``iterable`` in order; the last one may be shorter.
    """
    for start in range(0, len(iterable), size):
        # Exactly one yield per window, so each chunk is emitted once.
        yield iterable[start : start + size]
from sqlalchemy import text
@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendation_materials
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING plan_recommendations pr
WHERE rm.recommendation_id = pr.recommendation_id
AND pr.plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plan_recommendations
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendations (only those used by these plans)
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation r
WHERE r.id IN (
SELECT DISTINCT recommendation_id
FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
)
"""),
"""
),
params,
)
@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plans LAST
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE id = ANY(:plan_ids)
"""),
"""
),
params,
)

View file

@ -3,31 +3,41 @@ from sqlalchemy.orm import Session
from sqlalchemy import text, select
from backend.app.db.connection import db_read_session
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
PORTFOLIO_ID = 435
with db_read_session() as session:
# Get all PropertyDetailsEpcModel rows for properties in portfolio PORTFOLIO_ID
# (the `estimated == True` filter below is currently commented out)
estimated_epcs = session.query(PropertyDetailsEpcModel).filter(
# PropertyDetailsEpcModel.estimated == True,
PropertyDetailsEpcModel.property_id.in_(
session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID)
estimated_epcs = (
session.query(PropertyDetailsEpcModel)
.filter(
# PropertyDetailsEpcModel.estimated == True,
PropertyDetailsEpcModel.property_id.in_(
session.query(PropertyModel.id).filter(
PropertyModel.portfolio_id == PORTFOLIO_ID
)
)
)
).all()
.all()
)
# Get the ids
estimated_epc_ids = [epc.property_id for epc in estimated_epcs]
# I want to get the UPRNS for these properties, from the property model
with db_read_session() as session:
estimated_uprns = session.query(PropertyModel.uprn).filter(
PropertyModel.id.in_(
session.query(PropertyDetailsEpcModel.property_id).filter(
PropertyDetailsEpcModel.id.in_(estimated_epc_ids)
estimated_uprns = (
session.query(PropertyModel.uprn)
.filter(
PropertyModel.id.in_(
session.query(PropertyDetailsEpcModel.property_id).filter(
PropertyDetailsEpcModel.id.in_(estimated_epc_ids)
)
)
)
).all()
.all()
)
estimated_uprns_list = [uprn for (uprn,) in estimated_uprns]
@ -35,16 +45,16 @@ with db_read_session() as session:
sal_1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal_2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
"UPRNS.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
sal = pd.concat([sal_1, sal_2])
sal = sal.drop_duplicates(subset=['epc_os_uprn'])
sal = sal.drop_duplicates(subset=["epc_os_uprn"])
estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy()
@ -55,20 +65,24 @@ SCENARIOS = [
# 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
# 859, # EPC C - no solid floor, ashp 3.0
# 885, # EPC B - fabric first, no solid floor, ashp 3.0
908, 909, 910
908,
909,
910,
]
# Get all plans, associated to these properties - the property IDs are in estimated_epc_ids
with db_read_session() as session:
result = session.execute(
select(Plan.id, Plan.property_id)
.where(Plan.property_id.in_(estimated_epc_ids))
select(PlanModel.id, PlanModel.property_id).where(
PlanModel.property_id.in_(estimated_epc_ids)
)
)
plans = [
{
"plan_id": row.id,
"property_id": row.property_id,
} for row in result
}
for row in result
]
df = pd.DataFrame(plans)
@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# recommendation_materials
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation_materials rm
USING plan_recommendations pr
WHERE rm.recommendation_id = pr.recommendation_id
AND pr.plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plan_recommendations
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
"""),
"""
),
params,
)
@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
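# recommendations (only those used by these plans)
# NOTE(review): the plan_recommendations rows for these plans are deleted by the
# statement above, so the subquery below may match nothing - confirm the ordering.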
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM recommendation r
WHERE r.id IN (
SELECT DISTINCT recommendation_id
FROM plan_recommendations
WHERE plan_id = ANY(:plan_ids)
)
"""),
"""
),
params,
)
@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
# plans LAST
# ----------------------------
session.execute(
text("""
text(
"""
DELETE FROM plan
WHERE id = ANY(:plan_ids)
"""),
"""
),
params,
)
# Store the SAL
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
"sal.xlsx")
filename = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
"sal.xlsx"
)
with pd.ExcelWriter(filename) as writer:
sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer:
b1 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 1"
sheet_name="batch 1",
)
b2 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 2"
sheet_name="batch 2",
)
b3 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 3"
sheet_name="batch 3",
)
b4 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 4"
sheet_name="batch 4",
)
b5 = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
"sal.xlsx",
sheet_name="batch 5"
sheet_name="batch 5",
)
# Batch 6 should be the remaining
total = pd.concat([b1, b2, b3, b4, b5])
remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)]
# Create new output
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
"20260107 corrected batch 6 sal.xlsx")
filename = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
"20260107 corrected batch 6 sal.xlsx"
)
with pd.ExcelWriter(filename) as writer:
sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer:
b5.to_excel(writer, sheet_name="batch 5", index=False)
remaining.to_excel(writer, sheet_name="batch 6", index=False)
all_together = pd.concat(
[b1, b2, b3, b4, b5, remaining]
)
all_together = pd.concat([b1, b2, b3, b4, b5, remaining])

View file

@ -110,14 +110,17 @@ import pandas as pd
# Solar PV savings - we need the amount of solar PV bill savings
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
from backend.app.db.models.recommendations import (
Recommendation,
PlanModel,
PlanRecommendations,
RecommendationMaterials,
)
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from collections import defaultdict
PORTFOLIO_ID = 485 # Peabody
SCENARIOS = [
970
]
SCENARIOS = [970]
scenario_names = {
970: "EPC C - no solid floor, ashp 3.0",
}
@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Properties
# --------------------
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
)
properties_data = [
{
**{col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns},
**{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns},
**{
col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for p in properties_query
]
@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Plans
# --------------------
plans_query = session.query(Plan).filter(
Plan.scenario_id.in_(scenario_ids)
).all()
plans_query = (
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
)
plans_data = [
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
for plan in plans_query
]
@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendations (NO materials yet)
# --------------------
recommendations_query = session.query(
Recommendation,
Plan.scenario_id
).join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan,
Plan.id == PlanRecommendations.plan_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False)
).all()
recommendations_query = (
session.query(Recommendation, PlanModel.scenario_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False),
)
.all()
)
recommendations_data = [
{
**{col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns},
**{
col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns
},
"scenario_id": r.scenario_id,
"materials": [] # placeholder
"materials": [], # placeholder
}
for r in recommendations_query
]
@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendation materials (SEPARATE QUERY)
# --------------------
materials_query = session.query(
RecommendationMaterials
).filter(
RecommendationMaterials.recommendation_id.in_(recommendation_ids)
).all()
materials_query = (
session.query(RecommendationMaterials)
.filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
.all()
)
# Group materials by recommendation_id
materials_by_recommendation = defaultdict(list)
for m in materials_query:
materials_by_recommendation[m.recommendation_id].append({
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
})
materials_by_recommendation[m.recommendation_id].append(
{
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
}
)
# Attach materials safely (no filtering side effects)
for r in recommendations_data:
@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
properties_df.to_excel(writer, sheet_name="properties", index=False)
# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
# # Check tenures
# initial_asset_data = pd.read_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "

View file

@ -4,7 +4,7 @@ import pandas as pd
full_sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
"SAL/Depracated/20260107 corrected batch 6 sal.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
# ------Pull in the reduced sample ------
@ -12,7 +12,7 @@ full_sal = pd.read_excel(
reduced_sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
"ownership filtered sal.xlsx",
sheet_name="Standardised Asset List"
sheet_name="Standardised Asset List",
)
# ------ Pull in the confirmed ownership column from Peabody ------
@ -20,18 +20,20 @@ new_asset_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
"- Peabody "
"- Data Extracts for Domna v2.xlsx",
sheet_name="Properties"
sheet_name="Properties",
)
correct_sample = new_asset_data[
~new_asset_data["AH Tenure"].isin(
["Commercial",
"Freeholder",
"HOMEBUY / EQUITY LOAN",
"Leaseholder",
"Outright Sale",
"SHARED EQUITY",
"Shared Ownership"]
[
"Commercial",
"Freeholder",
"HOMEBUY / EQUITY LOAN",
"Leaseholder",
"Outright Sale",
"SHARED EQUITY",
"Shared Ownership",
]
)
].copy()
@ -41,9 +43,7 @@ stuff_to_add = correct_sample[
~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values)
]["UPRN"].values
sal_to_add = full_sal[
full_sal["domna_property_id"].isin(stuff_to_add)
].copy()
sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy()
# ------- Stuff to remove -------
stuff_to_remove = reduced_sal[
@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session, db_read_session
from sqlalchemy import select, func
from sqlalchemy.orm import Session
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist()

View file

@ -7,7 +7,7 @@ from sqlalchemy.sql import true
from backend.app.db.utils import row2dict
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
from backend.app.db.models.recommendations import Recommendation
from backend.app.db.models.recommendations import Plan
from backend.app.db.models.recommendations import PlanModel
from backend.app.utils import sap_to_epc
EPC_COLOURS = {
@ -17,7 +17,7 @@ EPC_COLOURS = {
"D": "#fdd401",
"E": "#fdab67",
"F": "#ee8023",
"G": "#e71437"
"G": "#e71437",
}
@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id:
its associated default recommendations if any.
"""
# Adjust the join to correctly filter recommendations while including all properties
query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation,
(Recommendation.property_id == PropertyModel.id) & (
Recommendation.default == true())) \
.filter(PropertyModel.portfolio_id == portfolio_id) \
query = (
session.query(PropertyModel, Recommendation)
.outerjoin(
Recommendation,
(Recommendation.property_id == PropertyModel.id)
& (Recommendation.default == true()),
)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
)
properties = {}
for property, recommendation in query:
# Ensure the property is added once with an empty list of recommendations initially
if property.id not in properties:
properties[property.id] = row2dict(property)
properties[property.id]['recommendations'] = []
properties[property.id]["recommendations"] = []
# Append recommendations if they exist and meet the criteria (already filtered by the query)
if recommendation and recommendation.default:
properties[property.id]['recommendations'].append(row2dict(recommendation))
properties[property.id]["recommendations"].append(row2dict(recommendation))
return list(properties.values())
@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
:return: A list of dictionaries, where each dictionary represents a property's details.
Returns an empty list if no property details are found.
"""
property_details = session.query(PropertyDetailsEpcModel).filter(
PropertyDetailsEpcModel.portfolio_id == portfolio_id).all()
property_details = (
session.query(PropertyDetailsEpcModel)
.filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id)
.all()
)
# Convert the SQLAlchemy objects to dictionaries
property_details_dict = [row2dict(pd) for pd in property_details] if property_details else []
property_details_dict = (
[row2dict(pd) for pd in property_details] if property_details else []
)
return property_details_dict
@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
:return: A list of dictionaries, where each dictionary represents a plan.
Returns an empty list if no plans are found.
"""
plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all()
plans = (
session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all()
)
# Convert the SQLAlchemy objects to dictionaries
plans_dict = [row2dict(plan) for plan in plans] if plans else []
@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
return plans_dict
def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
def plot_epc_distribution(
df,
customer_key,
title="Your Units",
background_color="white",
bar_height=0.4,
font_size=15,
):
"""
Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
Allows setting the plot background color and dynamically adjusts text size and bar spacing.
@ -100,75 +119,113 @@ def plot_epc_distribution(df, customer_key, title='Your Units', background_color
:param font_size: Base font size for text annotations (default 15)
"""
# Calculate dynamic figure size or adjust based on preferences
square_size = max(6, len(df) * 0.6) # Ensure minimum size and adjust based on number of entries
square_size = max(
6, len(df) * 0.6
) # Ensure minimum size and adjust based on number of entries
fig, ax = plt.subplots(figsize=(square_size, square_size))
fig.patch.set_facecolor(background_color) # Set figure background color
ax.set_facecolor(background_color) # Set axes background color
df['percentage'] = df['percentage'].round(1) # Round the percentage values to 1 decimal place
df_sorted = df.sort_values('percentage', ascending=True)
df["percentage"] = df["percentage"].round(
1
) # Round the percentage values to 1 decimal place
df_sorted = df.sort_values("percentage", ascending=True)
# Plot bars with specified height for adjustable thickness
bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)
bars = ax.barh(
df_sorted["current_epc_rating"],
df_sorted["percentage"],
color=df_sorted["current_epc_rating"].map(EPC_COLOURS),
edgecolor="none",
height=bar_height,
)
epc_rating_font_size = font_size * 2 # EPC rating font size larger than base font size
count_percentage_font_size = font_size # Count (percentage) font size as base font size
epc_rating_font_size = (
font_size * 2
) # EPC rating font size larger than base font size
count_percentage_font_size = (
font_size # Count (percentage) font size as base font size
)
# Annotate bars with EPC ratings inside and count with percentage values outside
for index, bar in enumerate(bars):
width = bar.get_width()
epc_rating = df_sorted.iloc[index]['current_epc_rating']
count = df_sorted.iloc[index]['count']
percentage = df_sorted.iloc[index]['percentage']
epc_rating = df_sorted.iloc[index]["current_epc_rating"]
count = df_sorted.iloc[index]["count"]
percentage = df_sorted.iloc[index]["percentage"]
# EPC rating inside the bar with increased font size
ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2,
f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size)
ax.text(
width - (width * 0.05),
bar.get_y() + bar.get_height() / 2,
f"{epc_rating}",
va="center",
ha="right",
color="white",
fontsize=epc_rating_font_size,
)
# Count and percentage outside the bar, original font size
ax.text(width + 1, bar.get_y() + bar.get_height() / 2,
f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size)
ax.text(
width + 1,
bar.get_y() + bar.get_height() / 2,
f"{count} ({percentage}%)",
va="center",
color="black",
fontsize=count_percentage_font_size,
)
ax.set_title(title, fontsize=font_size * 1.2) # Adjust title font size proportionally
ax.tick_params(axis='x', which='both', bottom=False, top=False,
labelbottom=False) # Remove x-axis tick marks and values
ax.tick_params(axis='y', which='both', left=False, right=False,
labelleft=False) # Remove y-axis tick marks and labels
ax.spines['top'].set_visible(False) # Remove top spine
ax.spines['right'].set_visible(False) # Remove right spine
ax.spines['left'].set_visible(False) # Remove left spine
ax.spines['bottom'].set_visible(False) # Remove bottom spine
ax.set_title(
title, fontsize=font_size * 1.2
) # Adjust title font size proportionally
ax.tick_params(
axis="x", which="both", bottom=False, top=False, labelbottom=False
) # Remove x-axis tick marks and values
ax.tick_params(
axis="y", which="both", left=False, right=False, labelleft=False
) # Remove y-axis tick marks and labels
ax.spines["top"].set_visible(False) # Remove top spine
ax.spines["right"].set_visible(False) # Remove right spine
ax.spines["left"].set_visible(False) # Remove left spine
ax.spines["bottom"].set_visible(False) # Remove bottom spine
plt.tight_layout() # Adjust layout
plt.show()
# Save the figure as an image
figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
fig.savefig(figure_path, bbox_inches='tight')
figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png"
fig.savefig(figure_path, bbox_inches="tight")
plt.close(fig) # Close the figure to free memory
return fig, figure_path
def save_plot_to_image(figure, path='plot.png'):
def save_plot_to_image(figure, path="plot.png"):
"""
Saves a matplotlib figure to an image file for insertion into PowerPoint.
"""
figure.savefig(path, bbox_inches='tight')
figure.savefig(path, bbox_inches="tight")
plt.close(figure)
def save_figure_as_image(figure, filename='temp_plot.png'):
def save_figure_as_image(figure, filename="temp_plot.png"):
"""
Saves a matplotlib figure to an image file.
"""
figure.savefig(filename, dpi=300)
plt.close(figure) # Close the figure to prevent it from displaying in notebooks or Python environments
plt.close(
figure
) # Close the figure to prevent it from displaying in notebooks or Python environments
def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8),
height_inches=Inches(2)):
def add_commentary_with_bullets(
slide,
commentary,
top_inches,
left_inches=Inches(1),
width_inches=Inches(8),
height_inches=Inches(2),
):
"""
Adds commentary with bullet points to a slide.
@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
:param width_inches: The width of the commentary text box.
:param height_inches: The height of the commentary text box.
"""
txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches)
txBox = slide.shapes.add_textbox(
left_inches, top_inches, width_inches, height_inches
)
tf = txBox.text_frame
# Configure text frame
@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
for i, section in enumerate(sections):
if i > 0:
p = tf.add_paragraph() # Add a new paragraph for each section after the first
p = (
tf.add_paragraph()
) # Add a new paragraph for each section after the first
else:
p = tf.paragraphs[0] # Use the first paragraph for the first section
p.text = section
@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None):
# Determine the position of the commentary text box based on whether an image is included
if img_path:
# Add the image
slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
slide.shapes.add_picture(
img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)
)
# Position for commentary when image is present
commentary_top = Inches(6)
else:
@ -237,16 +300,18 @@ def create_powerpoint(data, save_location):
prs = Presentation()
for slide, slide_data in data.items():
slide_figure_path = data[slide].get('image_path')
text = data[slide].get('text')
title = data[slide].get('title', "")
slide_figure_path = data[slide].get("image_path")
text = data[slide].get("text")
title = data[slide].get("title", "")
add_slide_with_image(prs, title, slide_figure_path, text)
# Save the presentation
prs.save(save_location)
def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
def create_recommendations_summary(
recommendations_df, properties_df, property_details_df, sap_target
):
# Aggregate the impact of the recommendations
# We want:
# Total number of sap points
@ -254,40 +319,52 @@ def create_recommendations_summary(recommendations_df, properties_df, property_d
# total bill savings
# total cost
# Total Co2 impact
recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
total_sap_points=("sap_points", "sum"),
total_valuation_impact=("property_valuation_increase", "sum"),
total_bill_savings=("energy_cost_savings", "sum"),
total_cost=("estimated_cost", "sum"),
total_carbon=("co2_equivalent_savings", "sum"),
adjusted_heat_demand=("adjusted_heat_demand", "sum")
).reset_index()
recommendations_summary = (
recommendations_df.groupby(["property_id"])
.agg(
total_sap_points=("sap_points", "sum"),
total_valuation_impact=("property_valuation_increase", "sum"),
total_bill_savings=("energy_cost_savings", "sum"),
total_cost=("estimated_cost", "sum"),
total_carbon=("co2_equivalent_savings", "sum"),
adjusted_heat_demand=("adjusted_heat_demand", "sum"),
)
.reset_index()
)
# Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill
recommendations_summary = recommendations_summary.merge(
properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
how="left"
properties_df[["id", "uprn", "current_sap_points"]].rename(
columns={"id": "property_id"}
),
on="property_id",
how="left",
)
recommendations_summary["expected_sap_points"] = (
recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
recommendations_summary["current_sap_points"]
+ recommendations_summary["total_sap_points"]
)
recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
lambda x: sap_to_epc(x)
recommendations_summary["expected_epc_rating"] = recommendations_summary[
"expected_sap_points"
].apply(lambda x: sap_to_epc(x))
recommendations_summary["sap_difference"] = (
sap_target - recommendations_summary["expected_sap_points"]
)
recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]
if property_details_df is not None:
recommendations_summary = recommendations_summary.merge(
property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
property_details_df[
["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]
].rename(
columns={
"id": "property_id",
"co2_emissions": "current_co2",
"adjusted_energy_consumption": "current_energy",
"energy_bill": "current_energy_bill"
"energy_bill": "current_energy_bill",
}
),
on="uprn",
how="left"
how="left",
)
return recommendations_summary

View file

@ -3,7 +3,7 @@
### 1. Create the Lambda scaffold
- Copy the template:
cp -r lambda/_template lambda/<lambda_name>
`cp -r lambda/_template lambda/<lambda_name>`
---
@ -12,8 +12,7 @@
infrastructure/terraform/shared/main.tf
- Apply the shared stack
- This requires commenting out 'if env.stage == "prod"' in .github/workflows/deploy_terraform.yml
- Create a PR to deploy this to main then dev in order to deploy the shared stack
- Verify the ECR repository exists in AWS

View file

@ -1,3 +1,30 @@
# ==============================================================================
# TEMPLATE: Lambda Configuration with Optional S3 IAM Policy
# ==============================================================================
# Instructions:
# 1. Replace "REPLACE ME" with your lambda name (e.g., "my-lambda-name")
# 2. Add any additional environment variables as needed
# 3. To attach S3 IAM policies from shared state:
# - Uncomment the S3 policy attachment section below
# - Update the policy_arn to match the output from shared/main.tf
# - Available shared outputs (examples):
# - data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn
# - data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
# 4. To create a NEW S3 policy:
# - Add a new module "lambda_s3_policy" in shared/main.tf using the
# s3_iam_policy module (see examples in shared/main.tf)
# - Then reference it here using data.terraform_remote_state.shared.outputs
# ==============================================================================
# Remote state of the shared stack, read from S3 so that its outputs can be
# referenced as data.terraform_remote_state.shared.outputs.<name>.
data "terraform_remote_state" "shared" {
backend = "s3"
config = {
bucket = "assessment-model-terraform-state"
# The state key is derived from var.stage, so each stage reads its own state object.
# NOTE(review): the "env:/<name>/" prefix looks like the S3 backend's default
# workspace key layout - confirm the shared stack uses one workspace per stage.
key = "env:/${var.stage}/terraform.tfstate"
region = "eu-west-2"
}
}
module "lambda" {
source = "../modules/lambda_with_sqs"
@ -6,9 +33,35 @@ module "lambda" {
image_uri = local.image_uri
# Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
maximum_concurrency = var.maximum_concurrency
batch_size = var.batch_size
environment = {
STAGE = var.stage
LOG_LEVEL = "info"
}
}
# ======================================================================
# OPTIONAL: Attach S3 IAM policy to Lambda execution role
# ======================================================================
# Uncomment and configure the resource below to attach S3 permissions
#
# Example 1: Attach existing policy from shared state
# resource "aws_iam_role_policy_attachment" "lambda_s3_policy" {
# role = module.lambda.role_name
# policy_arn = data.terraform_remote_state.shared.outputs.YOUR_POLICY_OUTPUT_NAME_arn
# }
#
# Example 2: Attach multiple policies
# resource "aws_iam_role_policy_attachment" "lambda_read_policy" {
# role = module.lambda.role_name
# policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
# }
#
# resource "aws_iam_role_policy_attachment" "lambda_write_policy" {
# role = module.lambda.role_name
# policy_arn = data.terraform_remote_state.shared.outputs.another_policy_arn
# }

View file

@ -17,6 +17,16 @@ variable "image_digest" {
description = "Image digest (sha256:...)"
}
# Optional cap on concurrent SQS-triggered invocations; the default of null leaves it unset.
variable "maximum_concurrency" {
  description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
  type        = number
  default     = null
}
# Batch size handed to the Lambda module; defaults to 1.
# NOTE(review): presumably the SQS event source mapping batch size (messages
# per invocation) - confirm against modules/lambda_with_sqs.
variable "batch_size" {
  description = "Batch size passed through to the Lambda/SQS module."
  type        = number
  default     = 1
}
locals {
image_uri = "${var.ecr_repo_url}@${var.image_digest}"

View file

@ -1,3 +1,19 @@
# Remote state of the shared stack, read from S3. Its outputs are used further
# down in this file (bucket name and S3 policy ARN).
data "terraform_remote_state" "shared" {
backend = "s3"
config = {
bucket = "assessment-model-terraform-state"
# The state key is derived from var.stage, so each stage reads its own state object.
# NOTE(review): the "env:/<name>/" prefix looks like the S3 backend's default
# workspace key layout - confirm the shared stack uses one workspace per stage.
key = "env:/${var.stage}/terraform.tfstate"
region = "eu-west-2"
}
}
# Look up the per-stage database credentials secret in AWS Secrets Manager.
# NOTE(review): values read through this data source are kept in the Terraform
# state - confirm the state bucket is access-restricted and encrypted.
data "aws_secretsmanager_secret_version" "db_credentials" {
secret_id = "${var.stage}/assessment_model/db_credentials"
}
locals {
# The secret string is parsed as JSON; the module below reads the
# db_assessment_model_username and db_assessment_model_password keys from it.
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
}
module "address2uprn" {
source = "../modules/lambda_with_sqs"
@ -6,9 +22,37 @@ module "address2uprn" {
image_uri = local.image_uri
timeout = 900
environment = {
STAGE = var.stage
LOG_LEVEL = "info"
}
# Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
maximum_concurrency = var.maximum_concurrency
environment = merge(
{
STAGE = var.stage
LOG_LEVEL = "info"
DB_USERNAME = local.db_credentials.db_assessment_model_username
DB_PASSWORD = local.db_credentials.db_assessment_model_password
GOOGLE_SOLAR_API_KEY = "test"
SAP_PREDICTIONS_BUCKET = "test"
CARBON_PREDICTIONS_BUCKET = "test"
HEAT_PREDICTIONS_BUCKET = "test"
HEATING_KWH_PREDICTIONS_BUCKET = "test"
HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
API_KEY = "test"
ENVIRONMENT = "test"
SECRET_KEY = "test"
PLAN_TRIGGER_BUCKET = "test"
DATA_BUCKET = "test"
ENGINE_SQS_URL = "test"
ENERGY_ASSESSMENTS_BUCKET = "test"
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
},
)
}
# Attach the S3 read-and-write policy to the Lambda execution role
resource "aws_iam_role_policy_attachment" "address2uprn_read_and_write" {
role = module.address2uprn.role_name
policy_arn = data.terraform_remote_state.shared.outputs.address_2_uprn_s3_read_and_write_arn
}

Some files were not shown because too many files have changed in this diff Show more