temp save

This commit is contained in:
Jun-te Kim 2026-01-20 16:25:03 +00:00
commit 2072285bba
156 changed files with 15025 additions and 1986 deletions

View file

@ -1,4 +1,8 @@
<<<<<<< HEAD
FROM python:3.11.10-slim-bullseye
=======
FROM python:3.11.10-bullseye
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
ARG USER=vscode
@ -10,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential pkg-config automake autoconf libtool \
&& rm -rf /var/lib/apt/lists/*
<<<<<<< HEAD
# 2) Build and install libpostal from source
RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
&& cd /tmp/libpostal \
@ -19,12 +24,24 @@ RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
&& make install \
&& ldconfig \
&& rm -rf /tmp/libpostal
=======
# # 2) Build and install libpostal from source
# RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
# && cd /tmp/libpostal \
# && ./bootstrap.sh \
# && ./configure --datadir=/usr/local/share/libpostal \
# && make -j"$(nproc)" \
# && make install \
# && ldconfig \
# && rm -rf /tmp/libpostal
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
# 3) Create the user and grant sudo privileges
RUN useradd -m -s /usr/bin/bash ${USER} \
&& echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
&& chmod 0440 /etc/sudoers.d/${USER}
<<<<<<< HEAD
# 4) Python deps - if you want to run asset list
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD asset_list/requirements.txt requirements.txt
@ -36,10 +53,28 @@ RUN pip install -r requirements.txt
# ADD backend/app/requirements/requirements.txt requirements2.txt
# RUN cat requirements1.txt requirements2.txt > requirements.txt
# RUN pip install -r requirements.txt
=======
# # 4) Python deps - if you want to run asset list
# ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
# ADD asset_list/requirements.txt requirements.txt
# RUN pip install -r requirements.txt
#
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD backend/engine/requirements.txt requirements1.txt
ADD backend/app/requirements/requirements.txt requirements2.txt
ADD .devcontainer/requirements.txt requirements3.txt
RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt
RUN pip install -r requirements.txt
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
# 5) Workdir
WORKDIR /workspaces/model
# 6) Make Python find your package
# Add project root to PYTHONPATH for all processes
ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}

View file

@ -23,8 +23,18 @@
"4ops.terraform",
"fabiospampinato.vscode-todo-plus",
"jgclark.vscode-todo-highlight",
"corentinartaud.pdfpreview",
"ms-python.vscode-python-envs"
]
}
},
"containerEnv": {
"PYTHONFLAGS": "-Xfrozen_modules=off"
}
}

View file

@ -1,3 +1,4 @@
<<<<<<< HEAD
# #!/bin/bash
# poetry install;
@ -25,3 +26,19 @@
# "$SETTINGS_FILE" > "$SETTINGS_FILE.tmp" && mv "$SETTINGS_FILE.tmp" "$SETTINGS_FILE"
# echo "✅ Updated VS Code to use Poetry environment: $VENV_PATH"
=======
mkdir -p ~/.ipython/profile_default/startup
cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
from dotenv import load_dotenv
import os
# Adjust path as needed
env_path = "/workspaces/model/backend/.env"
if os.path.exists(env_path):
load_dotenv(env_path)
print("✔ Loaded .env into Jupyter kernel")
else:
print("⚠ No .env file found to load")
EOF
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d

View file

@ -0,0 +1,20 @@
# fastapi
fastapi==0.115.2
sqlalchemy==2.0.36
pydantic-settings==2.6.0
psycopg2-binary==2.9.10
python-jose==3.3.0
cryptography==43.0.3
mangum==0.19.0
# AWS
boto3==1.35.44
# Data
openpyxl==3.1.2
# Basic
pytz
uvicorn[standard]
sqlmodel
# Testing
pytest==9.0.2
pytest-cov==7.0.0
ipykernel>=6.25,<7

6
.gitignore vendored
View file

@ -242,6 +242,8 @@ fabric.properties
local_data/*
/local_data/*
etl/epc/local_data/*
/backend/condition/sample_data/lbwf/*
/backend/condition/sample_data/peabody/*
*.DS_Store
infrastructure/terraform/.terraform*
@ -275,4 +277,6 @@ cache/
*/.idea
*.png
*.pptx
*.pptx
local_data*

15
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}

View file

@ -9,6 +9,12 @@
"path": "/bin/bash"
}
},
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
// Hot reload setting that needs to be in user settings
// "jupyter.runStartupCommands": [

View file

@ -1,5 +1,4 @@
from etl.epc.settings import DATA_ANOMALY_MATCHES as data_anon_matches
from etl.epc.settings import DATA_ANOMALY_MATCHES as data_anon_matches
class Definitions:

View file

@ -310,6 +310,17 @@ class AssetList:
'NAME OF SURVEYOR'
]
# Solar non-intrusive fields
NON_INTRUSIVES_SOLAR_COLNAMES = [
'PV, ACCESS ISSUE, SEE NOTES', 'ROOF ORIENTATION',
'AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ', 'SHADING',
'Roof Tiles - CONCRETE/SLATE/ROSEMARY',
'NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)',
'SCAFFOLD REQUIRED? IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE',
'IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW',
'DATE', 'NAME OF SURVEYOR'
]
NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)"
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
@ -462,6 +473,8 @@ class AssetList:
self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns
self.solar_non_intrusives_present = "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns
# Names of columns
self.landlord_property_id = landlord_property_id
self.address1_colname = address1_colname
@ -775,6 +788,9 @@ class AssetList:
if self.new_format_non_insturives_present_v2:
non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2
if self.solar_non_intrusives_present:
non_intrusive_columns += self.NON_INTRUSIVES_SOLAR_COLNAMES
if self.old_format_non_intrusives_present:
# We check if we have the ECO Eligibility column, which we might not have
non_intrusive_columns = [
@ -947,7 +963,7 @@ class AssetList:
if self.phase:
# We filter on just the properties that have had an inspection
if self.new_format_non_insturives_present_v2:
if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present:
self.standardised_asset_list = self.standardised_asset_list[
~self.standardised_asset_list['NAME OF SURVEYOR'].isin(
["YET TO BE SURVEYED", "", None]
@ -982,7 +998,15 @@ class AssetList:
# Keep a record of duplicates
self.duplicated_addresses = self.standardised_asset_list[
self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated()
][[self.DOMNA_PROPERTY_ID, self.address1_colname, self.postcode_colname]].copy()
][[self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname, self.postcode_colname]].copy()
df = self.standardised_asset_list[
self.standardised_asset_list[self.DOMNA_PROPERTY_ID].isin(
self.duplicated_addresses[self.DOMNA_PROPERTY_ID])
][[self.landlord_property_id, self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname,
self.postcode_colname]].copy()
df = df.sort_values(by=[self.DOMNA_PROPERTY_ID])
self.standardised_asset_list = self.standardised_asset_list[
~self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated()
@ -1346,10 +1370,10 @@ class AssetList:
# for identifying cavity jobs
if self.non_intrusives_present and not self.old_format_non_intrusives_present:
if self.new_format_non_insturives_present_v2:
if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present:
existing_solar_non_intrusives_check = (
self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"].str.strip().isin(
["ALREADY HAS SOLAR PV"]
["ALREADY HAS SOLAR PV", "ALREADY HAS PV"]
)
)
else:
@ -1788,9 +1812,16 @@ class AssetList:
)
)
not_a_flat = (
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat"
)
# Determine if the client gave us property type in the first place
if all(self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "unknown"):
# Use EPC
not_a_flat = (
self.standardised_asset_list[self.EPC_API_DATA_NAMES["property-type"]] != "Flat"
)
else:
not_a_flat = (
self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat"
)
solar_roof_meets_criteria = (
self.standardised_asset_list["solar_epc_roof_insulated"] |
@ -3457,7 +3488,13 @@ class AssetList:
raise ValueError("No installer column found in master data")
measure_mix_col = "MEASURE COMBO"
town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area'
if "TOWN" in master_data.columns:
town_colname = "TOWN"
elif 'Town/Area' in master_data.columns:
town_colname = 'Town/Area'
else:
town_colname = "Town/City"
logger.info("Matching master data to asset list")
matched = []

View file

@ -57,6 +57,7 @@ def app():
EPC recommendations
Property UPRN
"""
<<<<<<< HEAD
data_folder = ("/workspaces/model/asset_list")
data_filename = "assets.xlsx"
sheet_name = "Sheet1"
@ -70,11 +71,31 @@ def app():
landlord_os_uprn = None
landlord_property_type = None
landlord_built_form = None
=======
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Warmfront/SCIS")
data_filename = "SCIS_Historic_Deemed_Combined_Workings.xlsx"
sheet_name = "SCIS"
postcode_column = 'POSTCODE'
address1_column = "NO"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["NO", "Street / Block Name", "Town/Area"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "PROPERTY TYPE As per table emailed"
landlord_built_form = "PROPERTY TYPE As per table emailed"
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
<<<<<<< HEAD
landlord_property_id = "LLUPRN"
=======
landlord_property_id = "Row ID"
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -90,6 +111,77 @@ def app():
asset_list_header = 0
landlord_block_reference = None
<<<<<<< HEAD
=======
# Peabody data for cleaning
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation")
data_filename = "to_standardise_uprns.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
address1_column = "Address 1"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["Address 1", "Address 2", "Address 3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Type"
landlord_built_form = "Attachment"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Org Ref"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Lambeth:
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
# data_filename = "lambeth_sw2_leigham court estate.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Postcode'
# address1_column = "Address"
# address1_method = None
# fulladdress_column = None
# address_cols_to_concat = ["Address"]
# missing_postcodes_method = None
# landlord_year_built = None
# landlord_os_uprn = None
# landlord_property_type = None
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_roof_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "row_id"
# landlord_sap = None
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_id_colnames = []
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# asset_list_header = 0
# landlord_block_reference = None
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
@ -228,22 +320,22 @@ def app():
)
# We now retrieve any failed properties
chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
epc_data_failed, _, _ = get_data(
df=chunk_failed,
row_id_name=asset_list.DOMNA_PROPERTY_ID,
uprn_column=AssetList.STANDARD_UPRN,
fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
address1_column=AssetList.STANDARD_ADDRESS_1,
postcode_column=AssetList.STANDARD_POSTCODE,
property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
built_form_column=AssetList.STANDARD_BUILT_FORM,
manual_uprn_map=manual_uprn_map,
epc_api_only=epc_api_only,
epc_auth_token=EPC_AUTH_TOKEN
)
epc_data_chunk.extend(epc_data_failed)
# chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
# epc_data_failed, _, _ = get_data(
# df=chunk_failed,
# row_id_name=asset_list.DOMNA_PROPERTY_ID,
# uprn_column=AssetList.STANDARD_UPRN,
# fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
# address1_column=AssetList.STANDARD_ADDRESS_1,
# postcode_column=AssetList.STANDARD_POSTCODE,
# property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
# built_form_column=AssetList.STANDARD_BUILT_FORM,
# manual_uprn_map=manual_uprn_map,
# epc_api_only=epc_api_only,
# epc_auth_token=EPC_AUTH_TOKEN
# )
#
# epc_data_chunk.extend(epc_data_failed)
# Append the failed data to the main data
# Store the chunk locally as a csv
@ -385,6 +477,26 @@ def app():
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
# Determine inspections priority
# solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][
# "domna_postcode"].unique()
# asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin(
# solar_jobs
# )
# # Same for cav
# cavity_jobs = asset_list.standardised_asset_list[
# ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"])
# ]["domna_postcode"].unique()
# asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin(
# cavity_jobs
# )
# # We prioritise properties that are in solar areas and cavity areas
# import numpy as np
# asset_list.standardised_asset_list["inspection_priority"] = np.where(
# asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"],
# 1, 2
# )
with pd.ExcelWriter(filename) as writer:
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
if asset_list.block_analysis_df is not None:
@ -404,4 +516,11 @@ def app():
if not asset_list.geographical_areas.empty:
asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
<<<<<<< HEAD
print("done")
=======
# Store dupes
if not asset_list.duplicated_addresses.empty:
asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False)
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d

View file

@ -438,6 +438,86 @@ BUILT_FORM_MAPPINGS = {
'Maisonette - Mid Terrace': 'mid-terrace',
'Chalet - Wheelchair': 'unknown',
'Studio Flat': 'unknown',
'Bungalow - Attached': 'semi-detached'
'Bungalow - Attached': 'semi-detached',
'ND': 'unknown',
'Maisonette: Mid Terrace: Mid Floor': 'mid-floor',
'Maisonette: Semi Detached: Ground Floor': 'semi-detached',
'Maisonette: Enclosed Mid Terrace: Ground Floor': 'enclosed mid-terrace',
'Maisonette: Enclosed End Terrace: Ground Floor': 'end-terrace',
'Maisonette: Mid Terrace: Ground Floor': 'mid-terrace',
'Flat: Semi Detached: Basement': 'semi-detached',
'Maisonette: Semi Detached: Top Floor': 'semi-detached',
'Maisonette: Enclosed Mid Terrace: Mid Floor': 'enclosed mid-terrace',
'Flat: Detached: Basement': 'detached',
'Maisonette: Enclosed Mid Terrace: Top Floor': 'enclosed mid-terrace',
'Maisonette: End Terrace: Top Floor': 'top-floor',
'House: Mid Terrace: Ground Floor': 'ground floor',
'Maisonette: Semi Detached: Mid Floor': 'detached',
'Maisonette: Detached: Mid Floor': 'detached',
'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace',
'House: EnclosedMidTerrace': 'enclosed mid-terrace',
'EnclosedMidTerrace': 'enclosed mid-terrace',
'EnclosedEndTerrace': 'enclosed end-terrace',
'EndTerrace': 'end-terrace',
'SemiDetached': 'semi-detached',
'MidTerrace': 'mid-terrace',
'1st FLOOR FLAT': 'mid-floor',
'END TERRACE HOUSE': 'end-terrace',
'BUNGALOW-END TERRACE': 'end-terrace',
'BUNGALOW END TERRACE': 'end-terrace',
'END-TERRACE': 'end-terrace',
'SEMI DETACHED': 'semi-detached',
'Mid flat Ground Floor': 'ground floor',
'MID TERRACED': 'mid-terrace',
'Mid Terrace bungalow': 'mid-terrace',
'BUNGLAOW SEMI DETACHED': 'detached',
'Bungalow ENd Terrace': 'end-terrace',
'Bungalow Semi detached': 'detached',
'BUNGALOW - SEMI DETACHED': 'detached',
'Bungalow mid terrace': 'mid-terrace',
'BUNGALOW - MID TERRACED': 'mid-terrace',
'BUNGALOW - MID TERRACE': 'mid-terrace',
'Bungalow end terrace': 'end-terrace',
'BUNGALOW SEMI-DETACHED': 'detached',
'MID TERR': 'mid-terrace',
'Bungalow - mid terrace': 'mid-terrace',
'MID-TERRACE': 'mid-terrace',
'Bunagalow Semi Detached': 'semi-detached',
'SEMI DETACHED BUNGALOW': 'semi-detached',
'MID TERRACE HOUSE': 'mid-terrace',
'END - TERRACE': 'end-terrace',
'BUNGALOW-SEMI DETACHED': 'semi-detached',
'Semi-Detached': 'semi-detached',
'End-Terrace house': 'end-terrace',
'BUNGALOW MID TERRACE': 'mid-terrace',
'SEMI DETACHED HOUSE': 'semi-detached',
'BUNGALOW SEMI DETACHED': 'detached',
'MID - TERRACE': 'mid-terrace',
'3 EXT WALL FLAT': 'end-terrace',
'3 Ext wall flat': 'end-terrace',
'3 EX WALL FLAT': 'end-terrace',
'2 ext wall flats': 'mid-terrace',
'2 EXT WALLS': 'mid-terrace',
'3.EXT.WALL FLAT': 'end-terrace',
'FLAT 3 WALLS': 'end-terrace',
'2 Ext Wall flat': 'mid-terrace',
'DETATCHED HOUSE': 'detached',
'3 EXT. WALL FLAT': 'end-terrace',
'3 ext wall flat': 'end-terrace',
'3 EXT WALLS': 'end-terrace',
'3 EXT WALL - NOW 2 EXT': 'unknown',
'3 EXT-WALL FLAT': 'end-terrace',
'FLAT 2 WALLS': 'mid-terrace',
'3 EX WALL MAISONETTE': 'end-terrace',
'3 Ext Wall Flat': 'end-terrace',
'Semi Bungalow': 'semi-detached',
'2 EXT WALL FLAT': 'mid-terrace',
'2.EXT.WALL FLAT': 'mid-terrace',
'2 EXT. WALL FLAT': 'mid-terrace',
}

View file

@ -473,5 +473,27 @@ HEATING_MAPPINGS = {
'Boiler and radiators, oil': 'oil boiler',
'Boiler and radiators, electric': 'electric boiler',
'No system present: electric heaters assumed': 'electric radiators',
'Boiler and radiators, anthracite': 'solid fuel'
'Boiler and radiators, anthracite': 'solid fuel',
'Heat networks Heat networks (mains gas)': 'communal heating',
'ND Oil': 'oil fuel',
'Boiler Biofuel': 'boiler - other fuel',
'Electric (direct acting) room heaters: Water- or oil-filled radiators': 'room heaters',
'Other: Electric ceiling heating': 'electric ceiling',
'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump',
'Oil room heaters: Room heater, 2000 or later': 'room heaters',
'Electric Underfloor Heating: In screed above insulation (standard or off peak)': 'electric underfloor',
'Heat Pump: Electric Heat pumps: Air source heat pump in other cases': 'air source heat pump',
'Electric Storage Systems: Old (large volume) storage heaters': 'electric storage heaters',
'Gas (including LPG) room heaters: Condensing gas fire': 'room heaters',
'Solid fuel room heaters: Open fire in grate': 'solid fuel',
'Solid fuel room heaters: Open fire with back boiler (no radiators)': 'solid fuel',
'Community Heating Systems: Community heat pump (RdSAP)': 'communal heating',
'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, '
'and sealed to, fireplace opening': 'room heaters',
'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel',
'Boiler: G rated Combi': 'gas condensing combi'
}

View file

@ -343,5 +343,90 @@ PROPERTY_MAPPING = {
'bungalow': 'bungalow',
'flat': 'flat',
'FLA': 'flat',
'HOU': 'house'
'HOU': 'house',
'Maisonette: Mid Terrace: Mid Floor': 'maisonette',
'Maisonette: Semi Detached: Ground Floor': 'maisonette',
'Maisonette: Enclosed Mid Terrace: Ground Floor': 'maisonette',
'Maisonette: Enclosed End Terrace: Ground Floor': 'maisonette',
'Maisonette: Mid Terrace: Ground Floor': 'maisonette',
'Flat: Semi Detached: Basement': 'flat',
'Maisonette: Semi Detached: Top Floor': 'maisonette',
'Maisonette: Enclosed Mid Terrace: Mid Floor': 'maisonette',
'Flat: Detached: Basement': 'flat',
'Maisonette: Enclosed Mid Terrace: Top Floor': 'maisonette',
'Maisonette: End Terrace: Top Floor': 'maisonette',
'House: Mid Terrace: Ground Floor': 'house',
'Bungalow: EnclosedMidTerrace': 'bungalow',
'Maisonette: Semi Detached: Mid Floor': 'maisonette',
'Maisonette: Detached: Mid Floor': 'maisonette',
'House: EnclosedMidTerrace': 'house',
'3 EXT WALL FLAT': 'flat',
'1st FLOOR FLAT': 'flat',
'3 Ext wall flat': 'flat',
'3 EX WALL FLAT': 'flat',
'END TERRACE HOUSE': 'house',
'BUNGALOW-END TERRACE': 'bungalow',
'BUNGALOW END TERRACE': 'bungalow',
'2 ext wall flats': 'flat',
'Mid flat Ground Floor': 'flat',
'3.EXT.WALL FLAT': 'flat',
'FLAT 3 WALLS': 'flat',
'Mid Terrace bungalow': 'bungalow',
'Bungalow ENd Terrace': 'bungalow',
'2 Ext Wall flat': 'flat',
'DETATCHED HOUSE': 'house',
'Bungalow Semi detached': 'bungalow',
'BUNGALOW - SEMI DETACHED': 'bungalow',
'Bungalow mid terrace': 'bungalow',
'BUNGALOW - MID TERRACED': 'bungalow',
'BUNGALOW - MID TERRACE': 'bungalow',
'Bungalow end terrace': 'bungalow',
'3 EXT. WALL FLAT': 'flat',
'3 ext wall flat': 'flat',
'BUNGALOW SEMI-DETACHED': 'bungalow',
'3 EXT-WALL FLAT': 'flat',
'Bungalow - mid terrace': 'bungalow',
'SEMI DETACHED BUNGALOW': 'bungalow',
'FLAT 2 WALLS': 'flat',
'MID TERRACE HOUSE': 'house',
'3 EX WALL MAISONETTE': 'maisonette',
'BUNGALOW-SEMI DETACHED': 'bungalow',
'3 Ext Wall Flat': 'flat',
'Semi Bungalow': 'bungalow',
'End-Terrace house': 'house',
'BUNGALOW MID TERRACE': 'bungalow',
'Mid-terrace house': 'house',
'SEMI DETACHED HOUSE': 'house',
'Semi-detached house': 'house',
'2 EXT WALL FLAT': 'flat',
'2.EXT.WALL FLAT': 'flat',
'BUNGALOW SEMI DETACHED': 'bungalow',
'2 EXT. WALL FLAT': 'flat',
'END-TERRACE': 'unknown',
'SEMI DETACHED': 'unknown',
'2 EXT WALLS': 'unknown',
'MID TERRACED': 'unknown',
'BUNGLAOW SEMI DETACHED': 'bungalow',
'END TERRACE': 'unknown',
'3 EXT WALLS': 'unknown',
'Mid Terrace': 'unknown',
'3 EXT WALL - NOW 2 EXT': 'unknown',
'MID TERR': 'unknown',
'DETACHED': 'unknown',
'MID-TERRACE': 'unknown',
'Bunagalow Semi Detached': 'bungalow',
'End-terrace': 'unknown',
'END - TERRACE': 'unknown',
'SEMI-DETACHED': 'unknown',
'Semi-Detached': 'unknown',
'MID TERRACE': 'unknown',
'End Terrace': 'unknown',
'Detached': 'unknown',
'Mid-terrace': 'unknown',
'MID - TERRACE': 'unknown'
}

View file

@ -246,4 +246,59 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'Pitched, 150 mm loft insulation': 'pitched insulated',
'Flat, limited insulation (assumed)': 'flat uninsulated',
'Pitched (no access to loft) 350mm': 'pitched insulated',
'Pitched (no access to loft) 200mm': 'pitched insulated',
'Pitched (access to loft) 200mm': 'pitched insulated',
'Pitched (no access to loft) 250mm': 'pitched insulated',
'Pitched (access to loft) 100mm': 'pitched insulated',
'Another dwelling above ND (inferred)': 'another dwelling above',
'Pitched (no access to loft) N/A': 'pitched no access to loft',
'Pitched (no access to loft) ND (inferred)': 'pitched no access to loft',
'Pitched (no access to loft) 150mm': 'pitched insulated',
'Pitched (access to loft) 400mm+': 'pitched insulated',
'Pitched (no access to loft) 300mm': 'pitched insulated',
'Pitched (access to loft) <25mm': 'pitched less than 100mm insulation',
'Pitched (access to loft) None': 'pitched less than 100mm insulation',
'Pitched (access to loft) 300mm': 'pitched insulated',
'Pitched (access to loft) 50mm': 'pitched less than 100mm insulation',
'Pitched (access to loft) 270mm': 'pitched insulated',
'Pitched (access to loft) Non-joist': 'pitched access to loft',
'Pitched (access to loft) 250mm': 'pitched insulated',
'Another dwelling above N/A': 'another dwelling above',
'Pitched (access to loft) 150mm': 'pitched insulated',
'Pitched (access to loft) ND (inferred)': 'pitched access to loft',
'Pitched (access to loft) 350mm': 'pitched insulated',
'Pitched (access to loft) NR': 'pitched unknown insulation',
'Pitched (access to loft) 75mm': 'pitched less than 100mm insulation',
'Pitched (access to loft) N/A': 'pitched access to loft',
'ND (inferred) 250mm': 'unknown insulated',
'Pitched (vaulted ceiling) Non-joist': 'pitched unknown insulation',
'ND (inferred) ND (inferred)': 'unknown',
'Flat Non-joist': 'flat insulated',
'Same dwelling above N/A': 'another dwelling above',
'Flat: As Built, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation',
'PitchedNormalLoftAccess: Unknown, PitchedNormalNoLoftAccess: Unknown': 'pitched unknown insulation',
'PitchedNormalLoftAccess: 400mm+': 'pitched insulated',
'AnotherDwellingAbove: 150mm': 'another dwelling above',
'Flat: 150mm': 'flat insulated',
'AnotherDwellingAbove: 50mm': 'another dwelling above',
'PitchedNormalNoLoftAccess: As Built': 'pitched no access to loft',
'PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated',
'PitchedNormalLoftAccess: 200mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated',
'PitchedNormalLoftAccess: 350mm': 'pitched insulated',
'PitchedNormalNoLoftAccess: 270mm': 'pitched no access to loft',
'AnotherDwellingAbove: 100mm': 'another dwelling above',
'PitchedWithSlopingCeiling: Unknown': 'piched unknown insulation',
'AnotherDwellingAbove: Unknown, Flat: As Built': 'another dwelling above',
'Flat: Unknown, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation',
'SameDwellingAbove: Unknown': 'another dwelling above',
'Flat: Unknown': 'flat unknown insulation',
'Flat: 50mm, PitchedNormalLoftAccess: 100mm': 'flat insulated',
'Flat: As Built, PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'flat unknown insulation',
'Flat: As Built, PitchedNormalLoftAccess: 400mm+': 'flat unknown insulation',
'PitchedWithSlopingCeiling: As Built': 'pitched insulated',
'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation',
}

View file

@ -342,5 +342,18 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Solid brick, as built, partial insulation (assumed)': 'insulated solid brick',
'Sandstone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone',
'System built, as built, partial insulation (assumed)': 'system built unknown insulation',
'Timber frame, with external insulation': 'insulated timber frame'
'Timber frame, with external insulation': 'insulated timber frame',
'Cob As-built': 'cob',
'System built Unknown insulation': 'system built unknown insulation',
'Solid brick Unknown insulation': 'solid brick unknown insulation',
'Timber frame Internal': 'insulated timber frame',
'System built External': 'insulated system built',
'Stone As-built': 'uninsulated sandstone or limestone',
'System built As-built': "uninsulated system built",
'System built Internal': 'insulated system built',
'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation',
'Cavity: FilledCavityPlusExternal': 'filled cavity'
}

View file

@ -1,6 +1,7 @@
import time
import random
import pandas as pd
from backend.SearchEpc import SearchEpc
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from tqdm import tqdm
@ -9,6 +10,132 @@ from utils.logger import setup_logger
logger = setup_logger()
def get_data_for_property(
address1: str,
postcode: str,
full_address: str,
property_type: [str | None],
built_form: [str | None],
uprn: [str | float | None],
epc_auth_token: str,
find_my_epc_return_page: bool
):
"""
Utility function that will fetch the data for a single property
:return:
"""
if property_type == "block of flats":
return None
house_number = str(address1).strip()
full_address = full_address.strip()
house_no = SearchEpc.get_house_number(address=str(house_number), postcode=postcode)
if house_no is None:
house_no = house_number
if pd.isnull(uprn):
uprn = None
searcher = SearchEpc(
address1=str(house_no),
postcode=postcode,
auth_token=epc_auth_token,
os_api_key="",
property_type=None,
fast=True,
full_address=full_address,
max_retries=5,
uprn=uprn
)
# Force the skipping of estimating the EPC
# We check if the property was split
searcher.ordnance_survey_client.property_type = property_type
searcher.ordnance_survey_client.built_form = built_form
searcher.find_property(skip_os=True)
# Check if we have a flat or appartment
if searcher.newest_epc is None and uprn is None:
# Try again:
if SearchEpc.get_house_number(address=str(house_number), postcode=postcode) is None:
# Backup
add1 = full_address.split(",")
if len(add1) > 1:
add1 = add1[1].strip()
else:
# Try splitting on space
add1 = full_address.split(" ")[0].strip()
else:
add1 = str(house_number)
searcher = SearchEpc(
address1=add1,
postcode=postcode,
auth_token=epc_auth_token,
os_api_key="",
property_type=None,
fast=True,
full_address=full_address,
max_retries=5
)
if (
"flat" in house_number.lower() or "apartment" in house_number.lower() or "apt" in
house_number.lower()
):
searcher.ordnance_survey_client.property_type = "Flat"
searcher.find_property(skip_os=True)
# As a final resort, we estimate the EPC
if property_type is not None and searcher.newest_epc is None:
searcher.ordnance_survey_client.property_type = property_type
searcher.ordnance_survey_client.built_form = built_form
searcher.find_property(skip_os=True)
if searcher.newest_epc is None:
return None
# Retrieve data from FindMyEPC
try:
find_epc_searcher = RetrieveFindMyEpc(
address=searcher.newest_epc["address"],
postcode=searcher.newest_epc["postcode"]
)
find_epc_response = find_epc_searcher.retrieve_newest_find_my_epc_data(
return_page=find_my_epc_return_page
)
except ValueError as e:
if "No EPC found" in str(e) and "address1" in searcher.newest_epc:
try:
find_epc_searcher = RetrieveFindMyEpc(
address=searcher.newest_epc["address1"], postcode=searcher.newest_epc["postcode"]
)
find_epc_response = find_epc_searcher.retrieve_newest_find_my_epc_data()
except ValueError as e:
if "No EPC found" in str(e):
find_epc_response = ({}, None) if find_my_epc_return_page else ({})
else:
logger.error(f"Error retrieving FindMyEPC data: {e}")
raise Exception(f"Error retrieving FindMyEPC data: {e}")
else:
find_epc_response = ({}, None) if find_my_epc_return_page else ({})
except Exception as e:
raise Exception(f"Error retrieving FindMyEPC data: {e}")
newest_epc = searcher.newest_epc
older_epcs = searcher.older_epcs
find_my_epc_page = None
if find_my_epc_return_page:
find_my_epc_data, find_my_epc_page = find_epc_response
else:
find_my_epc_data = find_epc_response
return newest_epc, older_epcs, find_my_epc_data, find_my_epc_page
def get_data(
df,
manual_uprn_map,

View file

@ -1,11 +1,14 @@
from enum import Enum
from typing import List
import pandas as pd
from utils.logger import setup_logger
from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes
from backend.app.plan.schemas import VALID_HOUSING_TYPES, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, \
MEASURE_MAP
logger = setup_logger(__name__)
class EligibilityCaveats(Enum):
EPC_RATING = "epc_rating" # EPC requirements not met
@ -365,6 +368,8 @@ class Funding:
starting_str = "1.7"
elif closest_starting == 1:
starting_str = "1.0"
elif closest_starting == 0.6:
starting_str = "0.6"
else:
starting_str = f"{closest_starting:.2f}"
@ -539,6 +544,8 @@ class Funding:
pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == measure_code]
if pps.shape[0] != 1:
if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]:
return 0
raise ValueError(f"Invalid IWI category: {measure_code}")
return pps.squeeze()["Cost Savings"]
@ -551,6 +558,8 @@ class Funding:
pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == measure_code]
if pps.shape[0] != 1:
if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]:
return 0
raise ValueError(f"Invalid EWI category: {measure_code}")
return pps.squeeze()["Cost Savings"]
@ -559,6 +568,8 @@ class Funding:
pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == measure_code]
if pps.shape[0] != 1:
if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]:
return 0
raise ValueError(f"Invalid CWI category: {measure_code}")
return pps.squeeze()["Cost Savings"]
@ -578,6 +589,11 @@ class Funding:
return pps.squeeze()["Cost Savings"]
if measure_type == "flat_roof_insulation":
# No funding for properties starting at C or above
if self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]:
return 0
pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == "FRI"]
if pps.shape[0] != 1:
raise ValueError("Invalid FRI category")
@ -589,6 +605,8 @@ class Funding:
code = "RIRI_res_unin"
pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == code]
if pps.shape[0] != 1:
if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]:
return 0
raise ValueError(f"Invalid RIRI category: {code}")
return pps.squeeze()["Cost Savings"]
@ -632,13 +650,25 @@ class Funding:
if self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]:
return 0
pps = filtered_pps_matrix[
(filtered_pps_matrix["Pre_Main_Heating_Source"] == pre_heating_system) &
(filtered_pps_matrix["Post_Main_Heating_Source"] == "Air to Water ASHP") &
(filtered_pps_matrix["Measure_Type"] == "B_Upgrade_nopreHCs")
pps_data = filtered_pps_matrix[
filtered_pps_matrix["Post_Main_Heating_Source"] == "Air to Water ASHP"
]
if pre_heating_system not in pps_data["Pre_Main_Heating_Source"].values:
logger.info(
f"No PPS data for ASHP upgrade from {pre_heating_system}, returning 0"
)
return 0
pps = pps_data[
(pps_data["Pre_Main_Heating_Source"] == pre_heating_system) &
(pps_data["Measure_Type"] == "B_Upgrade_nopreHCs")
# We assume we'll be making a heating system upgrade
]
# Not every pre heating system will result in PPS, e.g. a ground source heat pump to ASHP upgrade
# won't have a PPS.
if pps.shape[0] != 1:
raise ValueError("something went wrong, more than one pps for ashp")
return pps.squeeze()["Cost Savings"]
@ -789,7 +819,7 @@ class Funding:
if not has_eligibile_heating:
# We check if there is a recommendation for an ASHP or HHRSH
if ("air_source_heat_pump" not in measure_types) and (
"high_heat_retention_storage_heater" not in measure_types):
"high_heat_retention_storage_heaters" not in measure_types):
return True, False, True
# 2) We check if there is a wall insulation measure for this property. If so, we make sure

View file

@ -22,8 +22,8 @@ from recommendations.recommendation_utils import (
)
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
from backend.app.utils import sap_to_epc
from backend.Funding import Funding
import backend.app.assumptions as assumptions
from backend.app.db.models.portfolio import rating_lookup
ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
DATA_BUCKET = os.environ.get(
@ -65,6 +65,7 @@ class Property:
# Surplus information, that can be provided as optional inputs, by a customer
n_bathrooms = None
n_bedrooms = None
landlord_property_id = None # unique reference for the property as recognised by the landlord
building_id = None # Used to group properties together into a single building
# Contains the solar panel optimisation results from the Google Solar API
@ -80,12 +81,14 @@ class Property:
postcode,
address,
epc_record,
uprn=None, # Pass as an optional input
property_valuation=None,
already_installed=None,
non_invasive_recommendations=None,
measures=None,
energy_assessment=None,
is_new=True,
inspections=None,
**kwargs
):
@ -106,7 +109,7 @@ class Property:
# of the non-invasive surveys. We reflect that this has been installed in the recommendations, but remove the
# cost and instead, provide a message that the measure has already been installed
self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
self.already_installed = already_installed
self.non_invasive_recommendations = (
non_invasive_recommendations['recommendations'] if
non_invasive_recommendations else []
@ -119,7 +122,7 @@ class Property:
self.valuation = property_valuation
self.uprn = epc_record.get("uprn")
self.uprn = uprn if uprn is not None else epc_record.get("uprn")
self.uprn_source = self.data.get("uprn-source")
self.full_sap_epc = epc_record.get("full_sap_epc")
@ -209,6 +212,9 @@ class Property:
self.energy_assessment_condition_data = energy_assessment["condition"]
self.energy_assessment_is_newer = energy_assessment["energy_assessment_is_newer"]
# Store inspections
self.inspections = inspections
# TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data
self.parse_kwargs(kwargs)
@ -265,8 +271,9 @@ class Property:
"number_of_floors": number_of_floors,
"insulation_floor_area": insulation_floor_area,
"insulation_wall_area": insulation_wall_area,
"building_id": kwargs.get("building_id", None),
"floor_area": floor_area
"building_id": kwargs.get("building_id", kwargs.get("landlord_block_reference", None)),
"floor_area": floor_area,
"landlord_property_id": kwargs.get("landlord_property_id"),
}
def parse_kwargs(self, kwargs):
@ -295,9 +302,18 @@ class Property:
if k in fixed_data_col_names
}
difference_record = self.epc_record.create_EPCDifferenceRecord(
self.epc_record, fixed_data
)
difference_record = self.epc_record.create_EPCDifferenceRecord(self.epc_record, fixed_data)
# We have rare cases where entire description columns are missing. EpcRecords will convert this to None.
# Due to the sensitivity of the EPCDifferenceRecord creation to missing data, we will fill in these missing
# descriptions with an empty string, for the purpose of creating this scoring record
description_cols = [
x for x in difference_record.difference_record if
"_description" in x and difference_record.difference_record[x] is None
]
if description_cols:
for col in description_cols:
difference_record.difference_record[col] = ""
self.base_difference_record = TrainingDataset(datasets=[difference_record], cleaned_lookup=cleaned_lookup)
@ -325,7 +341,6 @@ class Property:
recommendation_record=recommendation_record,
recommendations=property_representative_recommendations,
primary_recommendation_id=self.id,
non_invasive_recommendations=self.non_invasive_recommendations,
)
return scoring_dict
@ -454,10 +469,8 @@ class Property:
# It means we've recommended HHR with electric immersion, and shouldn't overwrite
# the hot water description
continue
# Set the new value otherwise as it's due to already installed measures - do nothing
raise NotImplementedError(
"Already have this key in the phase_epc_transformation - implement me"
)
phase_epc_transformation[k] = v
simulation_epc.update(phase_epc_transformation)
self.simulation_epcs[rec["recommendation_id"]] = simulation_epc
@ -596,7 +609,13 @@ class Property:
if self.data[description] in self.DATA_ANOMALY_MATCHES:
template = cleaned[description][0]
fill_dict = dict(zip(template.keys(), [None] * len(template)))
# Handling edge case for walls
fill_with = False if description == "walls-description" else None
fill_dict = dict(zip(template.keys(), [fill_with] * len(template)))
if description == "walls-description":
fill_dict["thermal_transmittance_unit"] = None
fill_dict["insulation_thickness"] = "none"
fill_dict.update(
{
"original_description": self.data[description],
@ -721,11 +740,12 @@ class Property:
self.energy_cost_estimates = {
"unadjusted": unadjusted_heating_costs,
"epc": {
"heating": float(self.data["heating-cost-current"]),
"hot_water": float(self.data["hot-water-cost-current"]),
"lighting": float(self.data["lighting-cost-current"]),
}
# Don't think we need the EPC
# "epc": {
# "heating": float(self.data["heating-cost-current"]),
# "hot_water": float(self.data["hot-water-cost-current"]),
# "lighting": float(self.data["lighting-cost-current"]),
# }
}
self.energy_consumption_estimates = {
@ -778,13 +798,19 @@ class Property:
to_update[k] = None
return to_update
def get_full_property_data(self, current_valuation=None):
def get_full_property_data(self, current_valuation=None, needs_rebaselining=False, rebaselining_sap=0):
"""
This method extracts the data which is pushed to the database, containing core information, from the EPC
about a property
:return:
"""
current_sap_rating = float(self.data["current-energy-efficiency"])
if needs_rebaselining:
current_sap_rating += rebaselining_sap
current_epc_rating = sap_to_epc(current_sap_rating)
property_data = {
"creation_status": "READY",
"uprn": int(self.data["uprn"]),
@ -801,9 +827,12 @@ class Property:
"number_of_rooms": self.number_of_rooms,
"year_built": self.year_built,
"tenure": self.data["tenure"],
"current_epc_rating": self.data["current-energy-rating"],
"current_sap_points": self.data["current-energy-efficiency"],
"current_epc_rating": current_epc_rating,
"current_sap_points": current_sap_rating,
"current_valuation": current_valuation,
"original_sap_points": self.data["current-energy-efficiency"],
"is_sap_points_adjusted_for_installed_measures": needs_rebaselining,
"installed_measures_sap_point_adjustment": rebaselining_sap,
}
property_data = self._clean_upload_data(property_data)
@ -811,7 +840,7 @@ class Property:
return property_data
@classmethod
def _prepare_rating_field(cls, field, rating_lookup):
def _prepare_rating_field(cls, field):
"""
Utility function for usage in the lambda, for preparing the _rating fields
"""
@ -821,48 +850,68 @@ class Property:
else None
)
def get_property_details_epc(self, portfolio_id: int, rating_lookup):
def get_property_details_epc(
self, portfolio_id: int, needs_rebaselining: bool = False, rebaselining_carbon: float = 0,
rebaselining_heat_demand: float = 0, rebaselining_kwh: float = 0, rebaselining_bills: float = 0
):
if self.current_energy_bill is None:
raise ValueError("Current energy bill has not been set")
# If we have a SAP05 overwrite, we pull out the relevant information
sap_05_overwritten = self.data.get("sap-05-overwritten", False)
sap_05_score, sap_05_epc_rating = None, None
if sap_05_overwritten:
if not self.old_data:
# Trying to fetch SAP05 EPC but no data
raise ValueError("Trying to fetch SAP05 EPC but no old data available")
# We get the last rating from the old data
newest_old_epc = max(self.old_data, key=lambda d: pd.to_datetime(d["lodgement-date"]))
# Get the rating and score
sap_05_score = int(newest_old_epc["current-energy-efficiency"])
sap_05_epc_rating = newest_old_epc["current-energy-rating"]
lodgement_date = self.data["lodgement-date"]
# We check if the lodgement date is more than 10 years old
is_expired = (datetime.now() - pd.to_datetime(lodgement_date)) > timedelta(days=3650)
# Handle re-baselining
co2_emissions = self.energy["co2_emissions"]
primary_energy_consumption = self.energy["primary_energy_consumption"]
current_kwh_demand = self.current_energy_consumption
current_kwh_heating_hotwater = self.current_energy_consumption_heating_hotwater
if needs_rebaselining:
# Carbon will be reduced
co2_emissions -= rebaselining_carbon
# Heat demand will be reduced
primary_energy_consumption -= rebaselining_heat_demand
current_kwh_demand -= rebaselining_kwh
current_kwh_heating_hotwater -= rebaselining_kwh
property_details_epc = {
"property_id": self.id,
"portfolio_id": portfolio_id,
"lodgement_date": datetime.fromisoformat(lodgement_date),
"is_expired": is_expired,
"full_address": self.data["address"],
"total_floor_area": float(self.data["total-floor-area"]),
"walls": self.walls["clean_description"],
"walls_rating": self._prepare_rating_field(
self.data["walls-energy-eff"], rating_lookup
),
"walls_rating": self._prepare_rating_field(self.data["walls-energy-eff"]),
"roof": self.roof["clean_description"],
"roof_rating": self._prepare_rating_field(
self.data["roof-energy-eff"], rating_lookup
),
"roof_rating": self._prepare_rating_field(self.data["roof-energy-eff"]),
"floor": self.floor["clean_description"],
"floor_rating": self._prepare_rating_field(
self.data["floor-energy-eff"], rating_lookup
),
"floor_rating": self._prepare_rating_field(self.data["floor-energy-eff"]),
"windows": self.windows["clean_description"],
"windows_rating": self._prepare_rating_field(
self.data["windows-energy-eff"], rating_lookup
),
"windows_rating": self._prepare_rating_field(self.data["windows-energy-eff"]),
"heating": self.main_heating["clean_description"],
"heating_rating": self._prepare_rating_field(
self.data["mainheat-energy-eff"], rating_lookup
),
"heating_rating": self._prepare_rating_field(self.data["mainheat-energy-eff"]),
"heating_controls": self.main_heating_controls["clean_description"],
"heating_controls_rating": self._prepare_rating_field(
self.data["mainheatc-energy-eff"], rating_lookup
),
"heating_controls_rating": self._prepare_rating_field(self.data["mainheatc-energy-eff"]),
"hot_water": self.hotwater["clean_description"],
"hot_water_rating": self._prepare_rating_field(
self.data["hot-water-energy-eff"], rating_lookup
),
"hot_water_rating": self._prepare_rating_field(self.data["hot-water-energy-eff"]),
"lighting": self.lighting["clean_description"],
"lighting_rating": self._prepare_rating_field(
self.data["lighting-energy-eff"], rating_lookup
),
"lighting_rating": self._prepare_rating_field(self.data["lighting-energy-eff"]),
"mainfuel": self.main_fuel["clean_description"],
"ventilation": self.ventilation["ventilation"],
"solar_pv": self.solar_pv["solar_pv"],
@ -871,19 +920,30 @@ class Property:
"floor_height": self.floor_height,
"heat_loss_corridor": self.heat_loss_corridor["heat_loss_corridor_boolean"],
"unheated_corridor_length": self.heat_loss_corridor["length"],
"number_of_open_fireplaces": self.number_of_open_fireplaces[
"number_of_open_fireplaces"
],
"number_of_open_fireplaces": self.number_of_open_fireplaces["number_of_open_fireplaces"],
"number_of_extensions": self.number_of_extensions["number_of_extensions"],
"number_of_storeys": self.number_of_storeys["number_of_storeys"],
"mains_gas": self.mains_gas,
"energy_tariff": self.data["energy-tariff"],
"primary_energy_consumption": self.energy["primary_energy_consumption"],
"co2_emissions": self.energy["co2_emissions"],
"current_energy_demand": self.current_energy_consumption,
"current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater,
"primary_energy_consumption": primary_energy_consumption,
"co2_emissions": co2_emissions,
"current_energy_demand": current_kwh_demand, # This is kwh - naming is confusing
"current_energy_demand_heating_hotwater": current_kwh_heating_hotwater, # This is kwh
"estimated": self.data.get("estimated", False),
**self.current_energy_bill
# We indicate if we've overwritten a SAP 05 EPC
"sap_05_overwritten": sap_05_overwritten,
"sap_05_score": sap_05_score,
"sap_05_epc_rating": sap_05_epc_rating,
**self.current_energy_bill,
"original_co2_emissions": self.energy["co2_emissions"],
"original_primary_energy_consumption": self.energy["primary_energy_consumption"],
"original_current_energy_demand": self.current_energy_consumption, # Bad naming, this is kwh
"original_current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater, # kwh
"installed_measures_co2_adjustment": rebaselining_carbon,
"installed_measures_energy_demand_adjustment": rebaselining_kwh, # kwh
"installed_measures_total_energy_bill_adjustment": rebaselining_bills,
"installed_measures_heat_demand_adjustment": rebaselining_heat_demand,
"is_epc_adjusted_for_installed_measures": needs_rebaselining,
}
return property_details_epc
@ -1070,7 +1130,12 @@ class Property:
elif self.floor["thermal_transmittance"] is not None:
self.floor_type = "solid"
else:
raise NotImplementedError("Implement this floor type")
# In this case, it's not clear what the floor type is, so we default to suspended - this is a temporary measure
logger.warning(
f"Could not determine floor type, given: '{self.floor['original_description']}', defaulting to "
f"suspended for property {self.uprn}"
)
self.floor_type = "suspended"
@staticmethod
def _extract_component(
@ -1154,6 +1219,7 @@ class Property:
'has_community_scheme': 'Varied (Community Scheme)',
"has_dual_fuel_mineral_and_wood": 'Wood Logs',
"has_electricaire": 'Electricity',
"has_wood_chips": 'Wood Logs'
}
# Hot water
@ -1185,6 +1251,19 @@ class Property:
'oil range cooker': 'Oil'
}
fuel_map = {
None: "Natural Gas (Community Scheme)",
"mains gas": "Natural Gas (Community Scheme)",
"biomass": "Smokeless Fuel",
"electricity": "Electricity",
"biogas": "Smokeless Fuel",
"heat network": "Natural Gas (Community Scheme)",
"lpg": 'LPG',
"biodiesel": "Smokeless Fuel",
"b30d": "B30K Biofuel",
"coal": "Coal",
}
self.heating_energy_source = list({
fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
})
@ -1197,6 +1276,12 @@ class Property:
else:
self.heating_energy_source = ['Electricity']
if set(self.heating_energy_source) == {'Electricity', 'LPG'}:
if self.main_fuel["clean_description"] in ["Lpg not community", "Lpg community"]:
self.heating_energy_source = ['LPG']
else:
self.heating_energy_source = ['Electricity']
if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}:
# It means they have mixed heating so we take the primary one, based on main fuel
# This will probably happen in the case of an extension
@ -1205,21 +1290,50 @@ class Property:
else:
self.heating_energy_source = ['Wood Logs']
if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1:
if len(self.heating_energy_source) > 1 and "Varied (Community Scheme)" not in self.heating_energy_source:
# We might have something like heating energy source equal to ['Natural Gas', 'Varied (Community Scheme)']
# so we treat this as community heating
raise Exception("Investigate me")
if len(self.heating_energy_source) == 0:
heating_flags = {
v for k, v in self.main_heating.items() if k not in ["original_description", "clean_description"]
}
hotwater_flags = {
v for k, v in self.hotwater.items() if k not in ["original_description", "clean_description"]
}
# If all flags are zero, we have a no data example
if (heating_flags == {False} or hotwater_flags == {None}) and (
hotwater_flags == {False} or hotwater_flags == {None}):
# We have nodata so we try and rely on main fuel
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
mapped_fuel = fuel_map[self.main_fuel["fuel_type"]]
self.heating_energy_source = mapped_fuel
self.hot_water_energy_source = mapped_fuel
return
else:
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")
# We handle edge case where no heating system is indicated
if self.main_fuel["fuel_type"] in fuel_map:
mapped_fuel = fuel_map[self.main_fuel["fuel_type"]]
self.heating_energy_source = mapped_fuel
self.hot_water_energy_source = mapped_fuel
return
if len(self.heating_energy_source) > 1:
# We treat this as a community scheme
self.heating_energy_source = ["Varied (Community Scheme)"]
self.heating_energy_source = self.heating_energy_source[0]
if self.heating_energy_source == "Varied (Community Scheme)":
fuel_map = {
None: "Natural Gas (Community Scheme)",
"mains gas": "Natural Gas (Community Scheme)",
"biomass": "Smokeless Fuel",
}
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]]
else:
raise Exception("Implement me")
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")
if self.hotwater["heater_type"] is not None:
self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
@ -1241,7 +1355,7 @@ class Property:
secondary_heating = self.data["secondheat-description"]
self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"]
else:
raise Exception("Investiage me")
raise NotImplementedError(f"Investiage me - unhandled hot water fuel {fuel}")
else:
self.hot_water_energy_source = hotwater_appliance_to_fuel[self.hotwater["appliance"]]
@ -1294,9 +1408,17 @@ class Property:
self.roof["is_flat"] or self.roof["is_pitched"] or self.roof["is_roof_room"]
)
# If there is no existing solar PV, the photo-supply field will be None or a missing value
has_no_existing_solar_pv = self.data["photo-supply"] in [
None, 0, self.DATA_ANOMALY_MATCHES
]
# We use inspections data to tell us this
if getattr(self.inspections, "roof_orientation", None):
has_no_existing_solar_pv = self.inspections.roof_orientation.value not in [
"already has solar pv", "roof too small", "no roof"
]
else:
has_no_existing_solar_pv = self.data["photo-supply"] in [
None, 0, self.DATA_ANOMALY_MATCHES
]
return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv
@ -1325,29 +1447,14 @@ class Property:
if not self.is_ashp_valid(measures=["air_source_heat_pump"]):
return self.current_energy_consumption
# If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
remap_fuel_sources = [
"Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
"Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal"
]
heating_energy_source = self.heating_energy_source
hot_water_energy_source = self.hot_water_energy_source
heating_consumption = self.energy_consumption_estimates["unadjusted"]["heating"]
hotwater_consumption = self.energy_consumption_estimates["unadjusted"]["hot_water"]
if (heating_energy_source not in remap_fuel_sources) or (
hot_water_energy_source not in remap_fuel_sources + ["Electricity + Solar Thermal"]
):
raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type")
# Adjust the heating consumption to reflect the expected efficiency of an ASHP - broadly 3.0 COP
heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100)
if heating_energy_source in remap_fuel_sources:
# Adjust the heating consumption to reflect the expected efficiency of an ASHP
heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100)
if hot_water_energy_source in remap_fuel_sources:
# Adjust the hot water consumption to reflect the expected efficiency of an ASHP
hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100)
# Adjust the hot water consumption to reflect the expected efficiency of an ASHP
hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100)
electric_consumption = (
heating_consumption +

View file

@ -144,6 +144,11 @@ class SearchEpc:
"error": None
}
# Keys that we check for missing values to determine if the EPC is incomplete
CHECK_MISSING_KEYS = [
"lighting-cost-current", "heating-cost-current", "hot-water-cost-current", "energy-consumption-potential"
]
def __init__(
self,
address1: str,
@ -156,6 +161,8 @@ class SearchEpc:
size=None,
property_type=None,
fast=False,
heating_system: [str, None] = None,
associated_uprns: [List[int] | None] = None
):
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
@ -171,6 +178,11 @@ class SearchEpc:
:param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's
default
:param property_type: str, optional, the property type of the property, if known before hand
:param fast: bool, optional, if true, the extract_epc_data method will skip some processing to return
results faster
:param heating_system: str, optional, the heating system of the property, if known before hand
:param associated_uprns: list of int, optional, list of associated uprns for the property. E.g. other
units in a block of flats
"""
self.address1 = address1
@ -179,6 +191,10 @@ class SearchEpc:
self.uprn = uprn
self.house_number = self.get_house_number(self.address1)
self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
self.associated_uprns = associated_uprns if associated_uprns is not None else []
# property attributes
self.heating_system = heating_system
self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES
@ -188,7 +204,7 @@ class SearchEpc:
)
self.data = None
self.newest_epc = None
self.newest_epc = {}
self.older_epcs = None
self.full_sap_epc = None
self.metadata = None
@ -197,12 +213,19 @@ class SearchEpc:
# These are the address and postcode values, which we store in the database
self.address_clean = None
self.postcode_clean = None
self.address_postal_town = None
self.size = size if size is not None else 25
self.property_type = property_type
self.fast = fast
# By default, this is set to false. This flag indicates whether we should overwrite SAP 2005 entries.
self.overwrite_sap05 = False
# By default, this is set to false. This flag indicates whether we should take the existing EPC, but use
# the estimated EPC to clean missing values
self.clean_missing_on_expired = False
def set_strict_property_type_search(self):
"""
This method sets the strict property type search flag to True. When this flag is set, the search will
@ -347,7 +370,8 @@ class SearchEpc:
# We update the data with the correct uprn
if self.uprn:
for x in api_response["response"]["rows"]:
x["uprn"] = self.uprn
if pd.isnull(x["uprn"]):
x["uprn"] = self.uprn
data["rows"].extend(api_response["response"]["rows"])
@ -357,6 +381,8 @@ class SearchEpc:
row for row in data["rows"]
if row["lmk-key"] not in seen and not seen.add(row["lmk-key"])
]
# Overwrite the data
self.data = data
if data["rows"]:
api_response["msg"] = self.SUCCESS
@ -415,12 +441,33 @@ class SearchEpc:
address, [", ".join([r["address"]]) for r in rows], score_cutoff=0
)
# Pick the largest score
if best_match1[1] >= best_match2[1]:
# Get all of the scores
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]]
if best_match1[1] == best_match2[1]:
# If they're the same, we work under the assumption that the addresses are the same and we
# take whichever has the newest EPC
rows_filtered = [
r for r in rows
if (", ".join([r["address"], r["posttown"]]) == best_match1[0]) or
(r["address"] == best_match2[0])
]
rows_filtered = [
r for r in rows_filtered
if r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in rows_filtered])
]
elif best_match1[1] > best_match2[1]:
# Get all of the scores - make sure we keep uprn
rows_filtered = [
r for r in rows if
(
(", ".join([r["address"], r["posttown"]]) == best_match1[0]) or
(str(r["uprn"]) == str(self.uprn))
)
]
else:
# Get all of the scores
rows_filtered = [r for r in rows if r["address"] == best_match2[0]]
rows_filtered = [
r for r in rows if (r["address"] == best_match2[0]) or (str(r["uprn"]) == str(self.uprn))
]
# If we have multiple, we filter on newest lodgment date
if len(rows_filtered) > 1:
@ -460,7 +507,11 @@ class SearchEpc:
postcode = postcode.upper()
return address, postcode
# We also return a "postal town" variant - useful for edge cases when fetching from find my EPC
address_postal_town = ", ".join(
[newest_epc["address1"], newest_epc["address2"], newest_epc["posttown"]]).strip().title()
return address, postcode, address_postal_town
def extract_epc_data(self, address=None):
@ -489,7 +540,7 @@ class SearchEpc:
newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows)
# Get the uprn from the newest record for this home
uprns = {r["uprn"] for r in rows if r["uprn"]}
uprns = {str(r["uprn"]) for r in rows if r["uprn"]}
# We can sometimes have no uprn for a property
if (len(uprns) == 0) and len(rows) > 0:
logger.warning("Found data but missing uprn")
@ -500,21 +551,42 @@ class SearchEpc:
# Take the uprn from the most recent
uprns = {newest_epc["uprn"]}
else:
raise ValueError("Multiple UPRNs found - investigate me")
# We check if we have UPRNs that match the one we're given and if so, filter on those
if self.uprn is not None:
uprns = {u for u in uprns if int(u) == self.uprn}
if len(uprns) == 1:
logger.info(
f"Multiple UPRNs found but one matches provided UPRN {self.uprn}, using this UPRN"
)
else:
raise ValueError("Multiple UPRNs found - investigate me")
if uprns:
uprn = uprns.pop()
else:
newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED
uprn = hash(self.address1 + self.postcode)
# if uprns:
# epc_uprn = uprns.pop()
# # Convert to int
# if not pd.isnull(epc_uprn):
# uprn = int(epc_uprn)
# else:
# newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED
# uprn = hash(self.address1 + self.postcode)
if self.uprn is not None and uprns:
epc_uprn = uprns.pop()
if int(epc_uprn) != self.uprn:
logger.warning(
f"Provided UPRN {self.uprn} does not match EPC UPRN {epc_uprn}, using provided UPRN"
)
# We overwrite but in this instance, we've likely got the wrong EPC data
# Insert as a string - same format as the raw data
newest_epc["uprn"] = str(self.uprn)
if self.fast:
return newest_epc, [], {}, "", "", None
return newest_epc, [], {}, "", "", ""
# Retrieve postcode and address
address_epc, postcode_epc = self.format_address(newest_epc=newest_epc)
address_epc, postcode_epc, address_postal_town = self.format_address(newest_epc=newest_epc)
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, address_postal_town
@staticmethod
def filter_newest_epc(list_of_epcs: List):
@ -555,7 +627,9 @@ class SearchEpc:
lmks_to_drop: list[str] | None = None,
built_form: str = "",
property_type: str = "",
exclude_old: bool = False
exclude_old: bool = False,
heating_system: [str, None] = None,
associated_uprns: [List[int] | None] = None
):
"""
Fetches and processes EPC data for a given initial postcode, applying successive trimming
@ -575,9 +649,13 @@ class SearchEpc:
:param built_form: The 'built-form' value to be used for filtering the EPC data.
:param property_type: The 'property-type' value to be used for filtering the EPC data.
:param exclude_old: Flag to exclude EPC data older than 10 years.
:param heating_system: Optional heating system type for additional filtering.
:param associated_uprns: Optional list of associated UPRNs for additional filtering.
:return:
"""
associated_uprns_to_apply = [] if associated_uprns is None else associated_uprns.copy()
property_type_api_map = {
"Bungalow": "bungalow",
"Flat": "flat",
@ -594,7 +672,10 @@ class SearchEpc:
params["property-type"] = property_type_api_map[property_type]
# We limit the number of homes fetched of the relevant type, so as not to pull in too many irrelevant homes
epc_response = self.get_epc(params=params, size=100)
# If we get to the final iteration, we fetch more
size = 1000 if len(postcode) <= 2 else 100
epc_response = self.get_epc(params=params, size=size)
if epc_response["status"] == 200:
epc_data = pd.DataFrame(self.data["rows"])
@ -616,6 +697,17 @@ class SearchEpc:
epc_data["lodgement-datetime"] > (pd.Timestamp.now() - pd.DateOffset(years=10))
]
# Regardless of whether or not we exclude old, we drop any SAP05 entries, which will be problematic
# if we include them
if not epc_data.empty:
epc_data = epc_data[~epc_data["mainheat-description"].str.lower().str.contains("sap05:")]
if not epc_data.empty and heating_system is not None:
# If we arrive at the final iteration, we allow ourself to be less strict on heating system
epc_data = epc_data[
epc_data["mainheat-description"] == heating_system
]
if not epc_data.empty:
# Further processing of the EPC data
@ -661,6 +753,31 @@ class SearchEpc:
estimation_built_form = "End-Terraced"
elif (built_form == "") or (pd.isnull(built_form)):
estimation_built_form = epc_built_form
elif built_form == "Enclosed Mid-Terrace":
# We check if we have any enclosed and if not, we fall back to mid-terrace
if sum(epc_data["built-form"] == "Enclosed Mid-Terrace") > 0:
estimation_built_form = "Enclosed Mid-Terrace"
else:
estimation_built_form = "Mid-Terrace"
elif built_form == "Enclosed End-Terrace":
# An enclosed end terrace has three two external facing walls so we fall back to mid-terrace
if sum(epc_data["built-form"] == "Enclosed End-Terrace") > 0:
estimation_built_form = "Enclosed Mid-Terrace"
else:
estimation_built_form = "Mid-Terrace"
elif built_form == "Detached" and property_type == "Flat":
# We add in a fallback to detached flats, where it can be rarer to see properties of this type
if len(postcode) <= 2:
if sum(epc_data["built-form"] == built_form) > 0:
estimation_built_form = built_form
elif sum(epc_data["built-form"] == "Semi-Detached") > 0:
estimation_built_form = "Semi-Detached"
elif sum(epc_data["built-form"] == "End-Terrace") > 0:
estimation_built_form = "End-Terrace"
else:
estimation_built_form = "Mid-Terrace"
else:
estimation_built_form = built_form
else:
estimation_built_form = built_form
@ -679,7 +796,16 @@ class SearchEpc:
has_missing_built_form = not estimation_built_form
if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
# If we have associated UPRNS, we just filter as such, otherwise
# we filter with built form and property type
if any(str(x) in epc_data["uprn"].astype(str).values for x in associated_uprns_to_apply):
# We check at least one UPRN is in the data
epc_data = epc_data[epc_data["uprn"].isin(associated_uprns_to_apply)]
# After we run this, we empty associated_uprns_to_apply.
# That ensures we don't keep re-applying this filter if we shorten the postcode again
# since we'll keep ending up in the same results
associated_uprns_to_apply = []
elif is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form:
epc_data = epc_data[epc_data["property-type"] == estimation_property_type]
else:
epc_data = epc_data[
@ -696,7 +822,10 @@ class SearchEpc:
# If loop finishes without a valid response, raise an exception
raise Exception("Unable to find postcode data after trimming - investigate me")
def estimate_epc(self, property_type, built_form, lmks_to_drop=None, exclude_old=False):
def estimate_epc(
self, property_type, built_form, lmks_to_drop=None, exclude_old=False, heating_system=None,
associated_uprns=None
):
"""
For a property that does not have an EPC, we retrieve the EPC data for the closest properties
and estimate the EPC for the property in question.
@ -710,6 +839,9 @@ class SearchEpc:
:param lmks_to_drop: This is a list of LMK keys that should be dropped from the estimation process. This
is used as an override for testing, to drop EPCs for the property we are testing
:param exclude_old: Used to drop any expired EPCs (more than 10 years old)
:param heating_system: The heating system of the property we are estimating, if known. Will aim to filter EPCs
to matching heating systems
:param associated_uprns: List of associated UPRNs for the property. E.g. other units in a block of flats
:return:
"""
@ -720,17 +852,20 @@ class SearchEpc:
lmks_to_drop=lmks_to_drop,
built_form=built_form,
property_type=property_type,
exclude_old=exclude_old
exclude_old=exclude_old,
heating_system=heating_system,
associated_uprns=associated_uprns
)
# Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build
# so we avoid comparing it to new builds
# TODO - this is experimental
# TODO - this is experimental - if we have the year the property was built, we should utilise that
# here
newer_age_bands = [
"England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011",
"England and Wales: 2012 onwards"
]
# We also remove EPCs that are for new dwellings
if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum():
# We have some older age bands, so we need to filter them out
epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy()
@ -823,7 +958,7 @@ class SearchEpc:
@staticmethod
def calculate_weighted_lodgement_datetime(epc_data):
numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).view('int64')
numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).astype('int64')
# Calculate the weighted sum of dates
weighted_sum = (numeric_dates * epc_data['weight']).sum()
@ -862,7 +997,7 @@ class SearchEpc:
return agg[key].values[0]
def find_property(self, skip_os=False):
def find_property(self, skip_os=False, api_data=None, overwrite_sap05=False):
"""
This method will attempt to identify a property. It will, at first, use the EPC api to try and
find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to
@ -873,27 +1008,95 @@ class SearchEpc:
as a final check to see if there is any EPC data.
If there is no EPC data, the epc data will be estimated based on the surrounding properties
:param skip_os: If True, the ordnance survey api will be skipped and only the EPC api will be used
:param api_data: If provided, this data will be used instead of querying the EPC api
:param overwrite_sap05: For extremely old SAP05 EPCs, we may wish to overwrite them with an estimated EPC.
This is because the SAP05 EPCs will have missing information such as the main heating
will be described as SAP05:Main-Heating, which isn't particularly useful for the
purpose of providing recommendations.
"""
# Step 1: use the epc api to find the property and uprn
response = self.get_epc()
if api_data:
self.data = api_data
response = {"status": 200}
else:
response = self.get_epc()
if response["status"] == 200:
(
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean,
self.address_postal_town
) = self.extract_epc_data(address=self.full_address)
# Before we return, we check if we need to overwrite a SAP05 EPC
# ---- SAP 05 overwriting logic ----
is_sap_05 = "SAP05:" in self.newest_epc.get("mainheat-description", "")
needs_sap_05_overwrite = is_sap_05 and (response["status"] == 200) and overwrite_sap05
# ---- Cleaning expired EPC logic ----
epc_is_expired = (pd.Timestamp.now() - pd.Timestamp(
self.newest_epc.get("lodgement-date", pd.Timestamp.now()))).days > 3650
epc_has_missing_key_data = any([self.newest_epc.get(k) in [None, ""] for k in self.CHECK_MISSING_KEYS])
epc_needs_cleaning = epc_is_expired and epc_has_missing_key_data
# ---- We don't have an epc ----
no_epc = response["status"] != 200
# If we don't have to overwrite SAP05, or we don't have missing data on an expired EPC, we return
if not needs_sap_05_overwrite and not epc_needs_cleaning and not no_epc:
# If the data is fine, or we're preventing SAP05 overwrites, we just exit here
return
# By default, we don't exclude old but we will do, when we are estimating to overwrite a SAP05 EPC
lmks_to_drop, exclude_old = [], False
if needs_sap_05_overwrite or epc_needs_cleaning:
self.overwrite_sap05 = needs_sap_05_overwrite
self.clean_missing_on_expired = epc_needs_cleaning
lmks_to_drop = [self.newest_epc["lmk-key"]]
exclude_old = True
self.heating_system = (
self.newest_epc["mainheat-description"] if
self.clean_missing_on_expired and self.heating_system is None else self.heating_system
)
self.ordnance_survey_client.property_type = self.newest_epc["property-type"]
self.ordnance_survey_client.built_form = self.newest_epc["built-form"]
# Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn
if skip_os:
if self.ordnance_survey_client.property_type is not None:
# We can try and estimate
estimated_epc = self.estimate_epc(
property_type=self.ordnance_survey_client.property_type,
built_form=self.ordnance_survey_client.built_form
built_form=self.ordnance_survey_client.built_form,
heating_system=self.heating_system,
associated_uprns=self.associated_uprns,
lmks_to_drop=lmks_to_drop,
exclude_old=exclude_old
)
self.newest_epc = estimated_epc
self.older_epcs = []
# If we have overwritten a SAP05 EPC, we need to update older_epcs too
if self.overwrite_sap05:
# We keep a record of the fact that we have performed a SAP05 overwrite
estimated_epc["sap_05_overwritten"] = True
self.older_epcs = [self.newest_epc.copy()]
self.newest_epc = estimated_epc
elif self.clean_missing_on_expired:
# We perform the cleaning
for k in self.CHECK_MISSING_KEYS:
if self.newest_epc[k] in ["", None]:
self.newest_epc[k] = estimated_epc[k]
self.newest_epc["estimated"] = True
self.older_epcs = []
else:
self.older_epcs = []
self.newest_epc = estimated_epc
self.full_sap_epc = {}
# Finally, set a standardised address 1 and postcode
@ -917,7 +1120,8 @@ class SearchEpc:
response = self.get_epc()
if response["status"] == 200:
(
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean,
self.address_postal_town
) = self.extract_epc_data()
return
@ -936,6 +1140,22 @@ class SearchEpc:
self.postcode_clean = self.ordnance_survey_client.postcode_os
return
def set_uprn_source(self, file_format):
    """
    Tag the newest EPC with the "simulated" UPRN source when appropriate.

    The tag is only written when the EPC is flagged as estimated, the input file format is
    "domna_asset_list" and the EPC's UPRN is negative. This is a placeholder until input data
    formats are standardised.
    :param file_format: Name of the input file format
    :return: None - self.newest_epc is updated in place
    :raises ValueError: If there is no EPC data yet (find_property has not populated it)
    """
    epc = self.newest_epc
    if not epc:
        raise ValueError("No EPC data available to set UPRN source - run find_property first")

    # The checks run in the same order as a short-circuiting "and" chain, so the UPRN is only
    # parsed once the two cheaper conditions have passed
    if not epc.get("estimated"):
        return
    if not (file_format == "domna_asset_list"):
        return
    if float(epc["uprn"]) < 0:
        epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED
def check_attribute_variations(self):
attribute_map = {
"walls-description": {
@ -993,7 +1213,7 @@ class SearchEpc:
return "ground"
def get_metadata(self):
if self.newest_epc is None:
if not self.newest_epc:
raise ValueError("No EPC data available")
# We check if the property has ever been downgraded on SAP

View file

@ -0,0 +1,67 @@
from dataclasses import dataclass
from typing import Optional
@dataclass(slots=True)
class Address:
uprn: Optional[int]
landlord_property_id: Optional[str]
address: Optional[str]
full_address: Optional[str]
postcode: str
property_type: Optional[str]
built_form: Optional[str]
estimated: bool
# Additional address data, associated to a standardised asset list
domna_full_address: Optional[str]
domna_address_1: Optional[str]
landlord_heating_system: Optional[str] = None
solar_reason: Optional[str] = None
cavity_reason: Optional[str] = None
@property
def address1(self):
if self.domna_address_1 is not None:
address1 = self.domna_address_1
else:
address1 = self.address
# Format
address1 = str(int(address1)) if isinstance(address1, float) else str(address1)
return address1
@property
def request_data(self) -> dict[str, Optional[str]]:
"""
Canonical request payload for downstream services.
"""
data = {
"uprn": self.uprn,
"landlord_property_id": self.landlord_property_id,
"postcode": self.postcode,
"address1": self.address1,
"full_address": self.full_address,
}
# Drop nulls
return {k: v for k, v in data.items() if v is not None}
@property
def heating_system(self):
"""
Helper function to extract a heating system, which can be used to estimate EPC. This is a very limited,
placeholder function to cover some initial immediate cases.
:return:
"""
ll_heating = self.landlord_property_id
if not ll_heating:
return None
if ll_heating == "electric storage heaters":
# Return with the same format at the EPC
return "Electric storage heaters"
return None

View file

@ -0,0 +1,84 @@
from backend.addresses.Address import Address
class Addresses:
    """
    List-like wrapper around a collection of Address objects, with helpers to pull out
    identifiers, postcodes and request payloads across the whole collection.
    """

    def __init__(self, addresses: list[Address]):
        self._addresses = addresses
        # self._identity_index = self._build_identity_index()

    def __getitem__(self, index: int) -> Address:
        return self._addresses[index]

    def __len__(self) -> int:
        return len(self._addresses)

    @classmethod
    def from_plan_input(cls, plan_input: list[dict], body) -> "Addresses":
        """
        Build the collection from raw plan input rows.
        :param plan_input: One dict per property row
        :param body: Request object; only its file_format attribute is read here
        :return: A new Addresses instance, in the same order as the input rows
        """
        return cls([cls._parse_row(row, body) for row in plan_input])

    def get_uprns(self):
        """UPRNs of every address that has one, in collection order."""
        uprns = []
        for entry in self._addresses:
            if entry.uprn is not None:
                uprns.append(entry.uprn)
        return uprns

    def get_landlord_ids(self):
        """Landlord property IDs of every address that has one, in collection order."""
        landlord_ids = []
        for entry in self._addresses:
            if entry.landlord_property_id is not None:
                landlord_ids.append(entry.landlord_property_id)
        return landlord_ids

    def get_unique_postcodes(self):
        """De-duplicated postcodes across the collection (order is not guaranteed)."""
        return list(set(entry.postcode for entry in self._addresses))

    def get_postcodes_for_flats(self):
        # Method to extract all of the postcodes associated to a flat, which is used for remote
        # assessments on flats. Duplicates are kept.
        flat_labels = ["Flat", "flat"]
        return [entry.postcode for entry in self._addresses if entry.property_type in flat_labels]

    def get_property_requests(self):
        """Request payload (Address.request_data) for every address, in collection order."""
        return [entry.request_data for entry in self._addresses]

    @staticmethod
    def _parse_row(row: dict, body) -> Address:
        """
        Convert one raw input row into an Address.
        :param row: Raw row; "postcode" is required, every other key is optional
        :param body: Request object; only its file_format attribute is read here
        :return: The parsed Address
        """
        is_domna = "domna_asset_list"

        # UPRNs may arrive as ints, floats or numeric strings; anything unparseable becomes None
        try:
            uprn = int(float(row.get("uprn")))
        except (TypeError, ValueError):
            uprn = None

        # Fall back to the standardised first address line when the row has no address
        address = row.get("address")
        if not address and body.file_format == is_domna:
            address = row.get("domna_address_1")

        # The full address is only taken from standardised lists, and only when it is a string
        full_address = None
        if body.file_format == is_domna:
            candidate = row.get("domna_full_address")
            if isinstance(candidate, str):
                full_address = candidate

        postcode = str(row["postcode"]).strip().upper()

        landlord_property_id = row.get("landlord_property_id")
        landlord_property_id = str(landlord_property_id) if landlord_property_id else None

        return Address(
            uprn=uprn,
            landlord_property_id=landlord_property_id,
            address=str(address).strip() if address else None,
            full_address=str(full_address).strip() if full_address else None,
            postcode=postcode,
            property_type=row.get("property_type"),
            built_form=row.get("built_form"),
            estimated=bool(row.get("estimated", False)),
            domna_full_address=row.get("domna_full_address"),
            domna_address_1=row.get("domna_address_1"),
        )
# def _build_identity_index(self) -> dict:
# index = {}
# for addr in self._addresses:
# key = addr.identity_key()
# if key in index:
# raise ValueError(f"Duplicate address identity detected: {key}")
# index[key] = addr
# return index

View file

@ -332,7 +332,6 @@ class GoogleSolarApi:
)
if solar_product is None:
logger.info("No suitable solar product found for the configuration with %d panels.", total_panels)
continue
total_cost = Costs.solar_pv(
@ -480,9 +479,7 @@ class GoogleSolarApi:
roi_results = pd.DataFrame(roi_results)
panel_performance = panel_performance.merge(
roi_results, how="left", on="n_panels"
)
panel_performance = panel_performance.merge(roi_results, how="left", on="n_panels")
# We want max roi, minimal generation deficit, and max generation value - we create a ranking score
# Assign equal weights to each metric
@ -707,7 +704,7 @@ class GoogleSolarApi:
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
# property to achieve post retrofit of just the fabric
"energy_consumption": cls.estimate_new_consumption(
current_energy_efficiency=p.data["current-energy-efficiency"],
current_energy_efficiency=min(p.data["current-energy-efficiency"], 100),
target_efficiency="69",
current_consumption=p.estimate_electrical_consumption(
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
@ -726,7 +723,7 @@ class GoogleSolarApi:
# We set the target rating to EPC C, which is the typical EPC rating we would expect the
# property to achieve post retrofit of just the fabric
"energy_consumption": cls.estimate_new_consumption(
current_energy_efficiency=p.data["current-energy-efficiency"],
current_energy_efficiency=min(int(p.data["current-energy-efficiency"]), 100),
target_efficiency="69",
current_consumption=p.estimate_electrical_consumption(
assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions
@ -743,7 +740,7 @@ class GoogleSolarApi:
@classmethod
def building_solar_analysis(
cls, building_solar_config: List, input_properties: List[Property], session, google_solar_api_key: str,
solar_materials: list
solar_materials: list,
):
"""
Perform the solar analysis for the building level
@ -827,9 +824,21 @@ class GoogleSolarApi:
@classmethod
def unit_solar_analysis(
cls, unit_solar_config: List, input_properties: List[Property], session, body, google_solar_api_key: str,
solar_materials: list
solar_materials: list, inspections_map: dict
):
"""
Perform the solar analysis for the unit level
:param unit_solar_config: List of unit solar configurations
:param input_properties: List of properties
:param session: Database session
:param body: PlanTriggerRequest instance
:param google_solar_api_key: Google Solar API key
:param solar_materials: List of solar materials
:param inspections_map: Dictionary mapping property IDs to inspection data
:return:
"""
if not unit_solar_config:
return input_properties
@ -855,18 +864,21 @@ class GoogleSolarApi:
):
continue
solar_api_client = cls(api_key=google_solar_api_key, solar_materials=solar_materials)
if unit["longitude"] is None or unit["latitude"] is None:
# At this point, we've checked that solar PV is valid, and so we provide some defaults
property_instance.set_solar_panel_configuration(
solar_panel_configuration={
"insights_data": None,
"panel_performance": cls.default_panel_performance(property_instance=property_instance),
"panel_performance": solar_api_client.default_panel_performance(
property_instance=property_instance
),
"unit_share_of_energy": 1
},
)
continue
solar_api_client = cls(api_key=google_solar_api_key, solar_materials=solar_materials)
solar_api_client.get(
longitude=unit["longitude"],
latitude=unit["latitude"],
@ -877,6 +889,15 @@ class GoogleSolarApi:
property_instance=property_instance,
)
property_inspections = inspections_map.get(property_instance.id, {})
if property_inspections:
# If we have some inspections data, we check if we have some data which indicates solar cannot
# be installed. We're loose about this now since this is post review
if solar_api_client.panel_performance.empty:
# We assume solar is a suitable option
solar_api_client.panel_performance = solar_api_client.default_panel_performance(property_instance)
# Store the data in the database
solar_api_client.save_to_db(
session=session,
@ -921,12 +942,43 @@ class GoogleSolarApi:
None
)
if material_1_6 is None or material_3_2 is None:
material_4_35 = next(
(m for m in self.solar_materials if m["type"] == "solar_pv" and
abs(m["size"] - 4.35) < 0.1 and not m["includes_battery"]),
None
)
if material_1_6 is None or material_3_2 is None or material_4_35 is None:
raise ValueError("No suitable solar product found for the default configuration.")
# We return a 1.6 and 3.2 kwp system
panel_performance = pd.DataFrame(
[
{
'n_panels': 10,
'yearly_dc_energy': 4350 * assumptions.MEDIAN_WATTAGE_TO_DC,
'total_cost': cost_instance.solar_pv(
solar_product=material_4_35,
scaffolding_options=[
{"total_cost": 1000, "size": property_instance.number_of_floors},
{"total_cost": 1000, "size": 3}
],
n_floors=property_instance.number_of_floors
)["total"],
'weighted_ratio': None,
'panneled_roof_area': 9 * assumptions.RDSAP_AREA_PER_PANEL,
'array_wattage': 4350,
'initial_ac_kwh_per_year': 4350 * assumptions.MEDIAN_WATTAGE_TO_AC,
'lifetime_ac_kwh': None,
'lifetime_dc_kwh': None,
'roi': None,
'generation_value': None,
'generation_deficit': None,
'expected_payback_years': None,
'surplus': None,
'combined_score': None,
'rank': None
},
{
'n_panels': 8,
'yearly_dc_energy': 3200 * assumptions.MEDIAN_WATTAGE_TO_DC,
@ -979,4 +1031,22 @@ class GoogleSolarApi:
},
]
)
# We add the key elements that are required for the database
panel_performance['lifetime_ac_kwh'] = panel_performance.apply(
self.lifetime_production_kwh,
axis=1,
efficiency_depreciation_factor=self.efficiency_depreciation_factor,
installation_life_span=self.installation_life_span,
column_name="initial_ac_kwh_per_year"
)
panel_performance['lifetime_dc_kwh'] = panel_performance.apply(
self.lifetime_production_kwh,
axis=1,
efficiency_depreciation_factor=self.efficiency_depreciation_factor,
installation_life_span=self.installation_life_span,
column_name="yearly_dc_energy",
)
return panel_performance

View file

@ -0,0 +1,30 @@
import numpy as np
class BatterySAPScorer:
    """
    Lightweight production scorer with no sklearn dependency.

    Applies a linear model with hard-coded coefficients that were discovered offline. The code
    for discovering the coefficients can be found in etl/battery_model/train.py

    Only SAP is scored here, as there is already a method for carbon and bill savings.
    """

    INTERCEPT = 10.310168559226678
    COEF_STARTING_SAP = -0.16120648633993315
    COEF_PV_SIZE = 1.0500492005420736

    @classmethod
    def score(cls, starting_sap, pv_size):
        """
        Estimate the SAP uplift as a whole number of points.
        :param starting_sap: Starting SAP score, multiplied by COEF_STARTING_SAP
        :param pv_size: PV size, multiplied by COEF_PV_SIZE
        :return: Integer SAP uplift between 1 and 5 inclusive
        """
        linear_estimate = (
            cls.INTERCEPT
            + cls.COEF_STARTING_SAP * starting_sap
            + cls.COEF_PV_SIZE * pv_size
        )

        # Clamp to [1,5] before rounding - there are only a small number of cases with 0 points
        bounded_estimate = np.clip(linear_estimate, 1, 5)
        return int(np.round(bounded_estimate))

View file

@ -77,7 +77,24 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
"Electric ceiling heating, electric": {"fuel": "Electricity", "cop": 1},
"Air source heat pump, warm air, electric": {
"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
}
},
"Electric heat pump for water heating only": {"fuel": "Electricity", "cop": 1},
"Ground source heat pump, warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100},
"Room heaters, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85},
"Water source heat pump, radiators, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100},
"Air source heat pump, Systems with radiators, electric": {"fuel": "Electricity",
"cop": AVERAGE_ASHP_EFFICIENCY / 100},
"Ground source heat pump, underfloor, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100},
"Electric ceiling heating": {"fuel": "Electricity", "cop": 1},
"Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85},
"Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85},
"Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100},
"Boiler and underfloor heating, electric": {"fuel": "Electricity", "cop": 1},
"Community scheme with CHP, mains gas": {"fuel": "Natural Gas", "cop": 0.85},
"Air source heat pump, radiators and underfloor, electric": {"fuel": "Electricity",
"cop": AVERAGE_ASHP_EFFICIENCY / 100},
"Electric ceiling heating, radiators, electric": {"fuel": "Electricity", "cop": 1},
"Boiler and underfloor heating, mains gas, Boiler and radiators, mains gas": {"fuel": "Natural Gas", "cop": 0.85},
}
# These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
@ -88,3 +105,12 @@ measures_needing_ventilation = [
# If we have a property beyond this size, we assume it's likely large enough to have an ASHP
ASHP_FLOOR_AREA_THRESHOLD = 120 # m2
# Placeholder used for cleaning data. This is a flat, estimated average lighting cost
AVERAGE_LIGHTING_COST = 100
# Average bill, based on British Gas, is £1,838.71. Subtract 100 for lighting and 228 for hot water. This still
# includes appliances, so appliance costs should be removed when this is used
AVERAGE_HEATING_AND_APPLIANCE_COST = 1510.71
# Based on https://energysavingtrust.org.uk/sites/default/files/reports/AtHomewithWater%287%29.pdf
AVERAGE_HOT_WATER_COST = 228

View file

@ -1,5 +1,6 @@
from functools import lru_cache
from pydantic_settings import BaseSettings
from typing import Optional
class Settings(BaseSettings):
@ -35,6 +36,11 @@ class Settings(BaseSettings):
# Other S3 buckets
ENERGY_ASSESSMENTS_BUCKET: str
# Optional AWS creds (only required in local)
AWS_ACCESS_KEY_ID: Optional[str] = None
AWS_SECRET_KEY_ID: Optional[str] = None
AWS_DEFAULT_REGION: Optional[str] = None
class Config:
env_file = "backend/.env"

View file

@ -1,5 +1,7 @@
from sqlalchemy import create_engine
from contextlib import contextmanager
from backend.app.config import get_settings
from sqlmodel import Session
connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
db_string = connection_string.format(
@ -11,4 +13,42 @@ db_string = connection_string.format(
dbname=get_settings().DB_NAME,
)
db_engine = create_engine(db_string, pool_size=5, max_overflow=5)
# db_engine = create_engine(db_string, pool_size=5, max_overflow=5)
# Adjusted database connection to decrease pool size for serverless environments (from lambda) so that
# each lambda doesn't hog all connections
db_engine = create_engine(
db_string,
pool_size=3,
max_overflow=5, # Limit the number of extra connections. With this and pool size, we allow 1 connection per lambda
pool_pre_ping=True,
pool_recycle=300, # Forces SQLAlchemy to close and reopen any connection older than 300 seconds
)
def get_db_session():
    """
    Create a new Session bound to the module-level engine.

    The session is returned as-is; this function does not commit or close it.
    :return: A new Session
    :raises RuntimeError: If the module-level engine is None
    """
    if db_engine is not None:
        return Session(db_engine)
    raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.")
@contextmanager
def db_session():
    """
    Context manager providing a transactional Session bound to the module-level engine.

    The session is committed once the with-block finishes without raising. If an Exception is
    raised (inside the with-block or by the commit itself), the session is rolled back and the
    exception is re-raised. The session is closed in every case.
    """
    session = Session(db_engine)
    try:
        yield session
        # Only reached when the with-block completed without raising
        session.commit()
    except Exception:
        # Undo any pending work, then let the caller see the original error
        session.rollback()
        raise
    finally:
        session.close()
@contextmanager
def db_read_session():
    """
    Context manager providing a Session for reads, bound to the module-level engine.

    No commit or rollback is issued here; the session is simply closed when the with-block exits,
    whether or not it raised.
    """
    # NOTE(review): expire_on_commit=False is presumably set so loaded objects stay usable after
    # the session is finished with - confirm against callers
    session = Session(db_engine, expire_on_commit=False)
    try:
        yield session
    finally:
        session.close()

View file

@ -0,0 +1,13 @@
from .epc_functions import *
from .address_functions import *
from .portfolio_functions import *
from .energy_assessment_functions import *
from .property_functions import *
from .recommendations_functions import *
from .solar_functions import *
from .funding_functions import *
from .materials_functions import *
from .inspections_functions import *
from .non_intrusive_surveys import *
from .whlg_functions import *
from .already_installed_functions import *

View file

@ -0,0 +1,114 @@
from sqlalchemy.orm import Session
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import func
from backend.app.db.models.addresses import PostcodeSearch
from utils.logger import setup_logger
logger = setup_logger()
def _get_associated_records(results, uprn, uprn_key="UPRN"):
    """
    Collect the address records whose uprn_key field equals the given UPRN.

    Each result entry is read from its "DPA" record when it has one, otherwise from its "LPI"
    record.
    :param results: List of result entries, each holding a "DPA" or an "LPI" record
    :param uprn: Value to match; compared as a string
    :param uprn_key: Record field to compare against, e.g. "UPRN" or "PARENT_UPRN"
    :return: List of matching records, in the order they appear in results
    """
    target = str(uprn)
    matches = []
    for entry in results:
        record = entry["DPA"] if "DPA" in entry else entry["LPI"]
        if record.get(uprn_key) == target:
            matches.append(record)
    return matches
def get_associated_uprns(postcode_search: PostcodeSearch, uprn: str | int):
    """
    Given a postcode search and a UPRN, for a remote assessment, fetch all associated UPRNs
    based on the parent UPRN. These will be properties in the same building.

    Parent UPRN is referenced in the following docs:
    https://static.geoplace.co.uk/downloads/GeoPlace-Data-Entry-Conventions-Best-Practice-for-Addresses.pdf

    :param PostcodeSearch postcode_search: The postcode search record
    :param uprn: The UPRN to match
    :return: List of the other UPRNs (as ints) sharing the same parent UPRN. Empty when there is
        no postcode search, the UPRN does not match exactly one record, or the matched record has
        no parent UPRN
    """
    if not postcode_search:
        return []

    # Records hold UPRNs as strings, so compare like with like
    uprn = str(uprn) if isinstance(uprn, int) else uprn
    all_results = postcode_search.result_data["results"]

    own_records = _get_associated_records(results=all_results, uprn=uprn)
    if len(own_records) != 1:
        return []

    parent_uprn = own_records[0].get("PARENT_UPRN")
    if not parent_uprn:
        logger.info("No parent UPRN found, cannot get associated records")
        return []

    # We now fetch all UPRNs with the same parent UPRN, leaving out the property itself
    siblings = _get_associated_records(results=all_results, uprn=parent_uprn, uprn_key="PARENT_UPRN")
    return [int(record["UPRN"]) for record in siblings if record["UPRN"] != str(uprn)]
def get_by_postcodes(session: Session, postcodes: list[str]) -> dict[str, PostcodeSearch]:
    """
    Given a list of postcodes, retrieve the matching rows from the PostcodeSearch table.

    Matching ignores case; empty or None postcodes in the input are skipped.
    :param session: Database session
    :param postcodes: Postcodes to look up
    :return: Dict mapping each upper-cased postcode found to its PostcodeSearch record
    """
    if not postcodes:
        return {}

    wanted = {code.upper() for code in postcodes if code}
    query = session.query(PostcodeSearch).filter(func.upper(PostcodeSearch.postcode).in_(wanted))

    by_postcode = {}
    for record in query.all():
        by_postcode[record.postcode.upper()] = record
    return by_postcode
def get_associated_uprns_from_record(record: PostcodeSearch, uprn: str) -> list[int]:
    """
    Given a postcode search record and a UPRN, return the other UPRNs that share the same
    parent UPRN.
    :param record: The postcode search record
    :param uprn: The UPRN to match
    :return: List of associated UPRNs as ints. Empty when there is no record, the UPRN does not
        match exactly one entry, or the matched entry has no parent UPRN
    """
    if not record:
        return []

    own_records = _get_associated_records(results=record.result_data["results"], uprn=uprn)
    if len(own_records) != 1:
        return []

    parent_uprn = own_records[0].get("PARENT_UPRN")
    if not parent_uprn:
        return []

    siblings = _get_associated_records(
        results=record.result_data["results"], uprn=parent_uprn, uprn_key="PARENT_UPRN"
    )

    # Leave out the property we started from
    own_uprn = str(uprn)
    associated_uprns = []
    for sibling in siblings:
        if sibling["UPRN"] != own_uprn:
            associated_uprns.append(int(sibling["UPRN"]))
    return associated_uprns

View file

@ -0,0 +1,40 @@
from backend.app.db.models.recommendations import InstalledMeasure
from typing import Dict, List, Set
from collections import defaultdict
def get_installed_measure_types_by_uprns(
        session,
        uprns: List[int],
) -> Dict[int, Set[str]]:
    """
    Returns the active installed measure types per UPRN.

    {
        uprn: {"cavity_wall_insulation", "mechanical_ventilation", ...}
    }

    Only UPRNs with at least one active installed measure appear in the result. An empty input
    returns an empty dict without querying the database.
    """
    if not uprns:
        return {}

    query = session.query(InstalledMeasure.uprn, InstalledMeasure.measure_type)
    query = query.filter(InstalledMeasure.is_active.is_(True))
    query = query.filter(InstalledMeasure.uprn.in_(uprns))

    measures_by_uprn: Dict[int, Set[str]] = defaultdict(set)
    for uprn, measure_type in query.all():
        # Enum-like values are stored by their .value, anything else is stored as-is
        label = measure_type.value if hasattr(measure_type, "value") else measure_type
        measures_by_uprn[uprn].add(label)

    return measures_by_uprn

View file

@ -1,3 +1,4 @@
from typing import Iterable
from backend.app.db.models.energy_assessments import (
EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments, DocumentTypeEnum
)
@ -63,27 +64,48 @@ def bulk_insert_energy_assessments(session: Session, data_list: List[dict]) -> D
return uprn_to_assessment_id
def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[EnergyAssessment]:
def get_latest_assessments_for_uprns(
session: Session,
uprns: Iterable[int],
) -> dict[int, dict]:
"""
Retrieve the latest energy assessment for a given UPRN based on the inspection date.
Fetch the latest energy assessment per UPRN in a single query.
:param session: The database session
:param uprn: The unique property reference number
:return: The latest EnergyAssessment object or None if not found
Returns a dict:
uprn -> assessment_dict | empty_response
"""
if not uprn:
return EnergyAssessment.empty_response()
uprns = [u for u in uprns if u]
if not uprns:
return {}
try:
# Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order
latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by(
desc(EnergyAssessment.inspection_date)).first()
# DISTINCT ON requires matching ORDER BY
records = (
session.query(EnergyAssessment)
.filter(EnergyAssessment.uprn.in_(uprns))
.order_by(
EnergyAssessment.uprn,
desc(EnergyAssessment.inspection_date),
)
.distinct(EnergyAssessment.uprn)
.all()
)
return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response()
except Exception as e:
logger.info(f"An error occurred: {e}")
return None
result: dict[int, dict] = {}
for record in records:
result[record.uprn] = record.to_dict()
# Fill missing uprns with empty response
uprn_set = set(uprns)
found_set = set(result.keys())
missing_uprns = uprn_set - found_set
for uprn in missing_uprns:
result[uprn] = EnergyAssessment.empty_response()
return result
def create_scenarios_for_documents(session: Session, document_list: List[dict], uprn_to_assessment_id: dict):

View file

@ -0,0 +1,229 @@
from typing import List
from datetime import datetime, timedelta, timezone
from sqlalchemy.exc import SQLAlchemyError
from backend.app.db.models.epc import EpcStore
from sqlmodel import Session
from sqlalchemy.dialects.postgresql import insert
class EpcStoreService:
    """
    Service layer for EPC data lookup and persistence.

    Every lookup returns a dict with the same six keys
    (``status``, ``epc_api``, ``epc_page``, ``epc_page_rrn``,
    ``epc_api_created_at``, ``epc_page_created_at``) so callers can treat
    single and bulk results identically.
    """

    FRESHNESS_DAYS = 30

    # status labels
    FRESH = "fresh"
    EXPIRED = "expired"
    MISSING = "missing"

    @staticmethod
    def _empty_response(status: str) -> dict:
        """Build the response used when no EPC content may be exposed."""
        return {
            "status": status,
            "epc_api": None,
            "epc_page": None,
            "epc_page_rrn": None,
            "epc_api_created_at": None,
            "epc_page_created_at": None,
        }

    @classmethod
    def _cutoff_date(cls):
        """Return the oldest calendar date (UTC) that still counts as fresh."""
        return (datetime.now(timezone.utc) - timedelta(days=cls.FRESHNESS_DAYS)).date()

    @classmethod
    def _describe_record(cls, record, cutoff_date) -> dict:
        """
        Translate a stored row (or None) into the lookup response.

        :param record: object exposing the EpcStore attributes, or None
        :param cutoff_date: date returned by ``_cutoff_date``
        :return: response dict with status 'fresh', 'expired' or 'missing'
        """
        if record is None or not record.epc_api_created_at:
            # API data missing → treat as missing even if page data exists
            return cls._empty_response(cls.MISSING)

        if record.epc_api_created_at.date() < cutoff_date:
            # We only expose epc_page when epc_api is fresh.
            return cls._empty_response(cls.EXPIRED)

        return {
            "status": cls.FRESH,
            "epc_api": record.epc_api,
            # an empty page is reported as None
            "epc_page": record.epc_page if record.epc_page else None,
            "epc_page_rrn": record.epc_page_rrn,
            "epc_api_created_at": record.epc_api_created_at,
            "epc_page_created_at": record.epc_page_created_at,
        }

    @classmethod
    def get_epc_for_uprn(cls, session: Session, uprn: int):
        """
        Query EPC data for a given UPRN and return a dict describing:
        - epc_api: only if within the last FRESHNESS_DAYS days
        - epc_page: only if epc_api is fresh
        - status: 'fresh', 'expired', or 'missing'

        :param session: database session
        :param uprn: unique property reference number to look up
        :return: response dict (see class docstring for the keys)
        """
        record = session.query(EpcStore).filter(EpcStore.uprn == uprn).first()
        return cls._describe_record(record, cls._cutoff_date())

    @classmethod
    def get_epcs_for_uprns(cls, session: Session, uprns: List[int]) -> dict[int, dict]:
        """
        Given a list of uprns, return a dict mapping each uprn to its EPC data status and content.

        :param session: database session
        :param uprns: UPRNs to look up; an empty list yields an empty dict
        :return: uprn -> response dict; UPRNs without a stored row are 'missing'
        """
        if not uprns:
            return {}

        cutoff_date = cls._cutoff_date()
        records = (
            session.query(EpcStore)
            .filter(EpcStore.uprn.in_(uprns))
            .all()
        )

        result: dict[int, dict] = {
            record.uprn: cls._describe_record(record, cutoff_date)
            for record in records
        }

        # For the uprns not found in records, mark them as missing
        for uprn in set(uprns).difference(result):
            result[uprn] = cls._empty_response(cls.MISSING)

        return result

    @classmethod
    def check_insert_needed(cls, epc_cache, epc_estimated, uprn):
        """
        Check if an insert is needed based on existing data.

        :param epc_cache: lookup response dict for the property
        :param epc_estimated: truthy when the EPC is estimated (never stored)
        :param uprn: UPRN of the property; falsy values are never stored
        :return: True when the cache has no API data or is expired, the EPC
                 is not estimated and a UPRN is available
        """
        no_existing_epc_cache = epc_cache.get("epc_api") is None
        existing_cache_expired = epc_cache.get("status") == cls.EXPIRED
        return bool(
            (no_existing_epc_cache or existing_cache_expired)
            and not epc_estimated
            and uprn
        )

    @staticmethod
    def upsert_epc_data(
            session: Session,
            uprn: int,
            epc_api: dict | None,
            epc_page: str | None,
            epc_page_rrn: str | None,
            epc_api_created_at: datetime | None = None,
            epc_page_created_at: datetime | None = None,
    ):
        """
        Insert or update EPC data for a UPRN.

        Rules:
        - If record exists update it
        - If record does not exist create new
        - Page data is only stored when API data is supplied
        - Missing timestamps default to "now" (UTC) for the data they describe,
          on both the insert and the update path; without a timestamp the
          lookups above would report the stored API data as 'missing'

        The record is added to the session but not committed.

        :return: the created or updated EpcStore record
        """
        has_api = epc_api is not None
        has_page = has_api and epc_page is not None

        now = datetime.now(timezone.utc)
        if has_api and epc_api_created_at is None:
            epc_api_created_at = now
        if has_page and epc_page_created_at is None:
            epc_page_created_at = now

        record = session.query(EpcStore).filter(EpcStore.uprn == uprn).first()

        if not record:
            # insert path
            record = EpcStore(
                uprn=uprn,
                epc_api=epc_api,
                epc_api_created_at=epc_api_created_at,
                epc_page=epc_page if has_api else None,
                epc_page_rrn=epc_page_rrn if has_api else None,
                epc_page_created_at=epc_page_created_at if has_api else None,
            )
            session.add(record)
            return record

        # update path
        if has_api:
            record.epc_api = epc_api
            record.epc_api_created_at = epc_api_created_at

        # update page data only if the caller passed page data AND API data
        if has_page:
            record.epc_page = epc_page
            record.epc_page_rrn = epc_page_rrn
            record.epc_page_created_at = epc_page_created_at

        return record

    @classmethod
    def bulk_upsert_epc_data(cls, session: Session, rows_to_insert: list[dict]):
        """
        Insert or overwrite EPC rows in one statement and commit.

        Each row needs the keys ``uprn``, ``epc_api``, ``epc_page`` and
        ``epc_page_rrn``. When a UPRN appears more than once the last row
        wins: PostgreSQL rejects an ON CONFLICT DO UPDATE statement that
        would touch the same row twice. Timestamps are only set for data
        that is actually present, so a row without API data is reported as
        'missing' rather than 'fresh'.

        :param session: database session (committed by this method)
        :param rows_to_insert: rows to store; an empty list is a no-op
        """
        if not rows_to_insert:
            return

        now = datetime.now(timezone.utc)

        by_uprn: dict = {}
        for row in rows_to_insert:
            by_uprn[row["uprn"]] = {
                "uprn": row["uprn"],
                "epc_api": row["epc_api"],
                "epc_api_created_at": now if row["epc_api"] is not None else None,
                "epc_page": row["epc_page"],
                "epc_page_rrn": row["epc_page_rrn"],
                "epc_page_created_at": now if row["epc_page"] else None,
            }

        insert_stmt = insert(EpcStore).values(list(by_uprn.values()))

        stmt = insert_stmt.on_conflict_do_update(
            index_elements=["uprn"],
            set_={
                "epc_api": insert_stmt.excluded.epc_api,
                "epc_api_created_at": insert_stmt.excluded.epc_api_created_at,
                "epc_page": insert_stmt.excluded.epc_page,
                "epc_page_rrn": insert_stmt.excluded.epc_page_rrn,
                "epc_page_created_at": insert_stmt.excluded.epc_page_created_at,
            },
        )

        session.execute(stmt)
        session.commit()

View file

@ -1,5 +1,6 @@
from sqlalchemy.orm import Session
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy import insert
from backend.app.db.models.funding import FundingPackage, FundingPackageMeasures
@ -69,3 +70,72 @@ def upload_funding(session: Session, p, plan_id, recommendations_to_upload):
session.rollback()
print(f"An error occurred: {e}")
return False
def bulk_upload_funding_packages(
        session: Session,
        funding_payload: list[dict],
):
    """
    Bulk upload funding packages together with their measures.

    Writes to:
    - funding_package
    - funding_package_measures

    Assumes caller manages the transaction (nothing is committed here).

    :param session: database session
    :param funding_payload: one dict per funding package; an optional
                            "measures" list holds that package's measures
    """
    if not funding_payload:
        return

    # 1. Split the payload into package rows and their (parallel) measures
    package_columns = (
        "plan_id",
        "scheme",
        "project_funding",
        "total_uplift",
        "full_project_score",
        "partial_project_score",
        "uplift_project_score",
    )
    package_rows = [
        {column: entry[column] for column in package_columns}
        for entry in funding_payload
    ]
    measures_per_package = [entry.get("measures", []) for entry in funding_payload]

    # 2. Insert the packages and collect the generated IDs
    # NOTE(review): pairing IDs with measures below assumes RETURNING yields
    # rows in the same order as the inserted VALUES - confirm for the
    # database/driver in use.
    inserted = session.execute(
        insert(FundingPackage)
        .values(package_rows)
        .returning(FundingPackage.id)
    )
    package_ids = [row[0] for row in inserted]

    # 3. Attach each measure to the ID of the package it came with
    measure_columns = (
        "measure",
        "material_id",
        "innovation_uplift",
        "partial_project_score",
        "uplift_project_score",
    )
    measure_rows = [
        {
            "funding_package_id": package_id,
            **{column: measure[column] for column in measure_columns},
        }
        for package_id, measures in zip(package_ids, measures_per_package)
        for measure in measures
    ]

    if measure_rows:
        session.execute(insert(FundingPackageMeasures).values(measure_rows))

View file

@ -0,0 +1,212 @@
import re
from dataclasses import dataclass, asdict
from typing import Optional, Dict, Any, Type, TypeVar
from sqlalchemy.orm import Session
from enum import Enum
from datetime import datetime, timedelta
import math
import pytz
import enum
from backend.app.db.models.inspections import (
InspectionModel,
InspectionArchetype,
InspectionArchetype2,
InspectionsWallConstruction,
InspectionsWallInsulation,
InspectionsInsulationMaterial,
InspectionBorescoped,
InspectionsRoofOrientation,
InspectionsTileHung,
InspectionsRendered,
InspectionsCladding,
InspectionsAccessIssues,
)
NON_INTRUSIVE_PREFIX = "non-intrusives:"
@dataclass
class InspectionData:
    """
    Inspection values extracted from one config row.

    Every field is optional and defaults to None; the enum-typed fields
    hold a member of the corresponding inspection enum.
    """
    # Enum-typed survey observations
    archetype: Optional[InspectionArchetype] = None
    archetype_2: Optional[InspectionArchetype2] = None
    wall_construction: Optional[InspectionsWallConstruction] = None
    insulation: Optional[InspectionsWallInsulation] = None
    insulation_material: Optional[InspectionsInsulationMaterial] = None
    borescoped: Optional[InspectionBorescoped] = None
    roof_orientation: Optional[InspectionsRoofOrientation] = None
    tile_hung: Optional[InspectionsTileHung] = None
    rendered: Optional[InspectionsRendered] = None
    cladding: Optional[InspectionsCladding] = None
    access_issues: Optional[InspectionsAccessIssues] = None
    # Free-form survey metadata
    date: Optional[datetime] = None  # Reflects the date when the survey was actually conducted
    notes: Optional[str] = None
    surveyor_name: Optional[str] = None
def _clean_string(value: Any) -> Optional[str]:
"""Normalize strings for enum matching, tolerant of NaN/None."""
if value is None:
return None
if isinstance(value, float) and math.isnan(value):
return None
if not isinstance(value, str):
return None
v = (
value.strip()
.lower()
.replace("", '"')
.replace("", '"')
.replace("", "'")
)
return re.sub(r"\s+", " ", v)
E = TypeVar("E", bound=Enum)


def _match_enum(value: Any, enum_cls: Type[E]) -> Optional[E]:
    """
    Case-insensitive fuzzy matching for enums, tolerant of NaN/None.

    An exact match on the lower-cased member value wins. Otherwise the
    first member (in declaration order) whose value contains the cleaned
    input, or is contained in it, is returned.

    :param value: raw cell value
    :param enum_cls: enum class whose members have string values
    :return: the matching member, or None when nothing matches or the
             cleaned input is empty
    """
    needle = _clean_string(value)
    if not needle:
        return None

    exact = next(
        (member for member in enum_cls if needle == member.value.lower()),
        None,
    )
    if exact is not None:
        return exact

    return next(
        (
            member
            for member in enum_cls
            if needle in member.value.lower() or member.value.lower() in needle
        ),
        None,
    )
def _lower_key_dict(d: dict) -> dict:
"""Convert all keys to lowercase for case-insensitive lookup."""
return {str(k).lower(): v for k, v in d.items() if isinstance(k, str)}
def extract_inspection_data(config: Dict[str, Any]) -> Optional[InspectionData]:
    """
    Extract and map inspection data from a config row.

    Column names are matched case-insensitively. Enum columns are resolved
    with ``_match_enum``; date and notes are copied through unchanged.

    :param config: one row of configuration, keyed by column name
    :return: the populated InspectionData, or None when the row has no
             column starting with the non-intrusive prefix
    """
    config_lower = _lower_key_dict(config)

    if not any(key.startswith(NON_INTRUSIVE_PREFIX) for key in config_lower):
        return None

    # (InspectionData attribute, lower-cased column name, enum to match against)
    enum_columns = (
        ("archetype", "non-intrusives: archetype", InspectionArchetype),
        ("archetype_2", "non-intrusives: archetype 2", InspectionArchetype2),
        ("wall_construction", "non-intrusives: construction", InspectionsWallConstruction),
        ("insulation", "non-intrusives: insulated", InspectionsWallInsulation),
        ("insulation_material", "non-intrusives: material", InspectionsInsulationMaterial),
        ("borescoped", "non-intrusives: boroscoped?", InspectionBorescoped),
        ("roof_orientation", "non-intrusives: roof orientation", InspectionsRoofOrientation),
        ("tile_hung", "non-intrusives: tile hung", InspectionsTileHung),
        ("rendered", "non-intrusives: rendered", InspectionsRendered),
        ("cladding", "non-intrusives: cladding", InspectionsCladding),
        ("access_issues", "non-intrusives: access issues", InspectionsAccessIssues),
    )

    data = InspectionData()
    for attribute, column, enum_cls in enum_columns:
        setattr(data, attribute, _match_enum(config_lower.get(column), enum_cls))

    data.date = config_lower.get("non-intrusives: date")
    data.notes = config_lower.get("non-intrusives: further surveyor notes")

    # Convert surveyor name to title case if present. Only strings are
    # accepted: an empty cell may arrive as a truthy non-string (e.g. a
    # NaN float), which has no .title().
    surveyor = config_lower.get("non-intrusives: name of surveyor")
    data.surveyor_name = surveyor.title() if isinstance(surveyor, str) and surveyor else None

    return data
def bulk_upsert_inspections_pg(session: Session, inspections_map):
    """
    Bulk insert/update inspection records:
      - 'created_at' = actual survey date
      - 'uploaded_at' = time of upload or update
      - If an inspection exists for the same property on the same date overwrite
      - Otherwise insert a new record

    Entries without a usable survey date (missing, NaN/NaT, or not a
    datetime) are skipped. Nothing is committed here.

    :param session: database session
    :param inspections_map: property_id -> dataclass instance describing
                            the inspection; must expose ``survey_date``
    """
    # Local import: the stdlib UTC tzinfo, so the block does not depend on
    # the module-level timezone library.
    from datetime import timezone

    if not inspections_map:
        return

    now = datetime.now(timezone.utc)

    for property_id, data in inspections_map.items():
        record = asdict(data)

        # NOTE(review): InspectionData (defined in this module) names its
        # field `date`, not `survey_date`, so such instances are skipped
        # here - confirm which type callers actually pass.
        survey_date = getattr(data, "survey_date", None) or record.get("survey_date")
        if not survey_date:
            continue  # skip if no survey date available

        # Convert to a plain datetime if needed (objects exposing
        # to_pydatetime, e.g. pandas timestamps)
        if hasattr(survey_date, "to_pydatetime"):
            survey_date = survey_date.to_pydatetime()

        # Skip values that cannot serve as a date: non-datetime objects
        # (NaN floats, strings) and not-a-time markers, which never
        # compare equal to themselves.
        if not isinstance(survey_date, datetime) or survey_date != survey_date:
            continue

        if survey_date.tzinfo is None:
            survey_date = survey_date.replace(tzinfo=timezone.utc)

        record["property_id"] = property_id
        record["created_at"] = survey_date
        record["uploaded_at"] = now

        # Normalize enums and NaNs (only existing keys are reassigned, so
        # iterating while assigning is safe)
        for key, value in record.items():
            if isinstance(value, enum.Enum):
                record[key] = value.value
            elif isinstance(value, float) and math.isnan(value):
                record[key] = None

        # Find existing inspection *for same property on same day*
        start_of_day = survey_date.replace(hour=0, minute=0, second=0, microsecond=0)
        end_of_day = start_of_day + timedelta(days=1)

        existing_inspection = (
            session.query(InspectionModel)
            .filter(
                InspectionModel.property_id == property_id,
                InspectionModel.created_at >= start_of_day,
                InspectionModel.created_at < end_of_day,
            )
            .first()
        )

        if existing_inspection:
            # Overwrite existing record (same survey day)
            for field, value in record.items():
                setattr(existing_inspection, field, value)
            existing_inspection.uploaded_at = now
        else:
            # Create new inspection for new day
            session.add(InspectionModel(**record))

        session.flush()

View file

@ -3,16 +3,20 @@
###
import datetime
import pytz
from sqlalchemy import select, or_, bindparam, update
from sqlalchemy.orm import Session
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy.dialects.postgresql import insert
from backend.addresses.Address import Address
from backend.app.db.models.portfolio import (
PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel,
PropertyDetailsSpatial
)
from sqlalchemy.orm.exc import NoResultFound
def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str,
energy_assessment: dict) -> (int, bool):
energy_assessment: dict, landlord_property_id: str | None = None) -> (int, bool):
"""
This function will create a record for the property in the database if it does not exist.
If it does exist, it will just update the updated_at field.
@ -20,6 +24,9 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
:param portfolio_id: The ID of the portfolio the property belongs to
:param address: The address of the property
:param postcode: The postcode of the property
:param uprn: The UPRN of the property
:param energy_assessment: The energy assessment data for the property
:param landlord_property_id: The landlord property ID if available
:return: The ID of the property and a boolean indicating whether it was created or not
"""
@ -49,6 +56,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
postcode=postcode,
portfolio_id=portfolio_id,
uprn=uprn,
landlord_property_id=landlord_property_id,
creation_status=PropertyCreationStatus.LOADING,
status=status,
has_pre_condition_report=False,
@ -63,6 +71,30 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode:
return new_property.id, True
def ensure_property_exists(session, body, epc_searcher, energy_assessment, landlord_property_id=None):
    """
    Wrapper around create_property that decides whether the property
    should be processed further.

    :param session: The database session
    :param body: request body; portfolio_id and multi_plan are read from it
    :param epc_searcher: provides address_clean, postcode_clean and uprn
    :param energy_assessment: The energy assessment data for the property
    :param landlord_property_id: optional landlord reference, stored as a string
    :return: (property_id, is_new) when the property is new or the request
             is multi-plan; (None, False) otherwise
    """
    landlord_ref = None if landlord_property_id is None else str(landlord_property_id)

    property_id, is_new = create_property(
        session=session,
        portfolio_id=body.portfolio_id,
        address=epc_searcher.address_clean,
        postcode=epc_searcher.postcode_clean,
        uprn=epc_searcher.uprn,
        energy_assessment=energy_assessment,
        landlord_property_id=landlord_ref,
    )

    # An existing property is only processed again for multi-plan requests
    if is_new or body.multi_plan:
        return property_id, is_new
    return None, False
def create_property_targets(
session: Session, property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None
):
@ -175,3 +207,162 @@ def update_or_create_property_spatial_details(session: Session, uprn: int, prope
session.flush()
return True
def get_existing_properties(session, portfolio_id, uprns, landlord_ids):
    """
    Bulk method for checking for existing properties.

    :param session: database session
                    (NOTE(review): must provide ``exec``; other helpers in
                    this module call ``execute`` - confirm the session type)
    :param portfolio_id: portfolio to search in
    :param uprns: UPRNs to match
    :param landlord_ids: landlord property IDs to match
    :return: list of properties in the portfolio matching either a UPRN or
             a landlord property ID
    """
    matches_identifier = or_(
        PropertyModel.uprn.in_(uprns),
        PropertyModel.landlord_property_id.in_(landlord_ids),
    )
    stmt = (
        select(PropertyModel)
        .where(PropertyModel.portfolio_id == portfolio_id)
        .where(matches_identifier)
    )
    return session.exec(stmt).scalars().all()
def bulk_create_properties(
        session,
        body,
        addresses: list[Address],  # these are *new* addresses
        energy_assessment_by_uprn: dict[int, dict],
):
    """
    Insert one property row per address in a single statement.

    Rows that conflict on (portfolio_id, uprn) are left untouched and are
    therefore absent from the returned rows.

    :param session: database session (flushed, not committed)
    :param body: request body; portfolio_id is read from it
    :param addresses: addresses to create properties for
    :param energy_assessment_by_uprn: uprn -> energy assessment dict
    :return: (id, uprn, landlord_property_id) rows for the inserted
             properties; an empty list when there are no addresses
    """

    def _property_row(addr) -> dict:
        # A property with an "epc" entry in its assessment goes to survey;
        # otherwise it still needs an assessment.
        assessment = energy_assessment_by_uprn.get(addr.uprn, {})
        if assessment.get("epc"):
            status = PortfolioStatus.SURVEY.value
        else:
            status = PortfolioStatus.ASSESSMENT.value
        return {
            "address": addr.address1,
            "postcode": addr.postcode,
            "portfolio_id": body.portfolio_id,
            "uprn": addr.uprn,
            "landlord_property_id": addr.landlord_property_id,
            "creation_status": PropertyCreationStatus.LOADING,
            "status": status,
            "has_pre_condition_report": False,
            "has_recommendations": False,
        }

    rows = [_property_row(addr) for addr in addresses]
    if not rows:
        return []

    insert_stmt = insert(PropertyModel).values(rows)
    insert_stmt = insert_stmt.on_conflict_do_nothing(
        index_elements=["portfolio_id", "uprn"],
        index_where=PropertyModel.uprn.isnot(None),
    )
    insert_stmt = insert_stmt.returning(
        PropertyModel.id,
        PropertyModel.uprn,
        PropertyModel.landlord_property_id,
    )

    result = session.execute(insert_stmt)
    session.flush()
    return result.fetchall()
def bulk_update_properties(session: Session, property_updates: list[dict]):
    """
    Apply column updates to many properties.

    Each entry needs ``property_id``, ``portfolio_id`` and ``data`` (a
    dict of column name -> new value). Entries are grouped by the set of
    columns they update and one statement is executed per group, so
    entries may update different columns; building the statement from the
    first entry alone would leave the other entries' columns unbound.
    ``updated_at`` is stamped with the current UTC time unless the entry
    supplies its own value. Nothing is committed here.

    :param session: database session
    :param property_updates: updates to apply; an empty list is a no-op
    """
    if not property_updates:
        return

    now = datetime.datetime.now(pytz.utc)

    # column set -> parameter dicts sharing exactly those columns
    batches: dict[tuple, list[dict]] = {}
    for row in property_updates:
        columns = tuple(sorted(row["data"]))
        batches.setdefault(columns, []).append(
            {
                "b_id": row["property_id"],  # renamed bind param
                "b_portfolio_id": row["portfolio_id"],
                **row["data"],
            }
        )

    for columns, payload in batches.items():
        assignments = {column: bindparam(column) for column in columns}
        # a caller-supplied updated_at wins over the automatic stamp
        assignments.setdefault("updated_at", now)

        stmt = (
            update(PropertyModel.__table__)
            .where(
                PropertyModel.id == bindparam("b_id"),
                PropertyModel.portfolio_id == bindparam("b_portfolio_id"),
            )
            .values(**assignments)
        )

        session.execute(
            stmt,
            payload,
            execution_options={"synchronize_session": False},
        )
def bulk_upsert_property_details_epc(session: Session, rows: list[dict]):
    """
    Insert EPC detail rows, overwriting rows that already exist for the
    same (portfolio_id, property_id).

    On conflict every column except ``id`` is set from the incoming row.
    NOTE(review): this includes columns the incoming rows do not supply -
    confirm that resetting those is intended.

    :param session: database session (nothing is committed here)
    :param rows: rows to upsert; an empty list is a no-op
    """
    if not rows:
        return

    upsert = insert(PropertyDetailsEpcModel).values(rows)

    overwrite = {}
    for column in PropertyDetailsEpcModel.__table__.columns:
        if column.name in ("id",):
            continue
        overwrite[column.name] = upsert.excluded[column.name]

    session.execute(
        upsert.on_conflict_do_update(
            index_elements=["portfolio_id", "property_id"],
            set_=overwrite,
        )
    )
def bulk_upsert_property_spatial(session: Session, rows: list[dict]):
    """
    Insert spatial detail rows, overwriting rows that already exist for
    the same UPRN.

    Each entry needs ``uprn`` and ``data`` (column name -> value). On
    conflict every column except ``id`` and ``uprn`` is set from the
    incoming row.

    :param session: database session (nothing is committed here)
    :param rows: entries to upsert; an empty list is a no-op
    """
    if not rows:
        return

    # Flatten each entry into a single column -> value mapping
    values = [{"uprn": row["uprn"], **row["data"]} for row in rows]

    upsert = insert(PropertyDetailsSpatial).values(values)

    overwrite = {}
    for column in PropertyDetailsSpatial.__table__.columns:
        if column.name in ("id", "uprn"):
            continue
        overwrite[column.name] = upsert.excluded[column.name]

    session.execute(
        upsert.on_conflict_do_update(
            index_elements=["uprn"],
            set_=overwrite,
        )
    )

View file

@ -1,13 +1,93 @@
from sqlalchemy import text
from sqlalchemy import insert, delete
from sqlalchemy.orm import Session
from sqlalchemy.exc import SQLAlchemyError
from backend.app.db.models.recommendations import (
Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
)
from backend.app.db.models.portfolio import (
PropertyModel, PropertyTargetsModel, PropertyDetailsEpcModel
)
from backend.app.db.models.funding import FundingPackageMeasures, FundingPackage
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session, db_read_session
def prepare_plan_data(
        p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations,
        rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0,
):
    """
    Utility function to prepare the data that goes into the production of a plan.

    Savings and costs are summed over the default recommendations that are
    not flagged ``already_installed``. Valuation figures are None when no
    current valuation is available.

    :param p: Instantiated property
    :param body: request body, PlanTriggerRequest
    :param scenario_id: unique identifier for the scenario
    :param eco_packages: Pre-constructed eco packages, keyed by property id;
                         the third element of the entry is used as plan_type
    :param valuations: valuation improvement data
    :param new_sap_points: sap points, post default recommendations
    :param new_epc: new epc rating, post default recommendations
    :param default_recommendations: list of default recommendations for a property
    :param rebaselining_carbon: carbon emissions adjustment for rebaselining
    :param rebaselining_heat_demand: heat demand adjustment for rebaselining
                                     (accepted for interface compatibility; not used here)
    :param rebaselining_kwh: kwh consumption adjustment for rebaselining
    :param rebaselining_bills: energy bill adjustment for rebaselining
    :return: dict of plan values
    """
    # Measures that are already installed contribute neither savings nor cost
    pending = [r for r in default_recommendations if not r.get("already_installed", False)]

    # Plan carbon savings
    co2_savings = sum(r["co2_equivalent_savings"] for r in pending)
    post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings

    # Plan bill savings
    energy_bill_savings = sum(r["energy_cost_savings"] for r in pending)
    post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings

    # energy consumption
    energy_consumption_savings = sum(r["kwh_savings"] for r in pending)
    post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings

    current_value = valuations["current_value"]
    valuation_post_retrofit, valuation_increase = None, None
    if current_value:
        valuation_increase = valuations["average_increase"]
        valuation_post_retrofit = valuations["average_increased_value"]

    def _uplift(key):
        # Without a current valuation there is nothing to subtract from;
        # report None instead of failing on None arithmetic.
        if current_value is None:
            return None
        increased_value = valuations[key]
        if increased_value is None:
            return None
        return increased_value - current_value

    # plan costing data
    cost_of_works = sum(r["total"] for r in pending)
    contingency_cost = sum(r.get("contingency", 0) for r in pending)

    return {
        "portfolio_id": body.portfolio_id,
        "property_id": p.id,
        "scenario_id": scenario_id,
        "is_default": True if p.is_new else False,
        "name": body.scenario_name,
        "valuation_increase_lower_bound": _uplift("lower_bound_increased_value"),
        "valuation_increase_upper_bound": _uplift("upper_bound_increased_value"),
        "valuation_increase_average": _uplift("average_increased_value"),
        "post_sap_points": float(new_sap_points),
        "post_epc_rating": new_epc,
        "post_co2_emissions": float(post_co2_emissions),
        "co2_savings": float(co2_savings),
        "post_energy_bill": float(post_energy_bill),
        "energy_bill_savings": float(energy_bill_savings),
        "post_energy_consumption": float(post_energy_consumption),
        "energy_consumption_savings": float(energy_consumption_savings),
        "valuation_post_retrofit": valuation_post_retrofit,
        "valuation_increase": valuation_increase,
        "cost_of_works": float(cost_of_works),
        "contingency_cost": float(contingency_cost),
        "plan_type": eco_packages.get(p.id, (None, None, None))[2]
    }
def create_plan(session: Session, plan):
@ -27,27 +107,47 @@ def create_plan(session: Session, plan):
raise e
def create_scenario(session: Session, scenario):
"""
This function will create a record for the scenario in the database if it does not exist.
:param session: The database session
:param scenario: dictionary of data representing a scenario to be created
"""
try:
def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int, int]:
if not plans_to_create:
return {}
# Before creating a new scenario, we check if there is a scenario for this portfolio id already
# If there is, it means that any new scnario created will NOT be the default scenario
existing_scenario = session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first()
scenario["is_default"] = True if not existing_scenario else False
payload = [
{
"property_id": p["property_id"],
**p["plan_data"],
}
for p in plans_to_create
]
new_scenario = Scenario(**scenario)
session.add(new_scenario)
session.flush()
session.commit()
return new_scenario
except SQLAlchemyError as e:
session.rollback()
raise e
stmt = (
insert(Plan)
.values(payload)
.returning(Plan.id, Plan.property_id)
)
result = session.execute(stmt).all()
# property_id -> plan_id
return {row.property_id: row.id for row in result}
def create_scenario(session: Session, scenario: dict) -> int:
existing_scenario = (
session.query(Scenario)
.filter_by(portfolio_id=scenario["portfolio_id"])
.first()
)
scenario["is_default"] = not bool(existing_scenario)
new_scenario = Scenario(**scenario)
session.add(new_scenario)
session.flush() # ensures ID is populated
scenario_id = new_scenario.id
session.commit()
return scenario_id
def create_recommendation(session: Session, recommendation):
@ -168,54 +268,277 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
return False
def clear_portfolio(session: Session, portfolio_id: int):
# Fetch all property IDs associated with the given portfolio
property_ids = session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == portfolio_id).all()
property_ids = [p.id for p in property_ids]
def bulk_upload_recommendations_and_materials(
session: Session,
recommendation_payload: list[dict],
):
if not recommendation_payload:
return
# Fetch all recommendation IDs associated with the properties
recommendation_ids = session.query(Recommendation.id).filter(Recommendation.property_id.in_(property_ids)).all()
recommendation_ids = [r.id for r in recommendation_ids]
# ---------------------------------------------------------
# 1. Prepare recommendation rows
# ---------------------------------------------------------
recommendation_rows = []
parts_by_index = []
plan_ids_by_index = []
# Fetch all plan IDs associated with the portfolio
plan_ids = session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).all()
plan_ids = [p.id for p in plan_ids]
for rec in recommendation_payload:
recommendation_rows.append({
"property_id": rec["property_id"],
"type": rec["type"],
"measure_type": rec["measure_type"],
"description": rec["description"],
"estimated_cost": rec["estimated_cost"],
"default": rec["default"],
"starting_u_value": rec["starting_u_value"],
"new_u_value": rec["new_u_value"],
"sap_points": rec["sap_points"],
"heat_demand": rec["heat_demand"],
"kwh_savings": rec["kwh_savings"],
"co2_equivalent_savings": rec["co2_equivalent_savings"],
"energy_savings": rec["energy_savings"],
"energy_cost_savings": rec["energy_cost_savings"],
"total_work_hours": rec["total_work_hours"],
"labour_days": rec["labour_days"],
"already_installed": rec["already_installed"],
})
# Delete all entries from RecommendationMaterials for these recommendations
session.execute(
delete(RecommendationMaterials).where(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
parts_by_index.append(rec["parts"])
plan_ids_by_index.append(rec["plan_id"])
# ---------------------------------------------------------
# 2. Insert recommendations and get IDs
# ---------------------------------------------------------
result = session.execute(
insert(Recommendation)
.values(recommendation_rows)
.returning(Recommendation.id)
)
# Delete all entries from PlanRecommendations that reference plans in the portfolio
session.execute(delete(PlanRecommendations).where(PlanRecommendations.plan_id.in_(
session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).subquery().as_scalar()
)))
recommendation_ids = [row[0] for row in result]
# ---------------------------------------------------------
# 3. Insert recommendation materials
# ---------------------------------------------------------
materials_rows = []
for recommendation_id, parts in zip(recommendation_ids, parts_by_index):
for part in parts:
materials_rows.append({
"recommendation_id": recommendation_id,
"material_id": part["material_id"],
"depth": part["depth"],
"quantity": part["quantity"],
"quantity_unit": part["quantity_unit"],
"estimated_cost": part["estimated_cost"],
})
if materials_rows:
session.execute(
insert(RecommendationMaterials).values(materials_rows)
)
# ---------------------------------------------------------
# 4. Insert plan ↔ recommendation links
# ---------------------------------------------------------
plan_recommendation_rows = [
{
"plan_id": plan_id,
"recommendation_id": recommendation_id,
}
for plan_id, recommendation_id in zip(
plan_ids_by_index, recommendation_ids
)
]
# Delete FundingPackageMeasures → FundingPackage → Plan
session.execute(
delete(FundingPackageMeasures).where(FundingPackageMeasures.funding_package_id.in_(
session.query(FundingPackage.id).filter(FundingPackage.plan_id.in_(plan_ids))
))
)
session.execute(
delete(FundingPackage).where(FundingPackage.plan_id.in_(plan_ids))
insert(PlanRecommendations).values(plan_recommendation_rows)
)
# Delete all Plans associated with the portfolio
session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id))
# Delete all Scenarios associated with the portfolio
session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id))
def chunked(iterable, size=100):
    """
    Yield consecutive slices of *iterable*, each at most *size* items long.

    Despite the parameter name, the argument must support ``len()`` and
    slicing.
    """
    offsets = range(0, len(iterable), size)
    for offset in offsets:
        stop = offset + size
        yield iterable[offset:stop]
# Delete all Recommendations associated with the properties
session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids)))
# Now, delete the PropertyModels and related details
# Delete PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel, and PropertyModel
session.execute(delete(PropertyTargetsModel).where(PropertyTargetsModel.portfolio_id == portfolio_id))
# session.execute(delete(PropertyDetailsMeter).where(PropertyDetailsMeter.uprn.in_(property_ids)))
session.execute(delete(PropertyDetailsEpcModel).where(PropertyDetailsEpcModel.portfolio_id == portfolio_id))
session.execute(delete(PropertyModel).where(PropertyModel.portfolio_id == portfolio_id))
def get_property_ids(portfolio_id: int) -> list[int]:
    """
    Return the IDs of all properties in a portfolio.

    Opens its own session via db_read_session.

    :param portfolio_id: portfolio whose properties are listed
    :return: list of property IDs
    """
    with db_read_session() as session:
        id_query = session.query(PropertyModel.id).filter(
            PropertyModel.portfolio_id == portfolio_id
        )
        # each result row is a 1-tuple holding the id
        return [row[0] for row in id_query.all()]
# Commit the changes
session.commit()
def delete_property_batch(session: Session, property_ids: list[int]):
    """Delete a batch of properties and the rows that reference them.

    Runs a fixed sequence of raw ``DELETE`` statements on ``session``, each
    bound to the same ``property_ids`` list. Nothing is committed here; the
    caller owns the transaction. An empty ``property_ids`` is a no-op.

    The statements run in the order listed: rows reached through
    ``recommendation`` / ``plan`` first, then those tables themselves, then
    the property-scoped detail tables, and ``property`` last (presumably so
    foreign keys are never left dangling; confirm against the schema).
    """
    if not property_ids:
        return

    bind_params = {"property_ids": property_ids}

    ordered_statements = (
        # recommendation_materials (via recommendation)
        """
        DELETE FROM recommendation_materials rm
        USING recommendation r
        WHERE rm.recommendation_id = r.id
        AND r.property_id = ANY(:property_ids)
        """,
        # plan_recommendations (via plan)
        """
        DELETE FROM plan_recommendations pr
        USING plan p
        WHERE pr.plan_id = p.id
        AND p.property_id = ANY(:property_ids)
        """,
        # funding_package_measures (via funding_package and plan)
        """
        DELETE FROM funding_package_measures fpm
        USING funding_package fp, plan p
        WHERE fpm.funding_package_id = fp.id
        AND fp.plan_id = p.id
        AND p.property_id = ANY(:property_ids)
        """,
        # inspections (direct)
        """
        DELETE FROM inspections
        WHERE property_id = ANY(:property_ids)
        """,
        # funding_package (via plan)
        """
        DELETE FROM funding_package fp
        USING plan p
        WHERE fp.plan_id = p.id
        AND p.property_id = ANY(:property_ids)
        """,
        # recommendation (direct)
        """
        DELETE FROM recommendation
        WHERE property_id = ANY(:property_ids)
        """,
        # plan (direct)
        """
        DELETE FROM plan
        WHERE property_id = ANY(:property_ids)
        """,
        # property-scoped detail tables
        """
        DELETE FROM property_details_epc
        WHERE property_id = ANY(:property_ids)
        """,
        """
        DELETE FROM property_targets
        WHERE property_id = ANY(:property_ids)
        """,
        # properties LAST
        """
        DELETE FROM property
        WHERE id = ANY(:property_ids)
        """,
    )

    for statement in ordered_statements:
        session.execute(text(statement), bind_params)
def portfolio_has_properties(portfolio_id: int) -> bool:
    """Tell whether at least one property row still belongs to the portfolio.

    Issues an ``EXISTS`` query through a ``db_read_session()`` context and
    returns its scalar result.
    """
    with db_read_session() as session:
        any_property = (
            session.query(PropertyModel)
            .filter(PropertyModel.portfolio_id == portfolio_id)
            .exists()
        )
        return session.query(any_property).scalar()
def delete_portfolio_scenarios_if_empty(portfolio_id: int):
    """Remove the portfolio's scenarios, but only once it has no properties.

    When ``portfolio_has_properties`` reports remaining properties the
    function prints a notice and leaves the scenarios alone. Otherwise all
    ``Scenario`` rows for the portfolio are deleted inside a ``db_session()``
    context (no explicit commit is issued here).
    """
    still_populated = portfolio_has_properties(portfolio_id)
    if still_populated:
        print("Properties still exist — skipping scenario deletion")
        return

    with db_session() as session:
        scenario_delete = delete(Scenario).where(Scenario.portfolio_id == portfolio_id)
        session.execute(scenario_delete)

    print("Deleted scenarios for empty portfolio")
def clear_portfolio_in_batches(
    portfolio_id: int,
    property_batch_size: int = 25,
):
    """Delete every property of a portfolio in fixed-size batches.

    Each batch is deleted by ``delete_property_batch`` inside its own
    ``db_session()`` context, so a failure part-way through leaves earlier
    batches deleted. Once all batches are processed (or when there were no
    properties to begin with) the portfolio's scenarios are removed via
    ``delete_portfolio_scenarios_if_empty``.

    :param portfolio_id: Portfolio whose properties should be removed
    :param property_batch_size: Maximum number of properties per batch
    :raises ValueError: If ``property_batch_size`` is smaller than 1. Zero
        used to fail with ZeroDivisionError; a negative value deleted
        nothing and still reported success
    """
    import time

    if property_batch_size < 1:
        raise ValueError(
            f"property_batch_size must be a positive integer, got {property_batch_size!r}"
        )

    property_ids = get_property_ids(portfolio_id)
    if not property_ids:
        print("No properties found.")
        delete_portfolio_scenarios_if_empty(portfolio_id)
        return

    # Ceiling division: number of batches needed to cover every property.
    total = (len(property_ids) + property_batch_size - 1) // property_batch_size

    for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1):
        print(f"Deleting batch {i}/{total} ({len(batch)} properties)")
        # perf_counter is monotonic, so the duration cannot go negative or
        # jump if the wall clock is adjusted mid-run.
        started = time.perf_counter()
        with db_session() as session:
            delete_property_batch(session, batch)
        # Measured after the session context exits, so the figure includes
        # whatever work the context manager does on exit.
        elapsed = time.perf_counter() - started
        print(f"Batch {i} deleted in {elapsed:.2f} seconds")

    # scenario deletion happens AFTER all properties are gone
    delete_portfolio_scenarios_if_empty(portfolio_id)
    print("Portfolio cleared in batches.")

View file

@ -0,0 +1,323 @@
# ---- Standard Library ----
from typing import Optional, Dict, Any
from datetime import datetime, timezone
from uuid import UUID
import json
# ---- SQLModel / SQLAlchemy ----
from sqlmodel import Session, select
# ---- DB Session ----
from backend.app.db.connection import get_db_session
# ---- Models ----
from backend.app.db.models.tasks import Task, SubTask
# ============================================================
# SubTask Interface
# ============================================================
class SubTaskInterface:
    """
    CRUD operations for SubTask + cascading Task progress updates.

    Every public method opens its own session through ``get_db_session()``
    and commits its change before returning. Methods that can change a
    subtask's status also re-derive the parent Task status via
    ``_update_task_progress``.
    """

    # --------------------------------------------------------
    # CREATE SUBTASK
    # --------------------------------------------------------
    def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, status=None):
        """
        Create a SubTask under an existing Task and refresh the Task status.

        :param task_id: UUID of the parent Task; it must already exist
        :param inputs: Optional job inputs, stored as a JSON string. A falsy
            value (``None`` or an empty dict) is stored as ``None``
        :param status: Initial status; "waiting" is used when omitted
        :return: UUID of the newly created SubTask
        :raises ValueError: If the parent Task does not exist
        """
        now = datetime.now(timezone.utc)

        with get_db_session() as session:
            task = session.get(Task, task_id)
            if not task:
                raise ValueError(f"Task {task_id} not found")

            # We treat waiting as the default status
            status = "waiting" if status is None else status

            # NOTE: job_started is stamped at creation time, so the
            # "job_started is None" branch in update_subtask_status only
            # applies to rows created some other way.
            subtask = SubTask(
                task_id=task_id,
                inputs=json.dumps(inputs) if inputs else None,
                status=status,
                job_started=now,
                job_completed=None,
            )

            session.add(subtask)
            session.commit()
            session.refresh(subtask)

            # Read the id while the instance is freshly loaded: the commit
            # inside _update_task_progress may expire its attributes again.
            subtask_id = subtask.id

            # Recalculate parent task progress
            self._update_task_progress(session, task_id)

            return subtask_id

    # --------------------------------------------------------
    # UPDATE STATUS (in progress, complete, failed)
    # --------------------------------------------------------
    def update_subtask_status(
        self, subtask_id: UUID, status: str, outputs=None, cloud_logs_url=None
    ):
        """
        Update the status of a subtask, and recalculate the parent task progress.

        The status is stored lower-cased. "in progress" stamps ``job_started``
        if it is still empty; "complete" and "failed" stamp ``job_completed``.

        :param subtask_id: UUID of the subtask to update
        :param status: New status (in progress, complete, failed)
        :param outputs: Optional outputs to set (stored as a JSON string)
        :param cloud_logs_url: Optional cloud logs URL to set
        :return: The refreshed SubTask
        :raises ValueError: If the subtask does not exist
        """
        now = datetime.now(timezone.utc)

        with get_db_session() as session:
            subtask = session.get(SubTask, subtask_id)
            if not subtask:
                raise ValueError(f"SubTask {subtask_id} not found")

            normalized = status.lower()

            # When job really starts
            if normalized == "in progress" and subtask.job_started is None:
                subtask.job_started = now

            # Completed or failed
            if normalized in ("complete", "failed"):
                subtask.job_completed = now

            subtask.status = normalized
            subtask.updated_at = now

            if outputs is not None:
                subtask.outputs = json.dumps(outputs)

            if cloud_logs_url is not None:
                subtask.cloud_logs_url = cloud_logs_url

            session.add(subtask)
            session.commit()

            # Recalculate task status
            self._update_task_progress(session, subtask.task_id)

            session.refresh(subtask)
            return subtask

    # --------------------------------------------------------
    # UPDATE OUTPUTS
    # --------------------------------------------------------
    @staticmethod
    def update_subtask_output(subtask_id: UUID, outputs: Dict[str, Any]):
        """
        Replace the stored outputs of a subtask; status is left untouched.

        :param subtask_id: UUID of the subtask to update
        :param outputs: Outputs to store (serialised with ``json.dumps``)
        :return: The refreshed SubTask
        :raises ValueError: If the subtask does not exist
        """
        now = datetime.now(timezone.utc)

        with get_db_session() as session:
            subtask = session.get(SubTask, subtask_id)
            if not subtask:
                raise ValueError(f"SubTask {subtask_id} not found")

            subtask.outputs = json.dumps(outputs)
            subtask.updated_at = now

            session.add(subtask)
            session.commit()
            session.refresh(subtask)
            return subtask

    # --------------------------------------------------------
    # UPDATE CLOUD LOGS URL
    # --------------------------------------------------------
    @staticmethod
    def update_subtask_logs(subtask_id: UUID, cloud_logs_url: str):
        """
        Replace the cloud logs URL of a subtask; status is left untouched.

        :param subtask_id: UUID of the subtask to update
        :param cloud_logs_url: URL to store
        :return: The refreshed SubTask
        :raises ValueError: If the subtask does not exist
        """
        now = datetime.now(timezone.utc)

        with get_db_session() as session:
            subtask = session.get(SubTask, subtask_id)
            if not subtask:
                raise ValueError(f"SubTask {subtask_id} not found")

            subtask.cloud_logs_url = cloud_logs_url
            subtask.updated_at = now

            session.add(subtask)
            session.commit()
            session.refresh(subtask)
            return subtask

    # --------------------------------------------------------
    # SET BOTH OUTPUT + LOGS
    # --------------------------------------------------------
    @staticmethod
    def set_subtask_result(
        subtask_id: UUID,
        outputs: Optional[Dict[str, Any]] = None,
        cloud_logs_url: Optional[str] = None,
    ):
        """
        Set outputs and/or the cloud logs URL in one commit; status is left untouched.

        :param subtask_id: UUID of the subtask to update
        :param outputs: Outputs to store; skipped when ``None``
        :param cloud_logs_url: URL to store; skipped when ``None``
        :return: The refreshed SubTask
        :raises ValueError: If the subtask does not exist
        """
        now = datetime.now(timezone.utc)

        with get_db_session() as session:
            subtask = session.get(SubTask, subtask_id)
            if not subtask:
                raise ValueError(f"SubTask {subtask_id} not found")

            if outputs is not None:
                subtask.outputs = json.dumps(outputs)

            if cloud_logs_url is not None:
                subtask.cloud_logs_url = cloud_logs_url

            subtask.updated_at = now

            session.add(subtask)
            session.commit()
            session.refresh(subtask)
            return subtask

    # --------------------------------------------------------
    # TASK PROGRESS CALCULATION
    # --------------------------------------------------------
    @staticmethod
    def _update_task_progress(session: Session, task_id: UUID):
        """
        Derive the Task status from its subtasks and commit it on ``session``.

        Rules, first match wins:
          1. any subtask "failed"            -> task "failed", job_completed set
          2. subtasks exist and all complete -> task "complete", job_completed set
          3. any subtask "in progress"       -> task "in progress"
          4. otherwise                       -> task "waiting", job_completed cleared

        Does nothing when the Task does not exist.

        :param session: Open session used for both the reads and the commit
        :param task_id: UUID of the Task to recalculate
        """
        task = session.get(Task, task_id)
        if not task:
            return

        subtasks = session.exec(
            select(SubTask).where(SubTask.task_id == task_id)
        ).all()

        statuses = [s.status.lower() for s in subtasks]
        now = datetime.now(timezone.utc)

        if "failed" in statuses:
            task.status = "failed"
            task.job_completed = now
        # all() is true for an empty list, so require at least one subtask;
        # otherwise a task with no subtasks would be marked complete.
        elif statuses and all(s == "complete" for s in statuses):
            task.status = "complete"
            task.job_completed = now
        elif "in progress" in statuses:
            task.status = "in progress"
        else:
            # Nothing failed, nothing running, not everything complete
            # (e.g. all waiting, a complete/waiting mix, or no subtasks)
            task.status = "waiting"
            task.job_completed = None

        task.updated_at = now

        session.add(task)
        session.commit()

    def finalize_subtask(
        self,
        subtask_id: UUID,
        status: str,
        outputs: Optional[Dict[str, Any]],
        cloud_logs_url: Optional[str]
    ):
        """
        Close a subtask as complete or failed, storing its outputs and logs,
        then recalculate the parent task.

        :param subtask_id: UUID of the subtask to finalize
        :param status: Final status; must be "complete" or "failed" (case-insensitive)
        :param outputs: Outputs to store; skipped when ``None``
        :param cloud_logs_url: URL to store; skipped when ``None``
        :return: The refreshed SubTask
        :raises ValueError: If the subtask does not exist or the status is not final
        """
        now = datetime.now(timezone.utc)

        with get_db_session() as session:
            subtask = session.get(SubTask, subtask_id)
            if not subtask:
                raise ValueError(f"SubTask {subtask_id} not found")

            normalized = status.lower()
            if normalized not in ("complete", "failed"):
                raise ValueError("Status must be 'complete' or 'failed'")

            # Set outputs
            if outputs is not None:
                subtask.outputs = json.dumps(outputs)

            # Set logs
            if cloud_logs_url is not None:
                subtask.cloud_logs_url = cloud_logs_url

            # Status + timestamps
            subtask.status = normalized
            subtask.job_completed = now
            subtask.updated_at = now

            session.add(subtask)
            session.commit()

            # Update parent task (complete/failed)
            self._update_task_progress(session, subtask.task_id)

            session.refresh(subtask)
            return subtask
# ============================================================
# Task Interface
# ============================================================
class TasksInterface:
    """
    High-level operations for Task records.
    """

    @staticmethod
    def create_task(
        task_source: str,
        service: Optional[str] = None,
        inputs: Optional[Dict[str, Any]] = None,
        task_only: bool = False,
    ):
        """
        Create a new Task record, and an initial SubTask in waiting state. Can also be used to create just
        a task, without a subtask

        :param task_source: Text indicating source of task creation (e.g. file path + function name)
        :param service: Optional service name
        :param inputs: Inputs of the job being run (passed to the initial SubTask)
        :param task_only: If True, only create the Task record, without a SubTask
        :return: Tuple ``(task_id, subtask_id)``; ``subtask_id`` is ``None`` when ``task_only`` is True
        """
        now = datetime.now(timezone.utc)

        with get_db_session() as session:
            task = Task(
                task_source=task_source,
                service=service,
                status="waiting",
                job_started=now,
                job_completed=None,
            )

            session.add(task)
            session.commit()
            session.refresh(task)

            # Keep the id as a plain value so it stays usable after the
            # session is closed.
            task_id = task.id

        if task_only:
            return task_id, None

        # Create first subtask in waiting state. create_subtask opens its own
        # session, so it is called once the session above has been released.
        subtask_interface = SubTaskInterface()
        subtask_id = subtask_interface.create_subtask(
            task_id=task_id,
            inputs=inputs,
        )

        return task_id, subtask_id

    @staticmethod
    def update_task_status(task_id: UUID, status: str):
        """
        Set the status of a Task directly.

        The status is stored lower-cased. "in progress" stamps ``job_started``
        if it is still empty; "complete" and "failed" stamp ``job_completed``,
        matching how subtask-driven updates treat both terminal states.

        :param task_id: UUID of the task to update
        :param status: New status
        :return: The refreshed Task
        :raises ValueError: If the task does not exist
        """
        now = datetime.now(timezone.utc)

        with get_db_session() as session:
            task = session.get(Task, task_id)
            if not task:
                raise ValueError(f"Task {task_id} not found")

            normalized = status.lower()

            if normalized == "in progress" and task.job_started is None:
                task.job_started = now

            # A failed task has finished too; previously only "complete"
            # recorded a completion time.
            if normalized in ("complete", "failed"):
                task.job_completed = now

            task.status = normalized
            task.updated_at = now

            session.add(task)
            session.commit()
            session.refresh(task)
            return task

View file

@ -0,0 +1,80 @@
from backend.app.db.connection import get_db_session
from backend.app.db.models.whlg import Whlg
def upsert_whlg_postcode(postcode: str):
    """
    Return the WHLG row for a postcode, inserting it first when missing.

    The postcode is lower-cased and stripped of spaces before the lookup.
    Existence is checked with a query rather than a database constraint.
    """
    normalised = postcode.lower().replace(" ", "")

    with get_db_session() as session:
        match = session.query(Whlg).filter(Whlg.postcode == normalised).first()
        if match:
            # Already stored: there is nothing to change on the row.
            return match

        new_row = Whlg(postcode=normalised)
        session.add(new_row)
        session.commit()
        session.refresh(new_row)
        return new_row
# One-time script to upload 400,000 records in one go with the help
# of pandas and a single bulk insert
from backend.app.db.connection import get_db_session
from backend.app.db.models.whlg import Whlg
from sqlalchemy import select
from sqlalchemy.orm import Session
def upload_whlg_from_dataframe(df):
    """
    FAST bulk insert of WHLG postcodes (400k+ rows).
    No unique constraint needed.

    Postcodes are lower-cased, stripped of spaces and de-duplicated, then
    compared against the postcodes already stored; only unseen ones are
    inserted, in a single bulk operation.

    :param df: DataFrame with a 'Postcode' column
    :return: Dict with ``inserted``, ``skipped_existing`` and ``total_provided`` counts
    :raises ValueError: If the 'Postcode' column is missing
    """
    if "Postcode" not in df.columns:
        raise ValueError("DataFrame must contain a 'Postcode' column")

    # 1. Clean incoming postcodes.
    # Missing values are dropped BEFORE the cast to str: once cast, NaN is
    # the literal string "nan" and dropna() no longer removes it.
    cleaned_postcodes = (
        df["Postcode"]
        .dropna()
        .astype(str)
        .str.lower()
        .str.replace(" ", "", regex=False)
        .unique()
        .tolist()
    )

    with get_db_session() as session:
        # 2. Fetch existing postcodes once.
        # scalars() yields plain strings. Executing this single-column select
        # directly can return row objects instead, which never compare equal
        # to the cleaned strings below.
        existing_set = set(session.scalars(select(Whlg.postcode)).all())

        # 3. Determine which are new
        new_postcodes = [
            pc for pc in cleaned_postcodes if pc not in existing_set
        ]

        if not new_postcodes:
            return {
                "inserted": 0,
                "skipped_existing": len(cleaned_postcodes),
                "total_provided": len(cleaned_postcodes),
            }

        # 4. Bulk insert new postcodes in one shot
        objects = [Whlg(postcode=pc) for pc in new_postcodes]
        session.bulk_save_objects(objects)
        session.commit()

    return {
        "inserted": len(new_postcodes),
        "skipped_existing": len(cleaned_postcodes) - len(new_postcodes),
        "total_provided": len(cleaned_postcodes),
    }

View file

@ -0,0 +1,34 @@
from sqlalchemy import (
Column,
Integer,
String,
JSON,
TIMESTAMP,
func,
UniqueConstraint,
)
from sqlalchemy.orm import declarative_base
Base = declarative_base()
class PostcodeSearch(Base):
    """One stored search result per postcode in the ``postcode_search`` table."""

    __tablename__ = "postcode_search"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # Normalized postcode (uppercase, no spaces).
    # Uniqueness is declared once, by the named constraint in __table_args__;
    # a column-level unique=True on top of it would add a second, unnamed
    # unique constraint on the same column.
    postcode = Column(String, nullable=False)

    # Full OS Places API response.
    # NOTE(review): the generic JSON type is used here, not JSONB; confirm
    # which type the database column really has.
    result_data = Column(JSON, nullable=False)

    # Timestamp for when the entry was first created (set by the database)
    created_at = Column(TIMESTAMP(timezone=False), server_default=func.now(), nullable=False)

    __table_args__ = (
        UniqueConstraint("postcode", name="uq_postcode_search_postcode"),
    )

    def __repr__(self):
        return f"<PostcodeSearch(id={self.id}, postcode='{self.postcode}')>"

View file

@ -0,0 +1,29 @@
from sqlalchemy import (
Column,
Integer,
String,
JSON,
TIMESTAMP,
UniqueConstraint,
)
from sqlalchemy.orm import declarative_base
Base = declarative_base()
class EpcStore(Base):
    """
    Stores EPC data retrieved from the EPC API and EPC web pages.

    Each row keeps the API payload and the page content side by side, each
    with its own retrieval timestamp.
    """
    __tablename__ = "epc_store"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # NOTE(review): declared as Integer, while InstalledMeasure.uprn uses
    # BigInteger; confirm a 32-bit column is wide enough for every UPRN.
    uprn = Column(Integer)

    # EPC API payload (required) and its timestamp
    epc_api_created_at = Column(TIMESTAMP(timezone=False))
    epc_api = Column(JSON, nullable=False)

    # EPC web page content, its timestamp, and the page's RRN
    epc_page_created_at = Column(TIMESTAMP(timezone=False))
    epc_page = Column(String)
    epc_page_rrn = Column(String)

    def __repr__(self):
        # Short identifying form for logs and debugging.
        return f"<EpcStore(id={self.id}, uprn='{self.uprn}')>"

View file

@ -0,0 +1,262 @@
import enum
import pytz
import datetime
from sqlalchemy import (
Column,
BigInteger,
Text,
DateTime,
Enum,
ForeignKey,
)
from sqlalchemy.ext.declarative import declarative_base
from backend.app.db.models.portfolio import PropertyModel
Base = declarative_base()
# -------------------------------------------------------------------
# ENUM DEFINITIONS (equivalent to drizzle pgEnum calls)
# -------------------------------------------------------------------
class InspectionArchetype(enum.Enum):
    """Allowed values of ``InspectionModel.archetype`` (DB enum ``inspection_archetype``)."""

    # Note the mixed casing: these strings are the stored values.
    BUNGALOW = "Bungalow"
    FLAT = "Flat"
    MAISONETTE = "Maisonette"
    HOUSE = "House"
    NON_DOMESTIC = "non-domestic"
class InspectionArchetype2(enum.Enum):
    """Allowed values of ``InspectionModel.archetype_2`` (DB enum ``inspection_archetype_2``)."""

    DETACHED = "detached"
    MID_TERRACE = "mid-terrace"
    ENCLOSED_MID_TERRACE = "enclosed mid-terrace"
    END_TERRACE = "end-terrace"
    ENCLOSED_END_TERRACE = "enclosed end-terrace"
    SEMI_DETACHED = "semi-detached"
class InspectionsWallConstruction(enum.Enum):
    """Allowed values of ``InspectionModel.wall_construction`` (DB enum ``inspections_wall_construction``)."""

    CAVITY = "cavity"
    SOLID = "solid"
    SYSTEM_BUILT = "system built"
    TIMBER_FRAMED = "timber framed"
    STEEL_FRAMED = "steel framed"
    RE_WALLED_CAVITY = "re-walled cavity"
    MANSARD_PRE_FAB = "mansard pre-fab"
    MANSARD_EWI = "mansard ewi"
    MANSARD_RE_WALLED = "mansard re-walled"
class InspectionsWallInsulation(enum.Enum):
    """Allowed values of ``InspectionModel.insulation`` (DB enum ``inspections_wall_insulation``)."""

    EMPTY_CAVITY = "empty cavity"
    FILLED_AT_BUILD = "filled at build"
    PARTIAL = "partial"
    RETRO_DRILLED = "retro drilled"
    EWI = "ewi"
    IWI = "iwi"
    SOLID_NON_CAVITY = "solid non-cavity"
    SYSTEM_BUILT = "system built"
    TIMBER_FRAMED = "timber framed"
    STEEL_FRAMED = "steel framed"
class InspectionsInsulationMaterial(enum.Enum):
    """Allowed values of ``InspectionModel.insulation_material`` (DB enum ``inspections_insulation_material``)."""

    # "empty ..." members carry a size range; units are not stated here.
    EMPTY_50_90 = "empty 50-90"
    EMPTY_100_PLUS = "empty 100+"
    EMPTY_30_40 = "empty 30-40"
    EMPTY_LESS_THAN_30 = "empty less than 30"
    LOOSE_FIBRE_WOOL = "loose fibre/wool"
    EPS_CELO_KING = "eps/celo/king"
    FIBRE_BATTS_WITH_CAVITY = "fibre batts - with cavity"
    FIBRE_BATTS_NO_CAVITY = "fibre batts - no cavity"
    LOOSE_BEAD = "loose bead"
    GLUED_BEAD = "glued bead"
    FORMALDEHYDE = "formaldehyde"
    BUBBLE_WRAP = "bubble wrap"
    POLY_CHUNKS = "poly chunks"
class InspectionBorescoped(enum.Enum):
    """Allowed values of ``InspectionModel.borescoped`` (DB enum ``inspection_borescoped``)."""

    YES = "yes"
    NO = "no"
    REFUSED = "refused"
class InspectionsRoofOrientation(enum.Enum):
    """Allowed values of ``InspectionModel.roof_orientation`` (DB enum ``inspections_roof_orientation``).

    Besides compass directions and split orientations, the list includes
    non-orientation outcomes such as "no roof" and "already has solar pv".
    """

    NORTH = "north"
    EAST = "east"
    SOUTH = "south"
    WEST = "west"
    NORTH_EAST = "north-east"
    NORTH_WEST = "north-west"
    SOUTH_EAST = "south-east"
    SOUTH_WEST = "south-west"
    N_S_SPLIT = "n/s split"
    E_W_SPLIT = "e/w split"
    NE_SW_SPLIT = "ne/sw split"
    NW_SE_SPLIT = "nw/se split"
    FLAT_ROOF = "flat roof"
    NO_ROOF = "no roof"
    ROOF_TOO_SMALL = "roof too small"
    ALREADY_HAS_SOLAR_PV = "already has solar pv"
class InspectionsTileHung(enum.Enum):
    """Allowed values of ``InspectionModel.tile_hung`` (DB enum ``inspections_tile_hung``)."""

    YES = "yes"
    NO = "no"
    FIRST_FLOOR_FLATS_TILE_HUNG = "first floor flats are tile hung"
class InspectionsRendered(enum.Enum):
    """Allowed values of ``InspectionModel.rendered`` (DB enum ``inspections_rendered``)."""

    # The typographic quotes are part of the stored values; keep them as-is.
    NO_RENDER = "no render"
    INSUFFICIENT_DPC_SPACE = "rendered with “insufficient” space between dpc and render"
    SUFFICIENT_DPC_SPACE = "rendered with “sufficient” space between dpc and render"
class InspectionsCladding(enum.Enum):
    """Allowed values of ``InspectionModel.cladding`` (DB enum ``inspections_cladding``)."""

    # The typographic quotes are part of the stored values; keep them as-is.
    NONE = "none"
    SUFFICIENT_SPACE = "cladded with “sufficient space to fill the wall”"
    INSUFFICIENT_SPACE = "cladded with “insufficient space to fill the wall”"
class InspectionsAccessIssues(enum.Enum):
    """Allowed values of ``InspectionModel.access_issues`` (DB enum ``inspections_access_issues``)."""

    SEE_NOTES = "see notes"
    DAMP_ISSUES = "damp issues"
    FOLIAGE_ON_WALLS = "foliage on walls"
    BUSHES_AGAINST_WALL = "bushes against wall"
    # NOTE(review): "anove" looks like a typo for "above", but the string is
    # the stored value; change it only together with the database enum.
    TREES_AROUND_ABOVE = "trees around/anove property"
    HIGH_RISE = "high rise block flats/maisonettes"
    CONSERVATORY = "conservatory"
    LEAN_TO = "lean-to"
    GARAGE = "garage"
    EXTENSION = "extension"
    DECKING = "decking"
    SHED_AGAINST_WALL = "shed against wall"
def _inspection_enum(enum_cls, type_name):
    """Build the Enum column type shared by every enum column of InspectionModel."""
    return Enum(
        enum_cls,
        name=type_name,
        # Persist each member's value (e.g. "mid-terrace"), not its Python name.
        values_callable=lambda e: [m.value for m in e],
        # Presumably the enum types are created outside SQLAlchemy; confirm
        # against the migrations that own the schema.
        create_type=False,
    )


def _utc_now():
    """Return the current timezone-aware UTC time (used as a per-row column default)."""
    return datetime.datetime.now(pytz.utc)


class InspectionModel(Base):
    """
    A survey inspection of one property (table ``inspections``).

    All survey answers are nullable enum columns; ``notes`` and
    ``surveyor_name`` are free text.
    """
    __tablename__ = "inspections"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)

    archetype = Column(
        _inspection_enum(InspectionArchetype, "inspection_archetype"),
        nullable=True,
    )
    archetype_2 = Column(
        _inspection_enum(InspectionArchetype2, "inspection_archetype_2"),
        nullable=True,
    )
    wall_construction = Column(
        _inspection_enum(InspectionsWallConstruction, "inspections_wall_construction"),
        nullable=True,
    )
    insulation = Column(
        _inspection_enum(InspectionsWallInsulation, "inspections_wall_insulation"),
        nullable=True,
    )
    insulation_material = Column(
        _inspection_enum(InspectionsInsulationMaterial, "inspections_insulation_material"),
        nullable=True,
    )
    borescoped = Column(
        _inspection_enum(InspectionBorescoped, "inspection_borescoped"),
        nullable=True,
    )
    roof_orientation = Column(
        _inspection_enum(InspectionsRoofOrientation, "inspections_roof_orientation"),
        nullable=True,
    )
    tile_hung = Column(
        _inspection_enum(InspectionsTileHung, "inspections_tile_hung"),
        nullable=True,
    )
    rendered = Column(
        _inspection_enum(InspectionsRendered, "inspections_rendered"),
        nullable=True,
    )
    cladding = Column(
        _inspection_enum(InspectionsCladding, "inspections_cladding"),
        nullable=True,
    )
    access_issues = Column(
        _inspection_enum(InspectionsAccessIssues, "inspections_access_issues"),
        nullable=True,
    )

    notes = Column(Text)
    surveyor_name = Column(Text)

    # The default must be a callable. A datetime.now(...) *value* is computed
    # once at import, so every row would get the process start time.
    created_at = Column(
        DateTime, nullable=False, default=_utc_now
    )
    uploaded_at = Column(
        DateTime, nullable=False, default=_utc_now
    )

View file

@ -19,6 +19,8 @@ class MaterialType(enum.Enum):
flat_roof_insulation = "flat_roof_insulation"
room_roof_insulation = "room_roof_insulation"
windows_glazing = "windows_glazing"
secondary_glazing = "secondary_glazing"
double_glazing = "double_glazing"
cavity_wall_extraction = "cavity_wall_extraction"
iwi_wall_demolition = "iwi_wall_demolition"
@ -43,8 +45,13 @@ class MaterialType(enum.Enum):
solar_pv = "solar_pv"
solar_battery = "solar_battery"
scaffolding = "scaffolding"
# Heating systems
high_heat_retention_storage_heaters = "high_heat_retention_storage_heaters"
air_soruce_heat_pump = "air_soruce_heat_pump"
boiler_upgrade = "boiler_upgrade"
sealing_fireplace = "sealing_fireplace"
roomstat_programmer_trvs = "roomstat_programmer_trvs"
time_temperature_zone_control = "time_temperature_zone_control"
class DepthUnit(enum.Enum):

View file

@ -4,6 +4,7 @@ import datetime
from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
from sqlalchemy.ext.declarative import declarative_base
from backend.app.db.models.users import UserModel # noqa
from backend.app.db.models.materials import MaterialType
Base = declarative_base()
@ -86,6 +87,7 @@ class PropertyModel(Base):
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
creation_status = Column(Enum(PropertyCreationStatus), nullable=False)
uprn = Column(Integer)
landlord_property_id = Column(Text)
building_reference_number = Column(Integer)
status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
address = Column(Text)
@ -104,6 +106,10 @@ class PropertyModel(Base):
current_epc_rating = Column(Enum(Epc))
current_sap_points = Column(Float)
current_valuation = Column(Float)
# Following fields are for recording already installed adjustments to a property's SAP
installed_measures_sap_point_adjustment = Column(Float)
is_sap_points_adjusted_for_installed_measures = Column(Boolean, default=False)
original_sap_points = Column(Float)
class FeatureRating(enum.Enum):
@ -135,6 +141,8 @@ class PropertyDetailsEpcModel(Base):
property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
full_address = Column(Text)
lodgement_date = Column(DateTime)
is_expired = Column(Boolean)
total_floor_area = Column(Float)
walls = Column(Text)
walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5'))
@ -173,6 +181,9 @@ class PropertyDetailsEpcModel(Base):
current_energy_demand = Column(Float)
current_energy_demand_heating_hotwater = Column(Float)
estimated = Column(Boolean, default=False)
sap_05_overwritten = Column(Boolean, default=False)
sap_05_score = Column(Integer)
sap_05_epc_rating = Column(Enum(Epc))
# Include estimates for energy bills, across the different types of energy
heating_cost_current = Column(Float)
hot_water_cost_current = Column(Float)
@ -181,6 +192,18 @@ class PropertyDetailsEpcModel(Base):
gas_standing_charge = Column(Float)
electricity_standing_charge = Column(Float)
# Columns for re-baselining if we have an already installed measure
original_co2_emissions = Column(Float)
original_primary_energy_consumption = Column(Float)
original_current_energy_demand = Column(Float)
original_current_energy_demand_heating_hotwater = Column(Float)
# Adjustments
installed_measures_co2_adjustment = Column(Float)
installed_measures_energy_demand_adjustment = Column(Float)
installed_measures_total_energy_bill_adjustment = Column(Float)
installed_measures_heat_demand_adjustment = Column(Float)
is_epc_adjusted_for_installed_measures = Column(Boolean, default=False)
class PropertyDetailsSpatial(Base):
__tablename__ = "property_details_spatial"
@ -224,3 +247,18 @@ class PortfolioUsers(Base):
role = Column(Text, nullable=False)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
class PropertyInstalledMeasures(Base):
    """
    This model keeps a record of the installed measures for each property, at the UPRN level
    """
    __tablename__ = 'property_installed_measures'

    id = Column(Integer, primary_key=True, autoincrement=True)
    uprn = Column(Integer, nullable=False)
    # Stored as the MaterialType member's value rather than its Python name.
    measure_type = Column(
        Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
        nullable=False
    )
    # Defaults are callables so the timestamp is taken per insert. A
    # datetime.now(...) *value* is computed once at import, so every row
    # would get the process start time.
    created_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))
    installed_at = Column(DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc))

View file

@ -3,7 +3,9 @@ from sqlalchemy.orm import declarative_base
from sqlalchemy.sql import func
from backend.app.db.models.portfolio import Portfolio, PropertyModel
from backend.app.db.models.materials import Material
from backend.app.db.models.portfolio import Epc
from datatypes.enums import QuantityUnits
import enum
Base = declarative_base()
@ -47,6 +49,14 @@ class RecommendationMaterials(Base):
estimated_cost = Column(Float, nullable=False)
class PlanTypeEnum(enum.Enum):
    """Allowed values of ``Plan.plan_type`` (DB enum ``plan_type``)."""

    SOLAR_ECO4 = "solar_eco4"
    SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4"
    EMPTY_CAVITY_ECO = "empty_cavity_eco"
    PARTIAL_CAVITY_ECO = "partial_cavity_eco"
    EXTRACTION_ECO = "extraction_eco"
class Plan(Base):
__tablename__ = 'plan'
@ -60,6 +70,28 @@ class Plan(Base):
valuation_increase_lower_bound = Column(Float)
valuation_increase_upper_bound = Column(Float)
valuation_increase_average = Column(Float)
plan_type = Column(
Enum(
PlanTypeEnum,
name="plan_type",
values_callable=lambda e: [m.value for m in e],
create_type=False,
),
nullable=True,
)
post_sap_points = Column(Float)
post_epc_rating = Column(Enum(Epc))
post_co2_emissions = Column(Float)
co2_savings = Column(Float)
post_energy_bill = Column(Float)
energy_bill_savings = Column(Float)
post_energy_consumption = Column(Float) # energy demand in kWh/year
energy_consumption_savings = Column(Float)
valuation_post_retrofit = Column(Float)
valuation_increase = Column(Float)
# Financial metrics, excluding funding
cost_of_works = Column(Float)
contingency_cost = Column(Float)
class PlanRecommendations(Base):
@ -114,3 +146,58 @@ class Scenario(Base):
valuation_return_on_investment = Column(String)
property_valuation_increase = Column(Float)
labour_days = Column(Float)
class MeasureType(enum.Enum):
    """Allowed values of ``InstalledMeasure.measure_type`` (DB enum ``measure_type``).

    Member names and values are identical strings.
    """

    air_source_heat_pump = "air_source_heat_pump"
    boiler_upgrade = "boiler_upgrade"
    high_heat_retention_storage_heaters = "high_heat_retention_storage_heaters"
    secondary_heating = "secondary_heating"
    roomstat_programmer_trvs = "roomstat_programmer_trvs"
    time_temperature_zone_control = "time_temperature_zone_control"
    cylinder_thermostat = "cylinder_thermostat"
    cavity_wall_insulation = "cavity_wall_insulation"
    extension_cavity_wall_insulation = "extension_cavity_wall_insulation"
    external_wall_insulation = "external_wall_insulation"
    internal_wall_insulation = "internal_wall_insulation"
    loft_insulation = "loft_insulation"
    flat_roof_insulation = "flat_roof_insulation"
    room_roof_insulation = "room_roof_insulation"
    solid_floor_insulation = "solid_floor_insulation"
    suspended_floor_insulation = "suspended_floor_insulation"
    double_glazing = "double_glazing"
    secondary_glazing = "secondary_glazing"
    draught_proofing = "draught_proofing"
    mechanical_ventilation = "mechanical_ventilation"
    low_energy_lighting = "low_energy_lighting"
    solar_pv = "solar_pv"
    hot_water_tank_insulation = "hot_water_tank_insulation"
    sealing_open_fireplace = "sealing_open_fireplace"
class InstalledMeasure(Base):
    """A measure recorded as installed at a UPRN (table ``installed_measure``)."""

    __tablename__ = "installed_measure"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    uprn = Column(BigInteger, nullable=False)
    # Stored as the MeasureType member's value.
    measure_type = Column(
        Enum(
            MeasureType,
            name="measure_type",
            values_callable=lambda e: [m.value for m in e],
            create_type=False,  # <-- critical
        ),
        nullable=False,
    )
    installed_at = Column(TIMESTAMP)

    # Per-measure figures; units are not stated in this model.
    sap_points = Column(Float)
    carbon_savings = Column(Float)
    kwh_savings = Column(Float)
    bill_savings = Column(Float)
    heat_demand_savings = Column(Float)

    source = Column(String)
    # New rows are active by default.
    is_active = Column(Boolean, nullable=False, default=True)

View file

@ -0,0 +1,36 @@
from typing import Optional
from datetime import datetime
from uuid import UUID, uuid4
from sqlmodel import SQLModel, Field, Relationship
class Task(SQLModel, table=True):
    """A tracked job (table ``tasks``) that owns zero or more SubTask rows."""

    __tablename__ = "tasks"

    # Client-side generated UUID primary key.
    id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, )
    task_source: str
    job_started: Optional[datetime] = None
    job_completed: Optional[datetime] = None
    # NOTE(review): the default is capitalised ("In Progress"), whereas the
    # task interfaces write lower-case statuses ("in progress"); confirm
    # which form readers expect.
    status: str = Field(default="In Progress")
    service: Optional[str] = None
    # NOTE(review): datetime.utcnow returns a naive datetime, while the task
    # interfaces assign timezone-aware values; confirm the column handles both.
    updated_at: datetime = Field(default_factory=datetime.utcnow)

    # Reverse side of SubTask.task.
    sub_tasks: list["SubTask"] = Relationship(back_populates="task")
class SubTask(SQLModel, table=True):
    """One unit of work belonging to a Task (table ``sub_task``)."""

    __tablename__ = "sub_task"

    # Client-side generated UUID primary key.
    id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, )
    task_id: UUID = Field(foreign_key="tasks.id")
    job_started: Optional[datetime] = None
    job_completed: Optional[datetime] = None
    # NOTE(review): the default is capitalised ("In Progress"), whereas the
    # task interfaces write lower-case statuses ("in progress"); confirm
    # which form readers expect.
    status: str = Field(default="In Progress")
    # inputs / outputs are plain strings; the task interfaces store
    # json.dumps(...) output in them.
    inputs: Optional[str] = None
    outputs: Optional[str] = None
    cloud_logs_url: Optional[str] = None
    # NOTE(review): datetime.utcnow returns a naive datetime, while the task
    # interfaces assign timezone-aware values; confirm the column handles both.
    updated_at: datetime = Field(default_factory=datetime.utcnow)

    # Reverse side of Task.sub_tasks.
    task: Optional["Task"] = Relationship(back_populates="sub_tasks")

View file

@ -0,0 +1,15 @@
import uuid
from typing import Optional
from sqlmodel import SQLModel, Field
class Whlg(SQLModel, table=True):
    """A single postcode entry in the ``whlg`` table."""

    __tablename__ = "whlg"

    # None until the row is inserted and the database assigns the key.
    id: Optional[int] = Field(
        default=None,
        primary_key=True,
        index=True,
    )
    # No uniqueness is declared here; callers de-duplicate before inserting.
    postcode: str = Field(nullable=False)

View file

@ -31,6 +31,11 @@ def create_dummy_token(secret: str) -> str:
return token
@router.get("/")
async def hello_world():
    """Return a fixed greeting payload for the router root.

    Named ``hello_world`` rather than ``dummy_token`` because the
    "/dummy-token" handler below uses that name; sharing it rebinds the
    module-level name and mislabels this route.
    """
    return {"hello": "world"}
@router.get("/dummy-token")
async def dummy_token():
settings = get_settings()

View file

@ -6,14 +6,19 @@ from fastapi.encoders import jsonable_encoder
from starlette.exceptions import HTTPException as StarletteHTTPException
from mangum import Mangum
from backend.app.portfolio import router as portfolio_router
from backend.app.whlg import router as whlg_router
from backend.app.plan import router as plan_router
from backend.app.tasks import router as tasks_router
from backend.app.dependencies import validate_api_key
from backend.app.config import get_settings
logger = logging.getLogger("uvicorn.error")
logging.basicConfig(level=logging.INFO)
app = FastAPI(dependencies=[Depends(validate_api_key)])
if get_settings().ENVIRONMENT == "local":
app = FastAPI()
else:
app = FastAPI(dependencies=[Depends(validate_api_key)])
# Handle 422 errors (validation failures)
@ -52,10 +57,76 @@ async def log_requests(request: Request, call_next):
app.include_router(portfolio_router.router, prefix="/v1")
app.include_router(plan_router.router, prefix="/v1")
app.include_router(whlg_router.router, prefix="/v1")
app.include_router(tasks_router.router, prefix="/v1")
if get_settings().ENVIRONMENT == "local":
from app.local import router as local_router
app.include_router(local_router.router)
handler = Mangum(app)
import logging
from fastapi.responses import JSONResponse
from fastapi import FastAPI, Depends, Request, status
from fastapi.exceptions import RequestValidationError
from fastapi.encoders import jsonable_encoder
from starlette.exceptions import HTTPException as StarletteHTTPException
from mangum import Mangum
from backend.app.portfolio import router as portfolio_router
from backend.app.whlg import router as whlg_router
from backend.app.plan import router as plan_router
from backend.app.dependencies import validate_api_key
from backend.app.config import get_settings
logger = logging.getLogger("uvicorn.error")
logging.basicConfig(level=logging.INFO)
if get_settings().ENVIRONMENT == "local":
app = FastAPI()
else:
app = FastAPI(dependencies=[Depends(validate_api_key)])
# Handle 422 errors (validation failures)
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    # Log the failing URL, the submitted body and the validation errors, then
    # return the same errors and body to the caller as a 422 JSON response.
    logger.error(f"422 Validation Error at {request.url}")
    logger.error(f"Body: {exc.body}")
    logger.error(f"Validation Errors: {exc.errors()}")

    payload = {
        "detail": exc.errors(),
        "body": exc.body,
    }
    return JSONResponse(
        content=jsonable_encoder(payload),
        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
    )
# Handle generic HTTP exceptions (optional, useful for catching 404, 403, etc.)
@app.exception_handler(StarletteHTTPException)
async def http_exception_handler(request: Request, exc: StarletteHTTPException):
    # Record the status code, URL and detail of the HTTP error, then answer with
    # the same status code and a {"detail": ...} JSON body.
    logger.warning(f"{exc.status_code} Error at {request.url} - Detail: {exc.detail}")

    body = {"detail": exc.detail}
    return JSONResponse(content=body, status_code=exc.status_code)
# Middleware to log requests
@app.middleware("http")
async def log_requests(request: Request, call_next):
    # Log the method and URL before handing the request on, and the status code
    # of whatever response comes back; the response is returned untouched.
    logger.info(f"Incoming request: {request.method} {request.url}")

    downstream_response = await call_next(request)

    logger.info(f"Response status: {downstream_response.status_code}")
    return downstream_response
app.include_router(portfolio_router.router, prefix="/v1")
app.include_router(plan_router.router, prefix="/v1")
app.include_router(whlg_router.router, prefix="/v1")
if get_settings().ENVIRONMENT == "local":
from app.local import router as local_router
app.include_router(local_router.router)
handler = Mangum(app)

View file

@ -0,0 +1,9 @@
from dataclasses import dataclass
from typing import Any, Optional
@dataclass
class PropertyRequestData:
    """Request data gathered for a single property, held in one structured value.

    Fields:
        patch: the patch record selected for the property.
        non_invasive_recommendations: the non-invasive recommendations record
            selected for the property.
        valuation: the property's valuation, or None when no valuation exists.
    """

    patch: dict
    non_invasive_recommendations: dict
    valuation: Optional[float]

View file

@ -2,7 +2,8 @@ import boto3
import json
import math
import asyncio
import random
from contextlib import contextmanager
from sqlmodel import Session
from datetime import datetime
@ -15,6 +16,7 @@ from utils.logger import setup_logger
from backend.app.db.connection import db_engine
from backend.app.db.functions.recommendations_functions import create_scenario
from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface
logger = setup_logger()
@ -28,6 +30,19 @@ router = APIRouter(
sqs_client = boto3.client("sqs")
@contextmanager
def db_session():
    """Yield a Session bound to db_engine with commit/rollback handling.

    The session is committed when the managed block finishes normally. If the
    block (or the commit) raises an Exception, the session is rolled back and
    the exception is re-raised. The session is closed in every case.
    """
    db = Session(db_engine)
    try:
        try:
            yield db
            db.commit()
        except Exception:
            db.rollback()
            raise
    finally:
        db.close()
@router.post("/trigger", status_code=202)
async def trigger_plan_entrypoint(body: PlanTriggerRequest):
"""
@ -56,36 +71,57 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
scenario_id = data.get("scenario_id")
if not scenario_id:
created_at = datetime.now().isoformat()
session = sessionmaker(bind=db_engine)()
# Create a new scenario
new_scenario = create_scenario(
session=session,
scenario={
"name": body.scenario_name,
"created_at": created_at,
"budget": body.budget,
"portfolio_id": body.portfolio_id,
"housing_type": body.housing_type,
"goal": body.goal,
"goal_value": body.goal_value,
"trigger_file_path": body.trigger_file_path,
"already_installed_file_path": body.already_installed_file_path,
"patches_file_path": body.patches_file_path,
"non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
"exclusions": body.exclusions,
"multi_plan": body.multi_plan
}
)
scenario_id = new_scenario.id
with db_session() as session:
# Create a new scenario
scenario_id = create_scenario(
session=session,
scenario={
"name": body.scenario_name,
"created_at": created_at,
"budget": body.budget,
"portfolio_id": body.portfolio_id,
"housing_type": body.housing_type,
"goal": body.goal,
"goal_value": body.goal_value,
"trigger_file_path": body.trigger_file_path,
"already_installed_file_path": body.already_installed_file_path,
"patches_file_path": body.patches_file_path,
"non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
"exclusions": body.exclusions,
"multi_plan": body.multi_plan
}
)
# Insert the scenario ID into the data payload
data["scenario_id"] = scenario_id
# Create a main task
task_id, _ = TasksInterface.create_task(
task_source="backend/plan/router.py:trigger_plan_entrypoint",
service="plan_engine",
inputs=data,
task_only=True
)
subtask_interface = SubTaskInterface()
for i in range(total_chunks):
# Create an entry in the request logs table
index_start = i * chunk_size
index_end = min((i + 1) * chunk_size, total_rows)
message_payload = {**data, "index_start": index_start, "index_end": index_end}
message_payload = {
**data, "index_start": index_start, "index_end": index_end,
}
# Create a subtask for this chunk
subtask_id = subtask_interface.create_subtask(
task_id=task_id,
inputs=message_payload
)
# Add task and subtask to message
message_payload["task_id"] = str(task_id)
message_payload["subtask_id"] = str(subtask_id)
message_body = json.dumps(message_payload)
response = sqs_client.send_message(
@ -96,7 +132,9 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
f"Chunk {i} sent to SQS. Rows {index_start}{index_end}. Message ID: {response.get('MessageId')}"
)
await asyncio.sleep(random.uniform(0.1, 0.5)) # Delay to reduce spike pressure
await asyncio.sleep(0.05) # Small delay to avoid SQS throttling
# await asyncio.sleep(random.uniform(0.1, 0.5)) # Delay to reduce spike pressure
except Exception as e:
logger.error("Error during Excel file handling: %s", e)
@ -105,6 +143,14 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
else:
# Fallback: Just send a single message
try:
task_id, subtask_id = TasksInterface.create_task(
task_source="backend/plan/router.py:trigger_plan_entrypoint",
service="plan_engine",
inputs=data,
task_only=False,
)
data["task_id"] = str(task_id)
data["subtask_id"] = str(subtask_id)
message_body = json.dumps(data)
response = sqs_client.send_message(
QueueUrl=settings.ENGINE_SQS_URL,

View file

@ -17,7 +17,7 @@ ECO4_ELIGIBILE_FABRIC_MEASURES = [
"suspended_floor_insulation", "solid_floor_insulation", "double_glazing", "secondary_glazing"
]
ECO4_ELIGIBLE_HEATING_MEASURES = [
"boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump", "solar_pv"
"boiler_upgrade", "high_heat_retention_storage_heaters", "air_source_heat_pump", "solar_pv"
]
SPECIFIC_MEASURES = (
@ -48,14 +48,14 @@ MEASURE_MAP = {
],
"roof_insulation": ["loft_insulation", "flat_roof_insulation", "room_roof_insulation"],
"floor_insulation": ["suspended_floor_insulation", "solid_floor_insulation"],
"heating": ["boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump"],
"heating": ["boiler_upgrade", "high_heat_retention_storage_heaters", "air_source_heat_pump"],
"windows": ["double_glazing", "secondary_glazing"],
"heating_controls": ["roomstat_programmer_trvs", "time_temperature_zone_control"]
}
VALID_GOALS = ["Increasing EPC", "Energy Savings", "Reducing CO2 emissions"]
VALID_HOUSING_TYPES = ["Social", "Private"]
VALID_EVENT_TYPES = ["remote_assessment"]
VALID_EVENT_TYPES = ["remote_assessment", "eco_project"]
# Define the validation function for inclusions/exclusions
@ -113,7 +113,7 @@ class PlanTriggerRequest(BaseModel):
# When performing a remote assessment, if this has been set, it will allow the engine to
# pull data from the find my epc website, to utilise as part of a remote assessment
event_type: Optional[Literal["remote_assessment"]] = None
event_type: Optional[Literal["remote_assessment", "eco_project"]] = None
# If true, before optimising the engine will select a slightly larger package, to account for the SAP 10 causing
# scores to drop by a few points
@ -129,6 +129,13 @@ class PlanTriggerRequest(BaseModel):
index_start: Optional[int] = None
index_end: Optional[int] = None
# Task and subtask IDs
task_id: Optional[str] = None
subtask_id: Optional[str] = None
# Optional flag to trigger a fabric first task
enforce_fabric_first: Optional[bool] = False
@model_validator(mode="after")
def check_indexes(self):
if (self.index_start is None) != (self.index_end is None):

View file

@ -1,7 +1,16 @@
from utils.s3 import read_from_s3
from backend.app.config import get_settings
import ast
import os
import msgpack
from uuid import UUID
from utils.s3 import read_from_s3
from backend.addresses.Address import Address
from backend.app.config import get_settings
from backend.app.plan.data_classes import PropertyRequestData
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
from starlette.responses import Response
from utils.logger import setup_logger
logger = setup_logger()
def get_cleaned():
@ -21,3 +30,217 @@ def get_cleaned():
cleaned = msgpack.unpackb(cleaned, raw=False)
return cleaned
def patch_epc(patch, epc_records):
    """Overlay customer-supplied values onto the original EPC record.

    Entries keyed "address" or "postcode" are ignored, as are values that are
    an empty string or None. A value is written only when its key already
    exists in ``epc_records["original_epc"]``; that mapping is updated in place.

    :param patch: mapping of EPC field name to the customer-supplied value.
    :param epc_records: mapping that holds the EPC under the "original_epc" key.
    :return: the same ``epc_records`` object that was passed in.
    """
    identifier_fields = ("address", "postcode")
    blank_values = ("", None)

    for field_name, new_value in patch.items():
        if field_name in identifier_fields or new_value in blank_values:
            continue

        original_epc = epc_records["original_epc"]
        if field_name in original_epc:
            original_epc[field_name] = new_value

    return epc_records
def _records_identified_by_uprn(records) -> bool:
    """Return True when the first record carries a non-empty "uprn" value."""
    if not records:
        return False
    first = records[0]
    return "uprn" in first and first["uprn"] not in ["", None]


def _matches_address(record, address) -> bool:
    """Return True when the record's address and postcode equal those of ``address``."""
    return (record["address"] == address.address) and (record["postcode"] == address.postcode)


def extract_property_request_data(
        address: Address, patches, non_invasive_recommendations, valuation_data, uprn
):
    """Select the patch, non-invasive recommendations and valuation for one property.

    Each input list is matched by UPRN when its first record carries one, and
    by address + postcode otherwise. When nothing matches, the patch and the
    recommendations default to ``{}`` and the valuation to ``None``.

    :param address: object exposing ``uprn``, ``address`` and ``postcode``.
    :param patches: list of patch records (dicts).
    :param non_invasive_recommendations: list of recommendation records (dicts).
    :param valuation_data: list of records with a "valuation" entry.
    :param uprn: UPRN used to match recommendations and valuations.
    :return: a PropertyRequestData instance.
    """
    # An empty patch list is treated as UPRN-keyed; the lookup then simply yields {}.
    # NOTE(review): patches are matched on address.uprn, whereas recommendations and
    # valuations use the `uprn` argument — confirm these are always the same value.
    patch_has_uprn = "uprn" in patches[0] if patches else True
    if patch_has_uprn:
        patch = next((
            x for x in patches if str(x["uprn"]) == str(address.uprn)
        ), {})
    else:
        patch = next((
            x for x in patches if _matches_address(x, address)
        ), {})

    # Some non-invasive recommendations match on address and postcode but not UPRN,
    # so check that a usable UPRN exists before matching on it.
    if _records_identified_by_uprn(non_invasive_recommendations):
        property_non_invasive_recommendations = next((
            x for x in non_invasive_recommendations if str(x["uprn"]) == str(uprn)
        ), {})
    else:
        property_non_invasive_recommendations = next((
            x for x in non_invasive_recommendations if _matches_address(x, address)
        ), {})

    # Recommendations may arrive as a string such as "['cavity_extract_and_refill']";
    # parse it and normalise every entry into a dict. The step is skipped when the
    # property has no recommendations (no matching record, or a record without the
    # key), which previously raised KeyError.
    # NOTE(review): normalisation is applied however the record was matched — confirm.
    recommendations = property_non_invasive_recommendations.get("recommendations")
    if isinstance(recommendations, str):
        recommendations = ast.literal_eval(recommendations)
    if recommendations is not None:
        property_non_invasive_recommendations["recommendations"] = [
            {"type": rec} if isinstance(rec, str) else rec
            for rec in recommendations
        ]

    # Valuations follow the same UPRN-or-address matching rule.
    if _records_identified_by_uprn(valuation_data):
        property_valuation = next((
            float(x["valuation"]) for x in valuation_data if str(x["uprn"]) == str(uprn)
        ), None)
    else:
        property_valuation = next((
            float(x["valuation"]) for x in valuation_data if _matches_address(x, address)
        ), None)

    # Return a data class to give a structured format
    return PropertyRequestData(
        patch=patch,
        non_invasive_recommendations=property_non_invasive_recommendations,
        valuation=property_valuation
    )
# Work out the ECO package for a property from the solar / cavity reasons on its address record.
#
# Returns a 4-tuple (measures, target_sap, plan_type, already_installed). When the address has
# neither a solar nor a cavity reason, the first three items are None and the last is an empty list.
# The package is looked up by the text before the first ":" of the reason; a key that is missing
# from identification_map raises KeyError.
def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str, list[str]] | tuple[
None, None, None, list]:
solar_identification = addr.solar_reason
cavity_identification = addr.cavity_reason
# Nothing identified for this property: no package to build
if not solar_identification and not cavity_identification:
return None, None, None, []
landlord_heating_system = addr.landlord_heating_system
# This is the initial version of tackling "already installed" measures
already_installed = []
if landlord_heating_system == "air source heat pump":
already_installed.append("air_source_heat_pump")
# We map the categories to the desired measures and upgrade targets
# We note that the categories are placeholder until we move the standardised asset list
identification_map = {
"Solar Eligible": {
"measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"],
"target_sap": 86, # High B
"plan_type": "solar_eco4"
},
"Solar Eligible, Solid Wall Uninsulated, EPC E or Below": {
"measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"],
"target_sap": 86, # High B
"plan_type": "solar_eco4"
},
"Solar Eligible, Needs Heating Upgrade": {
"measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heaters",
"mechanical_ventilation"],
"target_sap": 86, # High B
"plan_type": "solar_hhrsh_eco4"
},
"Non-Intrusive Data Shows Empty Cavity": {
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "empty_cavity_eco"
},
'Non-Intrusive Data Shows Empty Cavity, built after 2002': {
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "empty_cavity_eco"
},
"EPC Shows Empty Cavity, inspections show retro drilled": {
# EPC Indicates it's empty, so we simulate a fill
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "extraction_eco"
},
"EPC Shows Empty Cavity, inspections show filled at build": {
# EPC Indicates it's empty, so we simulate a fill
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "extraction_eco"
},
"EPC Shows Empty Cavity": {
# EPC Indicates it's empty, so we simulate a fill
"measures": ["cavity_wall_insulation", "mechanical_ventilation"],
"target_sap": 69, # Low C
"plan_type": "empty_cavity_eco"
}
}
# Always prioritise solar
if solar_identification:
_key = solar_identification.split(":")[0]
else:
_key = cavity_identification.split(":")[0]
mapped = identification_map[_key]
# The map is rebuilt on every call, so mutating this list below does not leak between calls
measures = mapped["measures"]
# If we have already installed an ASHP, we adjust the measures
if "air_source_heat_pump" in already_installed:
if "high_heat_retention_storage_heaters" in measures:
# If the package includes HHRSH, we remove it
measures.remove("high_heat_retention_storage_heaters")
# Add in ASHP (replacing HHRSH if already had)
measures.append("air_source_heat_pump")
current_sap = prepared_epc.current_energy_efficiency
# If we have a solar package, and the property is a D or above, we don't need to do lofts
if "solar_eco4" in mapped["plan_type"] and current_sap >= 55:
if "loft_insulation" in measures:
measures.remove("loft_insulation")
return measures, mapped["target_sap"], mapped["plan_type"], already_installed
def build_cloudwatch_log_url(start_ms: int) -> str:
"""
Build a CloudWatch Logs URL for the current Lambda invocation,
including timestamp window from start_ms to end_ms (epoch ms).
"""
region = os.environ["AWS_REGION"]
log_group = os.environ["AWS_LAMBDA_LOG_GROUP_NAME"]
log_stream = os.environ["AWS_LAMBDA_LOG_STREAM_NAME"]
# CloudWatch console requires / encoded as $252F
encoded_group = log_group.replace("/", "$252F")
encoded_stream = log_stream.replace("/", "$252F")
# Return the full URL with time range
return (
f"https://console.aws.amazon.com/cloudwatch/home?"
f"region={region}"
f"#logsV2:log-groups/log-group/{encoded_group}"
f"/log-events/{encoded_stream}"
f"$3Fstart={start_ms}"
)
def handle_error(msg, e, subtask_id, status=500, start_ms=None):
    """Record a pipeline failure against its subtask and build the error response.

    :param msg: message that is logged and returned as the response body.
    :param e: the error; its string form is stored as the subtask outputs.
    :param subtask_id: subtask identifier, as a UUID or its string form.
    :param status: HTTP status code of the returned response.
    :param start_ms: start of the invocation in epoch milliseconds, passed to
        build_cloudwatch_log_url.
    :return: a Response carrying ``status`` and ``msg``.
    """
    # Log first so the original failure is never lost if the bookkeeping below raises
    logger.error(msg, exc_info=True)

    try:
        cloud_logs_url = build_cloudwatch_log_url(start_ms)
    except KeyError as missing_env:
        # The Lambda environment variables are absent (e.g. when running outside
        # Lambda); still mark the subtask as failed, just without a logs link.
        logger.error("Could not build CloudWatch log URL, missing variable: %s", missing_env)
        cloud_logs_url = None

    # Accept the id either as a UUID instance or as its string form
    subtask_uuid = subtask_id if isinstance(subtask_id, UUID) else UUID(subtask_id)

    SubTaskInterface().update_subtask_status(
        subtask_id=subtask_uuid,
        status="failed",
        outputs=str(e),
        cloud_logs_url=cloud_logs_url
    )
    return Response(status_code=status, content=msg)

View file

@ -12,3 +12,5 @@ boto3==1.35.44
openpyxl==3.1.2
# Basic
pytz
sqlmodel

View file

189
backend/app/tasks/router.py Normal file
View file

@ -0,0 +1,189 @@
from fastapi import APIRouter, Depends, HTTPException
from uuid import UUID
import json # ← REQUIRED for json.loads
from backend.app.dependencies import validate_token
from backend.app.tasks.schema import (
CreateTaskRequest,
UpdateTaskStatusRequest,
CreateSubTaskRequest,
UpdateSubTaskStatusRequest,
FinalizeSubTaskRequest,
TaskSqsTriggerRequest
)
# Correct location of interfaces
from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface
from backend.app.db.connection import get_db_session
from backend.app.db.models.tasks import Task, SubTask
from sqlmodel import select
router = APIRouter(
prefix="/tasks",
tags=["tasks"],
dependencies=[Depends(validate_token)],
)
# ============================================================
# Create Task
# ============================================================
@router.post("/", summary="Create a new task and its first subtask")
async def create_task(req: CreateTaskRequest):
    # Delegate creation to TasksInterface and hand both generated identifiers back.
    new_task_id, new_subtask_id = TasksInterface().create_task(
        task_source=req.task_source,
        service=req.service,
        inputs=req.inputs,
    )

    result = {"task_id": new_task_id, "subtask_id": new_subtask_id}
    return result
# ============================================================
# Get Task + Subtasks
# ============================================================
def _decode_json_field(raw):
    """Decode a stored JSON string; return None when empty and the raw text when it is not JSON."""
    if not raw:
        return None
    try:
        return json.loads(raw)
    except (TypeError, ValueError):
        # Error handlers store plain-text messages in `outputs`, so fall back to the raw value
        return raw


@router.get("/{task_id}", summary="Get a task and its subtasks")
async def get_task(task_id: UUID):
    # Returns {"task": <Task>, "subtasks": [...]}; each subtask is its dict form with
    # `inputs` / `outputs` decoded from JSON. Responds 404 when the task does not exist.
    with get_db_session() as session:
        task = session.get(Task, task_id)
        if not task:
            raise HTTPException(status_code=404, detail="Task not found")

        # The SubTask model names its fields task_id / cloud_logs_url (snake_case)
        subtasks = session.exec(
            select(SubTask).where(SubTask.task_id == task_id)
        ).all()

        formatted = [
            {
                **st.dict(),
                "inputs": _decode_json_field(st.inputs),
                "outputs": _decode_json_field(st.outputs),
                "cloud_logs_url": st.cloud_logs_url,
            }
            for st in subtasks
        ]

        return {
            "task": task,
            "subtasks": formatted,
        }
# ============================================================
# Update Task Status
# ============================================================
@router.put("/{task_id}/status", summary="Update a task's status")
async def update_task_status(task_id: UUID, req: UpdateTaskStatusRequest):
    # A ValueError raised by the interface is reported to the client as a 404.
    interface = TasksInterface()
    try:
        task = interface.update_task_status(task_id, req.status)
        return {
            "task_id": task.id,
            "status": task.status,
        }
    except ValueError as err:
        raise HTTPException(status_code=404, detail=str(err))
# ============================================================
# Create Additional Subtask
# ============================================================
@router.post("/{task_id}/subtasks", summary="Create a new subtask under a task")
async def create_subtask(task_id: UUID, req: CreateSubTaskRequest):
    # A ValueError raised by the interface is reported to the client as a 404.
    # NOTE(review): this handler reads `.id` and `.status` from the value returned by
    # SubTaskInterface.create_subtask, while the plan trigger route treats the same
    # return value as the subtask id itself — confirm which is correct.
    interface = SubTaskInterface()
    try:
        created = interface.create_subtask(task_id, req.inputs)
        return {
            "subtask_id": created.id,
            "task_id": task_id,
            "status": created.status,
        }
    except ValueError as err:
        raise HTTPException(status_code=404, detail=str(err))
# ============================================================
# Update Subtask Status
# ============================================================
@router.put("/subtask/{subtask_id}/status", summary="Update a subtask's status")
async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusRequest):
    # A ValueError raised by the interface is reported to the client as a 404.
    interface = SubTaskInterface()
    try:
        subtask = interface.update_subtask_status(subtask_id, req.status)
        return {
            "subtask_id": subtask.id,
            "status": subtask.status,
        }
    except ValueError as err:
        raise HTTPException(status_code=404, detail=str(err))
# ===
# Sub task is complete
@router.post("/subtask/{subtask_id}/finalize", summary="Finalize a subtask with status, outputs, logs")
async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest):
    # Stores the final status, outputs and logs URL on the subtask. The outputs and
    # logs URL in the response are echoed from the request; a ValueError raised by
    # the interface is reported to the client as a 400.
    interface = SubTaskInterface()
    try:
        finalized = interface.finalize_subtask(
            subtask_id=subtask_id,
            status=req.status,
            outputs=req.outputs,
            cloud_logs_url=req.cloud_logs_url,
        )
        summary = {
            "subtask_id": finalized.id,
            "status": finalized.status,
            "outputs": req.outputs,
            "cloud_logs_url": req.cloud_logs_url,
        }
        return summary
    except ValueError as err:
        raise HTTPException(status_code=400, detail=str(err))
# for testing:
import boto3
import json
from backend.app.tasks.schema import TaskSqsTriggerRequest
from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface
from backend.app.config import get_settings
sqs = boto3.client("sqs")
@router.post("/trigger", summary="Create task + subtask and publish to SQS", status_code=202)
async def trigger_task(req: TaskSqsTriggerRequest):
    """
    Creates a Task + SubTask, then pushes the SubTask into SQS so a Lambda can process it.
    If inputs are empty, automatically replaced with {}.
    """
    settings = get_settings()
    interface = TasksInterface()

    # Fall back to an empty dict when no inputs were supplied
    inputs = req.inputs or {}

    # Step 1: persist the task together with its first subtask
    task_id, subtask_id = interface.create_task(
        task_source=req.task_source,
        service=req.service,
        inputs=inputs,
    )

    # Step 2: publish the subtask to the queue
    message = {"subtask_id": str(subtask_id), "params": inputs}
    try:
        response = sqs.send_message(
            QueueUrl=f"https://sqs.{settings.AWS_REGION}.amazonaws.com/"
                     f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue",
            MessageBody=json.dumps(message),
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"SQS error: {e}")

    return {
        "message": "Task triggered",
        "task_id": task_id,
        "subtask_id": subtask_id,
        "sqs_message_id": response.get("MessageId"),
        "inputs_sent": inputs,
    }

View file

@ -0,0 +1,31 @@
from typing import Optional, Any, Dict
from uuid import UUID
from pydantic import BaseModel
# Request body for creating a task; its fields are passed to TasksInterface.create_task.
class CreateTaskRequest(BaseModel):
# Identifier of whatever is creating the task (required)
task_source: str
# Optional service name
service: Optional[str] = None
inputs: Optional[Dict[str, Any]] = None # JSON object
# Request body for updating a task's status.
class UpdateTaskStatusRequest(BaseModel):
# New status value (free-form string; no validation is applied here)
status: str
# Request body for creating an additional subtask under an existing task.
class CreateSubTaskRequest(BaseModel):
inputs: Optional[Dict[str, Any]] = None # JSON object
# Request body for updating a subtask's status.
class UpdateSubTaskStatusRequest(BaseModel):
# New status value (free-form string; no validation is applied here)
status: str
# Request body for finalising a subtask: final status plus optional outputs and logs link.
class FinalizeSubTaskRequest(BaseModel):
status: str # "complete" or "failed"
# Optional JSON object with the subtask's results
outputs: Optional[Dict[str, Any]] = None
# Optional link to the logs of the run
cloud_logs_url: Optional[str] = None
# Request body for the /trigger route, which creates a task + subtask and publishes to SQS.
class TaskSqsTriggerRequest(BaseModel):
    # Identifier of whatever is triggering the task (required)
    task_source: str
    # Optional service name
    service: Optional[str] = None
    # Optional: the trigger route replaces missing / null inputs with {}, so the field
    # must accept being omitted or null instead of failing validation.
    inputs: Optional[Dict[str, Any]] = None  # forwarded into SubTask.inputs + SQS message

View file

View file

@ -0,0 +1,77 @@
# import boto3
# import json
# import math
# import asyncio
# import random
#
# from datetime import datetime
from fastapi import APIRouter, Depends
from backend.app.dependencies import validate_token
# from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.config import get_settings
# from sqlalchemy.orm import sessionmaker
from utils.logger import setup_logger
# from backend.app.db.connection import db_engine
# from backend.app.db.functions.recommendations_functions import create_scenario
# import pandas as pd
from backend.app.whlg.schema import WHLGElligibilityRequest
# from utils.s3 import read_csv_from_s3
# from sqlalchemy.dialects.postgresql import insert
# from backend.app.db.connection import get_db_session
# from backend.app.db.models.whlg import Whlg
# from backend.app.db.functions.whlg_functions import upsert_whlg_postcode
logger = setup_logger()
# In the "local" environment the router is created without the token dependency
# (and without the 404 response description); every other environment gets both.
router = (
    APIRouter(
        prefix="/whlg",
        tags=["whlg"],
    )
    if get_settings().ENVIRONMENT == "local"
    else APIRouter(
        prefix="/whlg",
        tags=["whlg"],
        dependencies=[Depends(validate_token)],
        responses={404: {"description": "Not found"}}
    )
)
# Placeholder WHLG endpoint: currently only returns a fixed greeting payload.
@router.get("/")
async def whlg_entrypoint():
# TODO: the request body needs to include postcode and UPRN (and possibly a task ID).
#
# Implementation outline (refer to the plan trigger route for reference code):
# 1) Create an event schema and store it in the schemas file
# 2) Build the tasks functions
# 3) Read in the funding CSV. This can be found as such:
# whlg_eligible_postcodes = read_csv_from_s3(
# bucket_name=get_settings().DATA_BUCKET,
# filepath="funding/whlg eligible postcodes.csv",
# )
# whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes)
# 4) Check the postcode against this file
# 5) Store the result somewhere - open question; the current suggestion is a new table
# 6) Update the subtask to be complete
# 7) Once this is complete, build the logging support and add the CloudWatch logs ID to the database
return {"hello": "from whlg"}
# POST /eligible: accepts a WHLGElligibilityRequest but currently always returns None.
# The postcode lookup against the eligible-postcodes CSV is kept below, commented out.
@router.post("/eligible")
async def eligiable(body: WHLGElligibilityRequest):
# Disabled implementation: normalise the postcode (lower-case, no spaces), load the
# eligible postcodes from S3 and report whether the postcode is among them.
# NOTE(review): the request postcode is lower-cased but the CSV column is only stripped of
# spaces - check the casing matches before re-enabling.
# postcode = body.postcode or ""
# postcode = postcode.lower().replace(" ", "")
#
# whlg_eligible_postcodes = read_csv_from_s3(
# bucket_name=get_settings().DATA_BUCKET,
# filepath="funding/whlg eligible postcodes.csv",
# )
# whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes)
# whlg_eligible_postcodes['Postcode'] = whlg_eligible_postcodes['Postcode'].str.replace(' ', '', regex=False)
#
# is_eligible = postcode in whlg_eligible_postcodes['Postcode'].values
# return {"whlg_eligible": is_eligible}
return None

View file

@ -0,0 +1,4 @@
from pydantic import BaseModel, Field
# Request body for the WHLG eligibility check.
class WHLGElligibilityRequest(BaseModel):
# Postcode to check (required); the example value is shown in the generated API docs
postcode: str = Field(..., example="B93 8SY")

View file

View file

@ -0,0 +1,12 @@
from enum import Enum
class FileType(Enum):
    """Source-file formats that can be recognised from a file path."""

    LBWF = "lbwf"
def detect_file_type(filepath: str) -> FileType:
    """Infer the file type from its path.

    The check is case-insensitive: any path containing "lbwf" is an LBWF file.

    :raises ValueError: if the path matches no known file type.
    """
    if "lbwf" not in filepath.lower():
        raise ValueError("Unrecognised file path")
    return FileType.LBWF

View file

@ -0,0 +1,16 @@
from typing import Mapping, Any
from io import BytesIO
from utils.logger import setup_logger
from backend.condition.processor import process_file
logger = setup_logger()
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Lambda-style entry point: process an empty stream under the event's source key.

    The key is read from ``event["source_key"]`` and defaults to
    "unknown-source". ``context`` is not used.
    """
    # Temporary stub for PoC wiring
    process_file(BytesIO(b""), event.get("source_key", "unknown-source"))

View file

@ -0,0 +1,25 @@
from pathlib import Path
from backend.condition.processor import process_file
def main() -> None:
    """Run process_file against the bundled LBWF sample workbook."""
    try:
        # Works in scripts / debugger / pytest
        base_dir = Path(__file__).resolve().parents[1]
    except NameError:
        # __file__ is not defined in notebooks
        base_dir = Path.cwd()

    sample_dir: Path = base_dir / "condition" / "sample_data"
    # TODO: get this from s3 as part of devcontainer init
    workbook_path: Path = sample_dir / "lbwf" / "LBWF - Example Asset Data September 2025.xlsx"

    with workbook_path.open("rb") as stream:
        process_file(file_stream=stream, source_key=workbook_path.as_posix())
if __name__ == "__main__":
main()

View file

@ -0,0 +1,9 @@
from backend.condition.file_type import FileType
from backend.condition.parsing.parser import Parser
from backend.condition.parsing.lbwf_parser import LbwfParser
def select_parser(file_type: FileType) -> Parser:
    """Return a new parser instance for the given file type.

    :raises ValueError: if no parser exists for ``file_type``.
    """
    if file_type is not FileType.LBWF:
        raise ValueError("Unrecognised file type, unable to instantiate Parser")
    return LbwfParser()

View file

@ -0,0 +1,180 @@
from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
from openpyxl import Workbook, load_workbook
from collections import defaultdict
from backend.condition.parsing.parser import Parser
from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition
from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse
from backend.condition.utils.date_utils import normalise_date
from utils.logger import setup_logger
# setup_logger must be called: binding the function itself would make every
# logger.error(...) call in this module fail with AttributeError.
logger = setup_logger()
class LbwfParser(Parser):
    """Parser for LBWF asset workbooks.

    Reads three sheets: "All Energy Breakdown " (address to UPRN),
    "Houses Asset Data" (one row per asset record) and "Houses" (one row per
    house), and returns the houses with their asset records attached.
    """

    def parse(self, file_stream: BinaryIO) -> Any:
        """Parse the workbook in ``file_stream`` into a list of LbwfHouse records."""
        wb: Workbook = load_workbook(file_stream)

        address_to_uprn_map: Dict[str, int | None] = self._generate_address_to_uprn_dict(wb)
        assets = self._parse_assets(wb)
        houses = self._parse_houses(wb, address_to_uprn_map)
        self._merge_assets_into_houses(assets, houses)
        return houses

    @staticmethod
    def _parse_assets(wb: Workbook) -> List[LbwfAssetCondition]:
        """Map every data row of the "Houses Asset Data" sheet to an asset record.

        Rows that fail to map are logged and skipped.
        """
        assets_sheet = wb["Houses Asset Data"]
        asset_rows = assets_sheet.iter_rows(values_only=True)
        # The first row holds the column headers
        asset_headers = next(asset_rows)
        asset_header_indexes = LbwfParser._get_column_indexes_by_name(asset_headers)

        assets: List[LbwfAssetCondition] = []
        for row in asset_rows:
            try:
                assets.append(
                    LbwfParser._map_row_to_asset_record(row, asset_header_indexes)
                )
            except Exception as e:
                # Best effort: one bad row must not abort the whole file
                logger.error(f"Error mapping LBWF row to asset record: {e}")
                continue
        return assets

    @staticmethod
    def _parse_houses(
        wb: Workbook,
        address_to_uprn_map: Dict[str, int | None],
    ) -> List[LbwfHouse]:
        """Map every data row of the "Houses" sheet to a house record.

        Rows that fail to map are logged and skipped.
        """
        houses_sheet = wb["Houses"]
        house_rows = houses_sheet.iter_rows(values_only=True)
        # The first row holds the column headers
        house_headers = next(house_rows)
        house_header_indexes = LbwfParser._get_column_indexes_by_name(house_headers)

        houses: List[LbwfHouse] = []
        for row in house_rows:
            try:
                houses.append(
                    LbwfParser._map_row_to_house_record(
                        row,
                        house_header_indexes,
                        address_to_uprn_map,
                    )
                )
            except Exception as e:
                # Best effort: one bad row must not abort the whole file
                logger.error(f"Error mapping LBWF row to house record: {e}")
                continue
        return houses

    @staticmethod
    def _merge_assets_into_houses(
        assets: List[LbwfAssetCondition],
        houses: List[LbwfHouse],
    ) -> None:
        """Attach to each house the assets whose prop_ref equals the house reference."""
        assets_by_ref: Dict[int, List[LbwfAssetCondition]] = defaultdict(list)
        for asset in assets:
            assets_by_ref[asset.prop_ref].append(asset)

        for house in houses:
            # .get avoids inserting empty entries into the defaultdict
            house.assets = assets_by_ref.get(house.reference, [])

    @staticmethod
    def _map_row_to_house_record(
        row: Any | Tuple[object | None, ...],
        header_indexes: Dict[str, int],
        address_to_uprn_map: Dict[str, int | None],
    ) -> LbwfHouse:
        """Build an LbwfHouse from one "Houses" row, resolving its UPRN by address."""
        address: str = row[header_indexes["Address"]]

        # Header names are looked up exactly as written here, including the
        # trailing space in "EPC " and the spelling "HOSUE".
        return LbwfHouse(
            uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map),
            reference=row[header_indexes["Reference"]],
            address=address,
            epc=row[header_indexes["EPC "]],
            shdf=row[header_indexes["SHDF"]],
            house=row[header_indexes["HOSUE"]],
            fail_decency=row[header_indexes["Fail Decency"]],
            assets=[],
        )

    @staticmethod
    def _map_row_to_asset_record(
        row: Any | Tuple[object | None, ...],
        header_indexes: Dict[str, int],
    ) -> LbwfAssetCondition:
        """Build an LbwfAssetCondition from one "Houses Asset Data" row."""
        return LbwfAssetCondition(
            prop_ref=row[header_indexes["PROP REF"]],
            domna=row[header_indexes["Domna"]],
            address=row[header_indexes["ADDRESS"]],
            ownership=row[header_indexes["OWNERSHIP"]],
            prop_status=row[header_indexes["PROP STATUS"]],
            prop_type=row[header_indexes["PROP TYPE"]],
            prop_sub_type=row[header_indexes["PROP SUB TYPE"]],
            element_group=row[header_indexes["ELEMENT GROUP"]],
            element_code=row[header_indexes["ELEMENT CODE"]],
            element_code_description=row[header_indexes["ELEMENT CODE DESCRIPTION"]],
            attribute_code=row[header_indexes["ATTRIBUTE CODE"]],
            attribute_code_description=row[header_indexes["ATTRIBUTE CODE DESCRIPTION"]],
            element_date_value=row[header_indexes["ELEMENT DATE VALUE"]],
            element_numerical_value=row[header_indexes["ELEMENT NUMERIC VALUE"]],
            element_text_value=row[header_indexes["ELEMENT TEXT VALUE"]],
            quantity=row[header_indexes["QUANTITY"]],
            install_date=normalise_date(row[header_indexes["INSTALL DATE"]]),
            remaining_life=row[header_indexes["REMAINING LIFE"]],
            element_comments=row[header_indexes["ELEMENT COMMENTS"]],
        )

    @staticmethod
    def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]:
        """Build an address -> UPRN map from the "All Energy Breakdown " sheet.

        Rows whose address is not a string are skipped.

        :raises ValueError: if a UPRN cell is neither None nor an int.
        """
        sheet = wb["All Energy Breakdown "]
        rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)
        # The first row holds the column headers
        headers = next(rows)
        header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers)

        address_idx = header_indexes["Address"]
        uprn_idx = header_indexes["UPRN"]

        mapping: Dict[str, int | None] = {}
        for row in rows:
            address = row[address_idx]
            uprn = row[uprn_idx]
            if not isinstance(address, str):
                continue
            if uprn is not None and not isinstance(uprn, int):
                raise ValueError(f"Unexpected UPRN value: {uprn!r}")
            mapping[address] = uprn
        return mapping

    @staticmethod
    def _get_column_indexes_by_name(
        headers: Tuple[object | None, ...]
    ) -> Dict[str, int]:
        """Map each string header to its column position; non-string headers are ignored."""
        return {
            header: position
            for position, header in enumerate(headers)
            if isinstance(header, str)
        }

    @staticmethod
    def _get_uprn_from_address(
        address: str,
        address_to_uprn_map: Dict[str, int | None],
    ) -> int | None:
        """Look up the UPRN for the part of ``address`` before its first comma.

        The match is case-insensitive. Returns None when no key matches.
        """
        pseudo_name = address.split(",")[0]

        # Fast path: the key is stored in upper case
        upper_name = pseudo_name.upper()
        if upper_name in address_to_uprn_map:
            return address_to_uprn_map[upper_name]

        # Fall back to a case-insensitive scan, so a key stored in any other
        # casing is found instead of raising KeyError.
        wanted = pseudo_name.lower()
        for key, uprn in address_to_uprn_map.items():
            if key.lower() == wanted:
                return uprn
        return None

View file

@ -0,0 +1,8 @@
from abc import ABC, abstractmethod
from typing import BinaryIO, Any
class Parser(ABC):
    """Abstract base class for file parsers.

    Concrete parsers implement :meth:`parse`, which receives a binary stream
    and returns whatever parsed representation that parser produces.
    """

    @abstractmethod
    def parse(self, file_stream: BinaryIO) -> Any:
        """Parse ``file_stream``; must be overridden by subclasses."""

View file

@ -0,0 +1,26 @@
from dataclasses import dataclass
from datetime import date
@dataclass
class LbwfAssetCondition:
prop_ref: int
domna: int
address: str
ownership: str
prop_status: str
prop_type: str # TODO: make this enum?
prop_sub_type: str # TODO: make this enum?
element_group: str
element_code: str
element_code_description: str
attribute_code: str
attribute_code_description: str
element_date_value: str | None = None
element_numerical_value: int | None = None
element_text_value: str | None = None
quantity: int | None = None
install_date: date | None = None
remaining_life: int | None = None
element_comments: str | None = None

View file

@ -0,0 +1,15 @@
from dataclasses import dataclass
from typing import List
from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition
@dataclass
class LbwfHouse:
    """An LBWF house together with the asset-condition records attached to it."""

    # --- identification ---
    uprn: int
    reference: int
    address: str

    # --- house-level attributes ---
    epc: str  # TODO: make enum
    shdf: bool
    house: str
    fail_decency: int

    # --- asset-condition records belonging to this house ---
    assets: List[LbwfAssetCondition]

View file

@ -0,0 +1,18 @@
from typing import Any, BinaryIO, List
from backend.condition.parsing.parser import Parser
from utils.logger import setup_logger
from backend.condition.file_type import FileType, detect_file_type
from backend.condition.parsing.factory import select_parser
def process_file(file_stream: BinaryIO, source_key: str) -> None:
    """Detect the file type from ``source_key``, parse the stream and print the result.

    Args:
        file_stream: Binary stream holding the uploaded file's contents.
        source_key: Key/path of the file; used to detect which parser applies.
    """
    print(f"[processor] Received file: {source_key}")

    # Instantiation: the source key decides the file type, which picks the parser.
    parser: Parser = select_parser(detect_file_type(source_key))

    # Orchestration: parse and, for now, just dump the records.
    print(parser.parse(file_stream))  # temp

View file

@ -0,0 +1,134 @@
from typing import Any
import pytest
from io import BytesIO
from openpyxl import Workbook
from datetime import datetime
from backend.condition.parsing.lbwf_parser import LbwfParser
from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition
from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse
@pytest.fixture
def lbwf_homes_xlsx_bytes() -> BytesIO:
    """Return an in-memory .xlsx workbook with three populated sheets.

    Sheets, in creation order: "Houses Asset Data", "Houses" and
    "All Energy Breakdown " (each a header row followed by two data rows).
    The returned stream is rewound to position 0.
    """
    asset_rows = [
        [
            "PROP REF",
            "Domna",
            "ADDRESS",
            "OWNERSHIP",
            "PROP STATUS",
            "PROP TYPE",
            "PROP SUB TYPE",
            "ELEMENT GROUP",
            "ELEMENT CODE",
            "ELEMENT CODE DESCRIPTION",
            "ATTRIBUTE CODE",
            "ATTRIBUTE CODE DESCRIPTION",
            "ELEMENT DATE VALUE",
            "ELEMENT NUMERIC VALUE",
            "ELEMENT TEXT VALUE",
            "QUANTITY",
            "INSTALL DATE",
            "REMAINING LIFE",
            "ELEMENT COMMENTS",
        ],
        [
            12345,
            12345,
            "123 Fake Street, London, A10 1AB",
            "LBWF_OWNED",
            "OCCP",
            "HOU",
            "TERRACED",
            "ASSETS",
            "AHR_CAT",
            "Accessible Housing Register Category",
            "F",
            "General Needs",
            None,
            None,
            None,
            1,
            None,
            None,
            None,
        ],
        [
            54321,
            54321,
            "100 Random Road, London, A10 1AB",
            "LBWF_OWNED",
            "OCCP",
            "HOU",
            "EOT",
            "ASSETS",
            "INTSMKDET",
            "Smoke Detectors in Property",
            "HARDWRDMNS",
            "Hard Wired Mains Smoke Alarm in Property",
            None,
            None,
            None,
            2,
            datetime(2019, 4, 1),
            4,
            "Source of Data = Joe Bloggs",
        ],
    ]
    house_rows = [
        ["Reference", "Address", "EPC ", "SHDF", "HOSUE", "Fail Decency"],
        [12345, "123 Fake Street, London, A10 1AB", "E", "NO", "HOUSE", 2025],
        [54321, "100 Random Road, London, A10 1AB", "F", "NO", "HOUSE", 2025],
    ]
    energy_rows = [
        ["UPRN", "Organisation Reference", "Alternate Organisation Reference", "Address", "Postcode"],
        [1, 200, None, "123 FAKE STREET", "A10 1AB"],
        [2, 100, 101, "100 RANDOM ROAD", "A10 1AB"],
    ]

    def _fill(sheet, rows):
        # Append each row in order; the first row of every table is its header.
        for cells in rows:
            sheet.append(cells)

    wb = Workbook()

    houses_asset_data = wb.active
    houses_asset_data.title = "Houses Asset Data"
    _fill(houses_asset_data, asset_rows)

    _fill(wb.create_sheet("Houses"), house_rows)

    # Trailing space is intentional; matches source
    _fill(wb.create_sheet("All Energy Breakdown "), energy_rows)

    stream = BytesIO()
    wb.save(stream)
    stream.seek(0)
    return stream
def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes):
    """Parsing the fixture workbook yields two houses, each with one asset record."""
    # arrange
    expected_uprns = (1, 2)

    # act
    result: Any = LbwfParser().parse(lbwf_homes_xlsx_bytes)

    # assert
    # TODO: Improve these asserts
    assert len(result) == 2
    for position, expected_uprn in enumerate(expected_uprns):
        house = result[position]
        assert isinstance(house, LbwfHouse)
        assert house.uprn == expected_uprn
        assert len(house.assets) == 1
        assert isinstance(house.assets[0], LbwfAssetCondition)

View file

@ -0,0 +1,15 @@
import pytest
from backend.condition.parsing.factory import select_parser
from backend.condition.file_type import FileType
def test_selects_lbwf_parser():
    """``select_parser`` returns an object whose class is named ``LbwfParser`` for LBWF files."""
    # act
    selected = select_parser(FileType.LBWF)

    # assert
    assert "LbwfParser" == selected.__class__.__name__

View file

@ -0,0 +1,22 @@
import pytest
from backend.condition.file_type import FileType, detect_file_type
def test_detects_lbwf_file_type():
    """A path under ``uploads/lbwf/`` is detected as ``FileType.LBWF``."""
    # act
    detected: FileType = detect_file_type("uploads/lbwf/Exaple Asset Data.xlsx")

    # assert
    assert FileType.LBWF == detected
def test_unknown_filepath_raises_value_error():
    """A path that matches no known source makes ``detect_file_type`` raise ``ValueError``."""
    # arrange
    unrecognised_path = "unknown/Example Asset Data.xlsx"

    # act + assert
    with pytest.raises(ValueError):
        detect_file_type(unrecognised_path)

View file

@ -0,0 +1,18 @@
from datetime import datetime, date
from typing import Any
def normalise_date(value: Any, allow_none: bool = True) -> date | None:
if value is None and allow_none:
return None
if isinstance(value, datetime):
return value.date()
if isinstance(value, str):
try:
return datetime.strptime(value.strip(), "%d/%m/%Y").date()
except ValueError as exc:
raise ValueError(f"Invalid date string: {value!r}") from exc
raise ValueError(f"Unexpected date value: {value!r}")

View file

@ -0,0 +1,3 @@
"""
This script is set up to perform broad portfolio diagnostics to identify potential issues
"""

File diff suppressed because it is too large Load diff

View file

@ -26,21 +26,21 @@ class AnnualBillSavings:
AVERAGE_ELECTRICITY_CONSUMPTION = 2700
AVERAGE_GAS_CONSUMPTION = 11500
# Latest price cap figures from Ofgem are for April 2024
# Latest price cap figures from Ofgem are for Jan 2026 to March 2026
# https://www.ofgem.gov.uk/energy-price-cap
ELECTRICITY_PRICE_CAP = 0.2573
GAS_PRICE_CAP = 0.0633
# This is the most recent export payment figure, at 9.28p/kWh
ELECTRICITY_PRICE_CAP = 0.2769
GAS_PRICE_CAP = 0.0593
# This is the most recent export payment figure, at 13p/kWh - Updated Nov 2025
# Smart export guarantee rates can be found here:
# https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates
ELECTRICITY_EXPORT_PAYMENT = 0.0928
ELECTRICITY_EXPORT_PAYMENT = 0.13
# This is a weighted mean of the price caps, using the consumption figures above as weights
PRICE_FACTOR = 0.09549999999999999
# Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
DAILY_STANDARD_CHARGE_GAS = 0.2982
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5137
DAILY_STANDARD_CHARGE_GAS = 0.3509
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5475
# Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison
# For July 2024. These quotes are based on the east midlands region, so we
@ -263,7 +263,8 @@ class AnnualBillSavings:
if fuel == "Electricity":
return (kwh / cop) * cls.ELECTRICITY_PRICE_CAP
if fuel in ["Natural Gas", "Natural Gas (Community Scheme)"]:
# We handle "Unmapped" in a similar fashion to gas
if fuel in ["Natural Gas", "Natural Gas (Community Scheme)", "Unmapped"]:
return (kwh / cop) * cls.GAS_PRICE_CAP
if fuel == "LPG":
@ -285,10 +286,18 @@ class AnnualBillSavings:
# The solar thermal covers a % of the heating kwh, so we need to adjust the cost
return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.GAS_PRICE_CAP
if fuel == "Electricity + Solar Thermal":
if fuel in ["Electricity + Solar Thermal", 'Unmapped + Solar Thermal']:
# The solar thermal covers a % of the heating kwh, so we need to adjust the cost
return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.ELECTRICITY_PRICE_CAP
if fuel in ['Oil + Solar Thermal']:
# The solar thermal covers a % of the heating kwh, so we need to adjust the cost
price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Kerosene"].squeeze()
cost_per_kwh = cls.cost_per_kwh(
price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"]
)
return (kwh / cop) * cost_per_kwh * assumptions.SOLAR_CONSUMPTION_PROPORTION
if fuel == "LPG + Solar Thermal":
# The solar thermal covers a % of the heating kwh, so we need to adjust the cost
price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "LPG"].squeeze()

View file

@ -219,12 +219,19 @@ class PropertyValuation:
current_epc = property_instance.data["current-energy-rating"]
if not current_value:
# In this case, we return a % improvement rather than an absolute
relative_improvement = cls.estimate_valuation_improvement(
current_value=1,
current_epc=current_epc,
target_epc=target_epc,
total_cost=1
)
return {
"current_value": 0,
"lower_bound_increased_value": 0,
"upper_bound_increased_value": 0,
"average_increased_value": 0,
"average_increase": 0
"lower_bound_increased_value": relative_improvement["lower_bound_increased_value"] - 1,
"upper_bound_increased_value": relative_improvement["upper_bound_increased_value"] - 1,
"average_increased_value": relative_improvement["average_increased_value"] - 1,
"average_increase": relative_improvement["average_increase"]
}
return cls.estimate_valuation_improvement(current_value, current_epc, target_epc, total_cost)

View file

@ -44,7 +44,7 @@ class ModelApi:
self.timestamp = timestamp
self.prediction_buckets = prediction_buckets
self.max_retries = max_retries
self.semaphore = asyncio.Semaphore(2)
self.semaphore = asyncio.Semaphore(3)
@staticmethod
def get_aiohttp_session():
@ -117,7 +117,7 @@ class ModelApi:
}
async with self.semaphore:
await asyncio.sleep(random.uniform(0.3, 1.2))
# await asyncio.sleep(random.uniform(0.3, 1.2))
try:
async with session.post(url, json=payload, headers=headers, timeout=120) as response:
if response.status != 200:
@ -142,7 +142,8 @@ class ModelApi:
@staticmethod
def extract_phase(recommendation_id):
if 'phase=' in recommendation_id:
return int(recommendation_id.split('phase=')[1][0])
extracted = recommendation_id.split('phase=')[1]
return int(extracted.strip())
else:
return None
@ -211,13 +212,14 @@ class ModelApi:
response = await self.predict_async(f"s3://{bucket}/" + file_location, model_prefix, session=session)
return model_prefix, response
results = []
for coro in asyncio.as_completed([run_model(mp) for mp in model_prefixes]):
result = await coro
results.append(result)
# Run all model calls concurrently
results = await asyncio.gather(
*(run_model(mp) for mp in model_prefixes),
return_exceptions=True
)
for model_prefix, response in results:
if response:
if response and not isinstance(response, Exception):
predictions_bucket = self.prediction_buckets[model_prefix]
predictions_df = pd.DataFrame(
read_dataframe_from_s3_parquet(
@ -257,8 +259,10 @@ class ModelApi:
model_prefixes = self.MODEL_PREFIXES if model_prefies is None else model_prefies
session = self.get_aiohttp_session()
tasks = [
self._send_warm_up_request(session, f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict",
model_prefix)
self._send_warm_up_request(
session, f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict",
model_prefix
)
for model_prefix in model_prefixes
]
await asyncio.gather(*tasks, return_exceptions=True)
@ -271,7 +275,10 @@ class ModelApi:
"file_location": "s3://warm-up-placeholder",
"portfolio_id": 0,
"property_id": "",
"created_at": "2020-01-01T00:00:00"
"created_at": "2020-01-01T00:00:00",
"warm": True
# The presence of this key will send the api down a specific warm up route, to call
# prediction and load the font manager, because that is a key bottleneck for cold starts
}
async with session.post(url, json=json_payload, timeout=10) as response:
text = await response.text()

View file

@ -0,0 +1,14 @@
# Maps construction-year range labels to EPC-style "England and Wales: ..." age band strings.
# NOTE(review): the name `party_map` looks like a typo for `parity_map` (the name used by the
# sibling mapping modules); it is kept because importers reference `party_map`.
# NOTE(review): "2012 onwards" is mapped to the 2012-2021 band - confirm that is the intended
# band for properties built after 2021.
party_map = {
"Before 1900": 'England and Wales: before 1900',
"1900-1929": 'England and Wales: 1900-1929',
"1930-1949": 'England and Wales: 1930-1949',
"1950-1966": 'England and Wales: 1950-1966',
"1967-1975": 'England and Wales: 1967-1975',
"1976-1982": 'England and Wales: 1976-1982',
"1983-1990": 'England and Wales: 1983-1990',
"1991-1995": 'England and Wales: 1991-1995',
"1996-2002": 'England and Wales: 1996-2002',
"2003-2006": 'England and Wales: 2003-2006',
"2007-2011": 'England and Wales: 2007-2011',
"2012 onwards": 'England and Wales: 2012-2021',
}

View file

@ -0,0 +1,15 @@
# Maps built-form labels written without separators (e.g. "MidTerrace") to their
# hyphenated / spaced equivalents (e.g. "Mid-Terrace").
parity_map = {
"MidTerrace": "Mid-Terrace",
"EndTerrace": "End-Terrace",
"Detached": "Detached",
"SemiDetached": "Semi-Detached",
"EnclosedMidTerrace": "Enclosed Mid-Terrace",
"EnclosedEndTerrace": "Enclosed End-Terrace",
}
# MidTerrace 41462
# EndTerrace 20910
# Detached 16875
# SemiDetached 14725
# EnclosedMidTerrace 3176
# EnclosedEndTerrace 2393

View file

@ -0,0 +1,6 @@
# Property-type mapping. Every listed value maps to itself, so the dict effectively
# acts as a whitelist: a value that is not a key here has no mapping.
parity_map = {
"Flat": "Flat",
"Maisonette": "Maisonette",
"Bungalow": "Bungalow",
"House": "House",
}

View file

@ -0,0 +1,3 @@
# Placeholder mapping: no entries have been defined yet.
parity_map = {
}

View file

@ -0,0 +1,95 @@
import pandas as pd
from etl.epc.DataProcessor import construction_age_bounds_map
from backend.onboarders.mappings.property_type import parity_map as property_map
from backend.onboarders.mappings.age_band import party_map as age_band_map
from backend.onboarders.mappings.built_form import parity_map as built_form_map
def check_nulls(data, original_column, mapped_column):
    """Check that ``mapped_column`` is null only where ``original_column`` was null.

    Args:
        data: DataFrame holding both columns.
        original_column: Name of the source column that was mapped.
        mapped_column: Name of the column produced by the mapping.

    Returns:
        True when the check passes (previously the non-trivial success path
        fell through and returned None).

    Raises:
        AssertionError: if a row has a null mapped value but a non-null
            original value, i.e. the mapping missed a real value. Raised
            explicitly so the check still runs under ``python -O``.
    """
    # We only allow nulls if the original value was null
    unmapped_rows = data[pd.isnull(data[mapped_column])]
    if unmapped_rows.empty:
        return True

    # We make sure all original values were null
    if not pd.isnull(unmapped_rows[original_column]).all():
        raise AssertionError(
            f"Some values in {mapped_column} were not mapped, but original values were not null"
        )
    return True
# Sample input data
# Reads the "Sustainability" sheet of the extract workbook into a DataFrame.
# NOTE(review): the path is an absolute location on one developer's machine, so this
# script only runs where that exact file exists.
data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
)
# We want to map the parity fields to standard EPC references. This will allow us to
# 1) Estimate EPCs, more accurately
# 2) Patch incorrect EPCs with ease
# 3) Indicate already installed measures
# ------------ construction_age_band ------------
# Map to EPC age bands
# def construction_date_to_band(year):
# if pd.isnull(year):
# return None
# # Get the year from the date which is numpy datetime format
# for label, ranges in construction_age_bounds_map.items():
# if ranges["l"] <= year <= ranges["u"]:
# return label
# raise NotImplementedError("year out of bounds")
#
#
# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band)
# Map the "Construction Years" labels via age_band_map; labels with no entry become null.
data["construction_age_band"] = data["Construction Years"].map(age_band_map)
# Nulls are tolerated here only where the source value was itself null.
check_nulls(data, "Construction Years", "construction_age_band")
# ------------ property_type ------------
# Unlike the age band, no nulls at all are tolerated after mapping "Type".
data["property_type"] = data["Type"].map(property_map)
assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped"
# ------------ built_form ------------
# Same strict rule for "Attachment": every value must have a mapping.
data["built_form"] = data["Attachment"].map(built_form_map)
assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped"
# ------------ Wall Construction ------------
# Combined "<construction>+<insulation>" label; a missing insulation value is
# replaced with "Unknown Insulation" before concatenation.
data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation")
# The two value_counts() results below are not stored or printed; they only show
# output when the script is run interactively.
data["Wall Insulation"].value_counts()
data["Wall Construction"].value_counts()
# Per wall construction type: the age bands for which an "AsBuilt" wall counts as
# insulated or partially insulated. All lists are currently empty.
as_built_map = {
"Cavity": {"insulated_age_bands":[], "partial_insulated_age_bands": []},
"Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"System": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
}
def map_wall_construction(wall_constuction, wall_insulation, construction_age_band):
    """Unfinished helper for deducing wall insulation of "AsBuilt" walls.

    At present it only validates that ``wall_constuction`` has an entry in
    ``as_built_map`` when ``wall_insulation`` is "AsBuilt"; it always returns
    ``None`` and does not yet use ``construction_age_band``.

    Raises:
        NotImplementedError: if the wall construction is not in ``as_built_map``.
    """
    if wall_insulation != "AsBuilt":
        return None

    # Deduce based on wall construction and age band
    bands = as_built_map.get(wall_constuction)
    if bands is None:
        raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map")

    # We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated
    return None
# Variables we want to map
# NOTE(review): the bare tuple below is evaluated and discarded - it has no effect at
# runtime and serves only as a reminder list (presumably the column names of the input
# sheet; several of them are read from `data` above - confirm the rest).
'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type',
'Attachment', 'Construction Years', 'Wall Construction',
'Wall Insulation', 'Roof Construction', 'Roof Insulation',
'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating',
'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
'Total Floor Area (m2)'

11
backend/run_curl.sh Normal file
View file

@ -0,0 +1,11 @@
# POST each sample postcode to the local WHLG eligibility endpoint, in order.
for postcode in "B93 8SY" "BN15 0FD" "DY6 0LB"; do
  curl -X POST "http://localhost:8000/v1/whlg/eligible" \
    -H "Content-Type: application/json" \
    -d "{\"postcode\": \"${postcode}\"}"
done

6
backend/run_local.sh Normal file
View file

@ -0,0 +1,6 @@
# Start the API locally with the variables from ./.env in its environment.
# `set -a` marks every variable assigned from here on for export, so everything
# defined while sourcing .env is inherited by the uvicorn process.
set -a
source ./.env
# Switch auto-export back off now that .env has been read.
set +a
# Serve app.main:app, reloading automatically when code changes.
uvicorn app.main:app --reload

View file

@ -4,7 +4,7 @@ innovation_scenarios = [
# 1) Innovation PV, non-eligible heating system in place, EPC D - not eligible
{
"description": "Innovation PV, non-eligible heating system in place, EPC D",
"measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}],
"measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}],
"starting_sap": 60,
"mainheat_description": "Electric storage heaters",
"heating_control_description": "Manual charge control",
@ -16,7 +16,7 @@ innovation_scenarios = [
# 2) Innovation PV, eligible heating system in place, EPC D - eligible
{
"description": "Innovation PV, eligible heating system in place, EPC D",
"measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}],
"measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}],
"starting_sap": 60,
"mainheat_description": "Air source heat pump, radiators",
"heating_control_description": "Programmer, room thermostat and TRVs",
@ -29,8 +29,8 @@ innovation_scenarios = [
{
"description": "Innovation PV + HHRSH upgrade, EPC E",
"measures": [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "high_heat_retention_storage_heater", "is_innovation": True, "uplift": 0.1}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "high_heat_retention_storage_heaters", "is_innovation": False, "innovation_uplift": 0}
],
"starting_sap": 50,
"mainheat_description": "Electric storage heaters",
@ -44,8 +44,8 @@ innovation_scenarios = [
{
"description": "Innovation PV + HHRSH upgrade, EPC E",
"measures": [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "high_heat_retention_storage_heater", "is_innovation": True, "uplift": 0.1}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "high_heat_retention_storage_heaters", "is_innovation": False, "innovation_uplift": 0}
],
"starting_sap": 50,
"mainheat_description": "Electric storage heaters",
@ -58,7 +58,7 @@ innovation_scenarios = [
# 5) Innovation PV, needs wall insulation, no wall insulation measure - not eligible
{
"description": "Innovation PV, wall insulation recommended, but not installed",
"measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}],
"measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}],
"starting_sap": 60,
"mainheat_description": "Air source heat pump, radiators",
"heating_control_description": "Programmer, room thermostat and TRVs",
@ -71,8 +71,8 @@ innovation_scenarios = [
{
"description": "Innovation PV, wall insulation recommended and installed",
"measures": [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}
],
"starting_sap": 60,
"mainheat_description": "Air source heat pump, radiators",
@ -85,7 +85,7 @@ innovation_scenarios = [
# 7) Innovation PV, needs roof insulation, no roof insulation measure - not eligible
{
"description": "Innovation PV, roof insulation recommended, not installed",
"measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}],
"measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}],
"starting_sap": 60,
"mainheat_description": "Air source heat pump, radiators",
"heating_control_description": "Programmer, room thermostat and TRVs",
@ -98,8 +98,8 @@ innovation_scenarios = [
{
"description": "Innovation PV, roof insulation recommended and installed",
"measures": [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}
],
"starting_sap": 60,
"mainheat_description": "Air source heat pump, radiators",
@ -112,7 +112,7 @@ innovation_scenarios = [
# 9) Innovation PV, needs both roof + wall insulation, no insulation - not eligible
{
"description": "Innovation PV, both insulations recommended, none installed",
"measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}],
"measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}],
"starting_sap": 60,
"mainheat_description": "Air source heat pump, radiators",
"heating_control_description": "Programmer, room thermostat and TRVs",
@ -125,8 +125,8 @@ innovation_scenarios = [
{
"description": "Innovation PV, both insulations recommended, only wall done",
"measures": [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}
],
"starting_sap": 60,
"mainheat_description": "Air source heat pump, radiators",
@ -140,8 +140,8 @@ innovation_scenarios = [
{
"description": "Innovation PV, both insulations recommended, only roof done",
"measures": [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}
],
"starting_sap": 60,
"mainheat_description": "Air source heat pump, radiators",
@ -155,9 +155,9 @@ innovation_scenarios = [
{
"description": "Innovation PV, both insulations recommended and installed",
"measures": [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}
],
"starting_sap": 60,
"mainheat_description": "Air source heat pump, radiators",

View file

@ -120,7 +120,7 @@ def test_eco4_prs_eligible_with_swi(
# 3) is getting a solid wall measure
# so it's eligible for ECO4
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
funding.check_funding(
measures=measures,
starting_sap=50, # EPC E
@ -162,7 +162,7 @@ def test_eco4_prs_not_eligible_high_epc(
tenure="Private",
)
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
funding.check_funding(
measures=measures,
starting_sap=72, # EPC C (too high)
@ -203,7 +203,7 @@ def test_gbis_prs_general_eligibility(
tenure="Private",
)
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
funding.check_funding(
measures=measures,
starting_sap=65, # EPC D
@ -244,7 +244,7 @@ def test_gbis_prs_low_income_caveat(
tenure="Private",
)
measures = [{"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
funding.check_funding(
measures=measures,
starting_sap=60, # EPC D
@ -290,7 +290,7 @@ def test_eco4_sh_epc_e_eligible(
tenure="Social",
)
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
funding.check_funding(
measures=measures,
starting_sap=50, # EPC E
@ -330,7 +330,7 @@ def test_eco4_sh_epc_d_requires_innovation(
tenure="Social",
)
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
funding.check_funding(
measures=measures,
starting_sap=60, # EPC D
@ -365,7 +365,7 @@ def test_eco4_sh_epc_d_requires_innovation(
gbis_private_solid_abs_rate=28,
tenure="Social",
)
measures2 = [{"type": "internal_wall_insulation", "is_innovation": True, "uplift": 0.25}]
measures2 = [{"type": "internal_wall_insulation", "is_innovation": True, "innovation_uplift": 0.25}]
funding2.check_funding(
measures=measures2,
starting_sap=60, # EPC D
@ -403,7 +403,7 @@ def test_eco4_sh_epc_d_requires_innovation(
gbis_private_solid_abs_rate=28,
tenure="Social",
)
measures3 = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}]
measures3 = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}]
funding3.check_funding(
measures=measures3,
starting_sap=60, # EPC D
@ -439,7 +439,7 @@ def test_eco4_sh_epc_d_requires_innovation(
tenure="Social",
)
measures4 = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, ]
measures4 = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, ]
funding4.check_funding(
measures=measures4,
starting_sap=60, # EPC D
@ -476,8 +476,8 @@ def test_eco4_sh_epc_d_requires_innovation(
)
measures5 = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "high_heat_retention_storage_heater", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "high_heat_retention_storage_heaters", "is_innovation": False, "innovation_uplift": 0}
]
funding5.check_funding(
measures=measures5,
@ -516,7 +516,7 @@ def test_eco4_sh_epc_d_requires_innovation(
)
measures6 = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
]
funding6.check_funding(
measures=measures6,
@ -556,9 +556,9 @@ def test_eco4_sh_epc_d_requires_innovation(
tenure="Social",
)
measures7 = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0.25},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}
]
funding7.check_funding(
measures=measures7,
@ -599,7 +599,7 @@ def test_eco4_sh_solar_pv_requires_heating(
tenure="Social",
)
measures = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}]
measures = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}]
funding.check_funding(
measures=measures,
starting_sap=60, # EPC D
@ -641,8 +641,8 @@ def test_eco4_sh_solar_pv_with_heating_is_ok(
)
measures = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}
]
funding.check_funding(
measures=measures,
@ -684,7 +684,7 @@ def test_eco4_upgrade_requirement_e_to_c_pass(
tenure="Private",
)
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
# E (SAP 50) → C (SAP 70) meets upgrade rule
funding.check_funding(
@ -727,7 +727,7 @@ def test_eco4_upgrade_requirement_e_to_d_fail(
tenure="Private",
)
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
# E (SAP 50) → D (SAP 65) does NOT meet ECO4 upgrade rule
funding.check_funding(
@ -770,7 +770,7 @@ def test_eco4_upgrade_requirement_f_to_d_pass(
tenure="Private",
)
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
# F (SAP 35) → D (SAP 60) is OK for ECO4
funding.check_funding(
@ -813,7 +813,7 @@ def test_eco4_upgrade_requirement_f_to_e_fail(
tenure="Private",
)
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}]
measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}]
# F (SAP 35) → E (SAP 50) does NOT meet ECO4 rule
funding.check_funding(
@ -859,7 +859,7 @@ def test_epc_d_social_no_innovation_no_heating(
)
measures = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}
]
funding.check_funding(
@ -905,10 +905,10 @@ def test_epc_d_social_with_heating_and_insulation(
# Should NOT be eligible as the ASHP is not an innovation measure
measures = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0},
{"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}
]
funding.check_funding(
@ -954,9 +954,9 @@ def test_epc_d_social_solar_with_only_minimum_insulation_should_fail(
# Solar PV innovation with insulation, but no heating system upgrade => not eligible
measures = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}
]
funding.check_funding(
@ -1002,8 +1002,8 @@ def test_epc_d_social_solar_with_ashp_and_no_insulation_should_fail(
# Solar PV innovation with heating, but no insulation when insulation is recommended => not eligible
measures = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}
]
funding.check_funding(
@ -1050,10 +1050,10 @@ def test_epc_d_social_solar_with_heating_and_minimum_insulation_should_pass(
# Innovation solar + insulation measures + eligible heating upgrade = not valid because the heat pump isn't
# an innovation measure
measures = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0},
{"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}
]
funding.check_funding(
@ -1095,10 +1095,10 @@ def test_epc_d_social_solar_with_heating_and_minimum_insulation_should_pass(
# Innovation solar + insulation measures + eligible heating upgrade = should be valid because the
# heat pump is an innovation measure
measures2 = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0},
{"type": "air_source_heat_pump", "is_innovation": True, "uplift": 0.25}
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "air_source_heat_pump", "is_innovation": True, "innovation_uplift": 0.25}
]
funding2.check_funding(
@ -1203,11 +1203,11 @@ def test_uplift(
# # TODO: Add a scenario with multiple measures, where some are innovation, some are not and we have
# TODO: Make sure private works too
measures = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0},
{"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0},
{"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0.25},
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0},
{"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25},
]
funding.check_funding(
@ -1229,7 +1229,7 @@ def test_uplift(
)
assert funding.eco4_funding == 5302.3949999999995
assert funding.full_project_abs == 392.77 # is 280 + the 112.77 innovation uplift
assert funding.full_project_abs == 280 # Doesn't include the eco4 uplift
assert funding.eco4_uplift == 112.77
@ -1311,7 +1311,7 @@ def test_private_epc_e_solar_needs_heating(
tenure="Private",
)
measures = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}]
measures = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}]
funding.check_funding(
measures=measures,
starting_sap=54, # EPC E - eligible for private on EPC
@ -1360,10 +1360,10 @@ def test_private_epc_e_solar_with_heating_and_minimum_insulation_produces_uplift
)
measures = [
{"type": "solar_pv", "is_innovation": True, "uplift": 0.45},
{"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0},
{"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "uplift": 0},
{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45},
{"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0},
{"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0},
{"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0},
]
funding.check_funding(
@ -1393,3 +1393,85 @@ def test_private_epc_e_solar_with_heating_and_minimum_insulation_produces_uplift
assert funding.eco4_uplift and funding.eco4_uplift > 0
# And total funding should include that uplift
assert funding.eco4_funding and funding.eco4_funding > 0
def test_existing_gshp_to_ashp():
    """
    An air source heat pump recommended for a property that already has a ground source
    heat pump should produce no partial project score, no partial project funding and no
    innovation uplift (all four values returned by get_innovation_uplift are zero).
    """
    # The recommended measure: install an air source heat pump (not already installed)
    ashp_measure = {
        "phase": 3,
        "parts": [],
        "type": "heating",
        "measure_type": "air_source_heat_pump",
        "description": "Install a 5KW air source heat pump, and upgrade heating controls to Smart Thermostats, "
                       "room sensors and smart radiator valves (time & temperature zone control). Ensure you have a "
                       "single tariff",
        "starting_u_value": None,
        "new_u_value": None,
        "sap_points": 7.7,
        "already_installed": False,
        "simulation_config": {
            "mainheat_energy_eff_ending": "Good",
            "hot_water_energy_eff_ending": "Average",
            "has_air_source_heat_pump_ending": True,
            "has_ground_source_heat_pump_ending": False,
            "extra_features_ending": None,
            "thermostatic_control_ending": "time and temperature zone control",
            "switch_system_ending": None,
            "multiple_room_thermostats_ending": False,
            "mainheatc_energy_eff_ending": "Very Good",
        },
        "description_simulation": {
            "mainheat-description": "Air source heat pump, radiators, electric",
            "mainheat-energy-eff": "Good",
            "hot-water-energy-eff": "Average",
            "hotwater-description": "From main system",
            "mainheatcont-description": "Time and temperature zone control",
            "mainheatc-energy-eff": "Very Good",
        },
        "total": 13188.996000000001,
        "contingency": 3145.8150000000005,
        "contingency_rate": 0.35,
        "vat": 2080.666,
        "labour_hours": 44.7,
        "labour_days": 6.0,
        "innovation_rate": 0,
        "recommendation_id": "6_phase=3",
        "efficiency": 13188.996000000001,
        "co2_equivalent_savings": 0.4999999999999998,
        "heat_demand": 53.20000000000002,
        "kwh_savings": 801.5000000000005,
        "energy_cost_savings": 327.31316785714296,
    }

    # The existing heating system: an electric ground source heat pump with radiators
    existing_mainheating = {
        "original_description": "Ground source heat pump, radiators, electric",
        "clean_description": "Ground source heat pump, radiators, electric",
        "has_radiators": True,
        "has_fan_coil_units": False,
        "has_pipes_in_screed_above_insulation": False,
        "has_pipes_in_insulated_timber_floor": False,
        "has_pipes_in_concrete_slab": False,
        "has_boiler": False,
        "has_air_source_heat_pump": False,
        "has_room_heaters": False,
        "has_electric_storage_heaters": False,
        "has_warm_air": False,
        "has_electric_underfloor_heating": False,
        "has_electric_ceiling_heating": False,
        "has_community_scheme": False,
        "has_ground_source_heat_pump": True,
        "has_no_system_present": False,
        "has_portable_electric_heaters": False,
        "has_water_source_heat_pump": False,
        "has_electric_heat_pump": False,
        "has_micro-cogeneration": False,
        "has_solar_assisted_heat_pump": False,
        "has_exhaust_source_heat_pump": False,
        "has_community_heat_pump": False,
        "has_hot-water-only": False,
        "has_electric": True,
        "has_mains_gas": False,
        "has_wood_logs": False,
        "has_coal": False,
        "has_oil": False,
        "has_wood_pellets": False,
        "has_anthracite": False,
        "has_dual_fuel_mineral_and_wood": False,
        "has_smokeless_fuel": False,
        "has_lpg": False,
        "has_b30k": False,
        "has_mineral_and_wood": False,
        "has_dual_fuel_appliance": False,
        "has_assumed": False,
        "has_electricaire": False,
        "has_assumed_for_most_rooms": False,
        "has_underfloor_heating": False,
    }

    existing_main_fuel = {
        "original_description": "electricity (not community)",
        "clean_description": "Electricity not community",
        "fuel_type": "electricity",
        "tariff_type": None,
        "is_community": False,
        "no_individual_heating_or_community_network": False,
        "complex_fuel_type": None,
    }

    # NOTE(review): the mocks below are referenced as bare names although this test declares
    # no parameters, while other tests in this file appear to take multi-line parameter lists.
    # Confirm these are module-level objects here rather than pytest fixtures.
    funding = Funding(
        project_scores_matrix=mock_project_scores_matrix,
        partial_project_scores_matrix=mock_partial_scores_matrix,
        whlg_eligible_postcodes=mock_whlg_postcodes,
        eco4_social_cavity_abs_rate=13.5,
        eco4_social_solid_abs_rate=17,
        eco4_private_cavity_abs_rate=13.5,
        eco4_private_solid_abs_rate=17,
        gbis_social_cavity_abs_rate=21,
        gbis_social_solid_abs_rate=25,
        gbis_private_cavity_abs_rate=22,
        gbis_private_solid_abs_rate=28,
        tenure="Private",
    )

    uplift_result = funding.get_innovation_uplift(
        measure=ashp_measure,
        starting_sap=62,
        floor_area=69,
        is_cavity=True,
        current_wall_uvalue=0.7,
        is_partial=False,
        existing_li_thickness=200,
        mainheating=existing_mainheating,
        main_fuel=existing_main_fuel,
        mainheat_energy_eff="Poor",
    )
    (
        partial_project_score,
        partial_project_funding,
        innovation_uplift,
        uplift_project_score,
    ) = uplift_result

    # All should be zero
    assert partial_project_score == 0
    assert partial_project_funding == 0
    assert innovation_uplift == 0
    assert uplift_project_score == 0

View file

@ -1,36 +1,36 @@
import ast
import json
# import ast
# import json
from copy import deepcopy
from dataclasses import replace
from datetime import datetime
# from dataclasses import replace
# from datetime import datetime
import random
from tqdm import tqdm
import pandas as pd
# import pandas as pd
import numpy as np
from etl.epc.Record import EPCRecord
from backend.SearchEpc import SearchEpc
from sqlalchemy.exc import IntegrityError, OperationalError
from sqlalchemy.orm import sessionmaker
from starlette.responses import Response
# from backend.SearchEpc import SearchEpc
# from sqlalchemy.exc import IntegrityError, OperationalError
# from sqlalchemy.orm import sessionmaker
# from starlette.responses import Response
from backend.app.config import get_settings, get_prediction_buckets
from backend.app.db.connection import db_engine
from backend.app.db.functions.materials_functions import get_materials
from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
from backend.app.db.functions.property_functions import (
create_property, create_property_details_epc, create_property_targets, update_property_data,
update_or_create_property_spatial_details
)
from backend.app.db.functions.recommendations_functions import (
create_plan, upload_recommendations, create_scenario
)
from backend.app.db.functions.funding_functions import upload_funding
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
from backend.app.db.models.portfolio import rating_lookup
# from backend.app.config import get_settings, get_prediction_buckets
# from backend.app.db.connection import db_engine
# from backend.app.db.functions.materials_functions import get_materials
# from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations
# from backend.app.db.functions.property_functions import (
# create_property, create_property_details_epc, create_property_targets, update_property_data,
# update_or_create_property_spatial_details
# )
# from backend.app.db.functions.recommendations_functions import (
# create_plan, upload_recommendations, create_scenario
# )
# from backend.app.db.functions.funding_functions import upload_funding
# from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
# from backend.app.db.models.portfolio import rating_lookup
from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES
from backend.app.plan.utils import get_cleaned
from backend.app.utils import sap_to_epc
# from backend.app.plan.utils import get_cleaned
# from backend.app.utils import sap_to_epc
import backend.app.assumptions as assumptions
from backend.ml_models.api import ModelApi
@ -41,13 +41,13 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
import recommendations.optimiser.optimiser_functions as optimiser_functions
from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3
from backend.ml_models.Valuation import PropertyValuation
from etl.bill_savings.KwhData import KwhData
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
# from utils.logger import setup_logger
# from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3
# from backend.ml_models.Valuation import PropertyValuation
#
# from etl.bill_savings.KwhData import KwhData
# from etl.spatial.OpenUprnClient import OpenUprnClient
# from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from backend.Funding import Funding
from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths
@ -72,7 +72,7 @@ with open("kwh_client_for_deletion.pkl", "rb") as f:
kwh_client = pickle.load(f)
epc_data = pd.read_csv(
"/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates.csv",
"/Users/khalimconn-kowlessar/Downloads/domestic-E06000002-Middlesbrough/certificates.csv",
low_memory=False
)
@ -82,6 +82,12 @@ costs_by_floor_area = epc_data[
][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT",
"HOT_WATER_COST_CURRENT"]].copy()
epc_data = epc_data[
(epc_data["MAINHEAT_DESCRIPTION"].str.contains("SAP05:") == False) &
(~epc_data["LIGHTING_COST_CURRENT"].isin([None, ""])) &
(~pd.isnull(epc_data["LIGHTING_COST_CURRENT"]))
]
costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns]
for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]:
costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"]
@ -90,8 +96,10 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[
["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"]
].mean().reset_index()
sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample(
1000).reset_index(drop=True)
epc_data = epc_data[~pd.isnull(epc_data["UPRN"])]
sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2008-01-01"].drop_duplicates("UPRN").sample(
50000).reset_index(drop=True)
# TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type
# TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used
@ -161,6 +169,8 @@ mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_pred
mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"])
# TODO: We might want to implement this generally, via an ETL process
for x in cleaned["mainheat-description"]:
x["has_wood_chips"] = False
for p in input_properties:
for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]:
if pd.isnull(p.data[col]):
@ -302,10 +312,19 @@ body = PlanTriggerRequest(
'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None}
)
eco_packages = {}
# For testing
for p in input_properties:
eco_packages[p.id] = (None, None, None)
for p in tqdm(input_properties):
if not recommendations.get(p.id):
continue
# Temp allow to skip
if not isinstance(recommendations.get(p.id)[0], list):
continue
# we need to double unlist because we have a list of lists
property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs}
property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures]
@ -327,34 +346,34 @@ for p in tqdm(input_properties):
fixed_gain = optimiser_functions.calculate_fixed_gain(
property_required_measures, recommendations, p, needs_ventilation
)
gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain)
gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages)
funding = Funding(
tenure="Social",
project_scores_matrix=project_scores_matrix,
partial_project_scores_matrix=partial_project_scores_matrix,
whlg_eligible_postcodes=whlg_eligible_postcodes,
eco4_social_cavity_abs_rate=12.5,
eco4_social_solid_abs_rate=17,
eco4_private_cavity_abs_rate=12.5,
eco4_private_solid_abs_rate=17,
gbis_social_cavity_abs_rate=21,
gbis_social_solid_abs_rate=25,
gbis_private_cavity_abs_rate=21,
gbis_private_solid_abs_rate=28,
)
li_thickness = convert_thickness_to_numeric(
p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"]
)
current_wall_u_value = p.walls["thermal_transmittance"]
if current_wall_u_value is None:
current_wall_u_value = get_wall_u_value(
clean_description=p.walls["clean_description"],
age_band=p.age_band,
is_granite_or_whinstone=p.walls["is_granite_or_whinstone"],
is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"],
)
# funding = Funding(
# tenure=body.housing_type,
# project_scores_matrix=project_scores_matrix,
# partial_project_scores_matrix=partial_project_scores_matrix,
# whlg_eligible_postcodes=whlg_eligible_postcodes,
# eco4_social_cavity_abs_rate=13,
# eco4_social_solid_abs_rate=17,
# eco4_private_cavity_abs_rate=13,
# eco4_private_solid_abs_rate=17,
# gbis_social_cavity_abs_rate=21,
# gbis_social_solid_abs_rate=25,
# gbis_private_cavity_abs_rate=21,
# gbis_private_solid_abs_rate=28,
# )
#
# li_thickness = convert_thickness_to_numeric(
# p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"]
# )
# current_wall_u_value = p.walls["thermal_transmittance"]
# if current_wall_u_value is None:
# current_wall_u_value = get_wall_u_value(
# clean_description=p.walls["clean_description"],
# age_band=p.age_band,
# is_granite_or_whinstone=p.walls["is_granite_or_whinstone"],
# is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"],
# )
# We insert the innovation uplift
measures_to_optimise_with_uplift = deepcopy(measures_to_optimise)
@ -362,41 +381,53 @@ for p in tqdm(input_properties):
# TODO: Turn this into a function and store the innovation uplift
for group in measures_to_optimise_with_uplift:
for r in group:
if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating",
"extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]:
(
r["partial_project_score"],
r["partial_project_funding"],
r["innovation_uplift"],
r["uplift_project_score"],
) = (
0, 0, 0, 0
)
continue
(
r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
r["uplift_project_score"]
) = funding.get_innovation_uplift(
measure=r,
starting_sap=p.data["current-energy-efficiency"],
floor_area=p.floor_area,
is_cavity=p.walls["is_cavity_wall"],
current_wall_uvalue=current_wall_u_value,
is_partial="partial" in p.walls["clean_description"].lower(),
existing_li_thickness=li_thickness,
mainheating=p.main_heating,
main_fuel=p.main_fuel,
mainheat_energy_eff=p.data["mainheat-energy-eff"],
(r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
r["uplift_project_score"]) = (
0, 0, 0, 0
)
# if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating",
# "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]:
# (
# r["partial_project_score"],
# r["partial_project_funding"],
# r["innovation_uplift"],
# r["uplift_project_score"],
# ) = (
# 0, 0, 0, 0
# )
# continue
#
# (
# r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
# r["uplift_project_score"]
# ) = funding.get_innovation_uplift(
# measure=r,
# starting_sap=int(p.data["current-energy-efficiency"]),
# floor_area=p.floor_area,
# is_cavity=p.walls["is_cavity_wall"],
# current_wall_uvalue=current_wall_u_value,
# is_partial="partial" in p.walls["clean_description"].lower(),
# existing_li_thickness=li_thickness,
# mainheating=p.main_heating,
# main_fuel=p.main_fuel,
# mainheat_energy_eff=p.data["mainheat-energy-eff"],
# )
if r["already_installed"]:
# if already installed, we zero out the uplift and funding
(r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
r["uplift_project_score"]) = (
0, 0, 0, 0
)
input_measures = optimiser_functions.prepare_input_measures(
measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True
measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True,
property_eco_packages=eco_packages.get(p.id)
)
# When the goal is Increasing EPC, we can run the funding optimiser
if body.goal == "Increasing EPC":
if body.goal == "Switch off":
solutions = optimise_with_funding_paths(
p=p,
@ -404,20 +435,14 @@ for p in tqdm(input_properties):
housing_type=body.housing_type,
budget=body.budget,
target_gain=gain,
funding=funding
funding=funding,
work_package=eco_packages[p.id][2]
)
# Given the solutions we select the optimal one
solutions["cost_less_full_project_funding"] = np.where(
solutions["scheme"] == "eco4",
solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"],
solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"]
)
solutions["cost_less_full_project_funding"] = (
solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"]
)
solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True)
# If the solution isn't eligible, we can't really consider it
solutions = solutions[
(solutions["is_eligible"] & (solutions["scheme"] != "none")) | (solutions["scheme"] == "none")
]
if solutions["meets_upgrade_target"].any():
# If we have a solution that meets the upgrade target, we select that one
@ -428,9 +453,13 @@ for p in tqdm(input_properties):
# This is the list of measures that we will recommend
scheme = optimal_solution["scheme"]
funded_measures = optimal_solution["items"] if scheme != "none" else []
solution = optimal_solution["items"] + optimal_solution["unfunded_items"]
# This is the total amount of funding that the project will produce (including uplifts) (£)
# We create this full list of selected measures, which is used in the next section for setting
# default measures
solution = deepcopy(optimal_solution["items"]) + deepcopy(optimal_solution["unfunded_items"])
funded_measures = deepcopy(optimal_solution["items"]) if scheme != "none" else []
# This is the total amount of funding that the project will produce (EXCLUDING uplifts) (£)
project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \
optimal_solution["partial_project_funding"]
# This is the total amount of funding associated to the uplift (£)
@ -468,37 +497,43 @@ for p in tqdm(input_properties):
ROOF_INSULATION_MEASURES
)
funding.check_funding(
measures=solution,
starting_sap=p.data["current-energy-efficiency"],
ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]),
floor_area=p.floor_area,
mainheat_description=p.main_heating["clean_description"],
heating_control_description=p.main_heating_controls["clean_description"],
is_cavity=p.walls["is_cavity_wall"],
current_wall_uvalue=current_wall_u_value,
is_partial="partial" in p.walls["clean_description"].lower(),
existing_li_thickness=li_thickness,
mainheating=p.main_heating,
main_fuel=p.main_fuel,
mainheat_energy_eff=p.data["mainheat-energy-eff"],
has_wall_insulation_recommendation=has_wall_insulation_recommendation,
has_roof_insulation_recommendation=has_roof_insulation_recommendation,
)
# funding.check_funding(
# measures=solution,
# starting_sap=int(p.data["current-energy-efficiency"]),
# ending_sap=int(p.data["current-energy-efficiency"]) + sum([x["gain"] for x in solution]),
# floor_area=p.floor_area,
# mainheat_description=p.main_heating["clean_description"],
# heating_control_description=p.main_heating_controls["clean_description"],
# is_cavity=p.walls["is_cavity_wall"],
# current_wall_uvalue=current_wall_u_value,
# is_partial="partial" in p.walls["clean_description"].lower(),
# existing_li_thickness=li_thickness,
# mainheating=p.main_heating,
# main_fuel=p.main_fuel,
# mainheat_energy_eff=p.data["mainheat-energy-eff"],
# has_wall_insulation_recommendation=has_wall_insulation_recommendation,
# has_roof_insulation_recommendation=has_roof_insulation_recommendation,
# )
# Determine the scheme
scheme = "none"
if funding.eco4_eligible:
scheme = "eco4"
if scheme == "none" and funding.gbis_eligible:
scheme = "gbis"
# if funding.eco4_eligible:
# scheme = "eco4"
# if scheme == "none" and funding.gbis_eligible:
# scheme = "gbis"
funded_measures = solution if scheme in ["gbis", "eco4"] else []
project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs
total_uplift = funding.eco4_uplift
full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs
partial_project_score = funding.partial_project_abs
uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift
funded_measures = []
# funded_measures = solution if scheme in ["gbis", "eco4"] else []
# project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs
project_funding = 0
# total_uplift = funding.eco4_uplift
total_uplift = 0
# full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs
full_project_score = 0
# partial_project_score = funding.partial_project_abs
partial_project_score = 0
# uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift
uplift_project_score = 0
selected = {r["id"] for r in solution}
@ -510,10 +545,10 @@ for p in tqdm(input_properties):
# Add best practice measures (ventilation/trickle vents)
selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected)
# Final flattening - Don't do this!
# recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults(
# p.id, recommendations, selected
# )
# Final flattening
recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults(
p.id, recommendations, selected
)
# TODO: functionise
for measure in funded_measures:
@ -529,3 +564,231 @@ for p in tqdm(input_properties):
partial_project_score=partial_project_score,
uplift_project_score=uplift_project_score
)
# for p in tqdm(input_properties):
# if not recommendations.get(p.id):
# continue
#
# # we need to double unlist because we have a list of lists
# property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs}
# property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures]
# measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures]
#
# # If a measure requiring ventilation is selected, and the property does not have ventilation, we enforce
# # its inclusion
# needs_ventilation = any(
# x in property_measure_types for x in assumptions.measures_needing_ventilation
# ) and not p.has_ventilation
#
# if not measures_to_optimise:
# # Nothing to do, we just reshape the recommendations
# recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults(
# p.id, recommendations, set()
# )
# continue
#
# fixed_gain = optimiser_functions.calculate_fixed_gain(
# property_required_measures, recommendations, p, needs_ventilation
# )
# gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain)
#
# funding = Funding(
# tenure="Social",
# project_scores_matrix=project_scores_matrix,
# partial_project_scores_matrix=partial_project_scores_matrix,
# whlg_eligible_postcodes=whlg_eligible_postcodes,
# eco4_social_cavity_abs_rate=12.5,
# eco4_social_solid_abs_rate=17,
# eco4_private_cavity_abs_rate=12.5,
# eco4_private_solid_abs_rate=17,
# gbis_social_cavity_abs_rate=21,
# gbis_social_solid_abs_rate=25,
# gbis_private_cavity_abs_rate=21,
# gbis_private_solid_abs_rate=28,
# )
#
# li_thickness = convert_thickness_to_numeric(
# p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"]
# )
# current_wall_u_value = p.walls["thermal_transmittance"]
# if current_wall_u_value is None:
# current_wall_u_value = get_wall_u_value(
# clean_description=p.walls["clean_description"],
# age_band=p.age_band,
# is_granite_or_whinstone=p.walls["is_granite_or_whinstone"],
# is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"],
# )
#
# # We insert the innovation uplift
# measures_to_optimise_with_uplift = deepcopy(measures_to_optimise)
#
# # TODO: Turn this into a function and store the innovation uplift
# for group in measures_to_optimise_with_uplift:
# for r in group:
#
# if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating",
# "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]:
# (
# r["partial_project_score"],
# r["partial_project_funding"],
# r["innovation_uplift"],
# r["uplift_project_score"],
# ) = (
# 0, 0, 0, 0
# )
# continue
#
# (
# r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"],
# r["uplift_project_score"]
# ) = funding.get_innovation_uplift(
# measure=r,
# starting_sap=p.data["current-energy-efficiency"],
# floor_area=p.floor_area,
# is_cavity=p.walls["is_cavity_wall"],
# current_wall_uvalue=current_wall_u_value,
# is_partial="partial" in p.walls["clean_description"].lower(),
# existing_li_thickness=li_thickness,
# mainheating=p.main_heating,
# main_fuel=p.main_fuel,
# mainheat_energy_eff=p.data["mainheat-energy-eff"],
# )
#
# input_measures = optimiser_functions.prepare_input_measures(
# measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True
# )
#
# # When the goal is Increasing EPC, we can run the funding optimiser
# if body.goal == "Increasing EPC":
#
# solutions = optimise_with_funding_paths(
# p=p,
# input_measures=input_measures,
# housing_type=body.housing_type,
# budget=body.budget,
# target_gain=gain,
# funding=funding
# )
#
# # Given the solutions we select the optimal one
# solutions["cost_less_full_project_funding"] = np.where(
# solutions["scheme"] == "eco4",
# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"],
# solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"]
# )
#
# solutions["cost_less_full_project_funding"] = (
# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"]
# )
# solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True)
#
# if solutions["meets_upgrade_target"].any():
# # If we have a solution that meets the upgrade target, we select that one
# optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0]
# else:
# # Pick the cheapest
# optimal_solution = solutions.iloc[0]
#
# # This is the list of measures that we will recommend
# scheme = optimal_solution["scheme"]
# funded_measures = optimal_solution["items"] if scheme != "none" else []
# solution = optimal_solution["items"] + optimal_solution["unfunded_items"]
# # This is the total amount of funding that the project will produce (including uplifts) (£)
# project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \
# optimal_solution["partial_project_funding"]
# # This is the total amount of funding associated to the uplift (£)
# total_uplift = optimal_solution["total_uplift"]
# # This is the funding scheme selected
# # This is the full project ABS
# full_project_score = optimal_solution["project_score"]
# # This is the partial project ABS
# partial_project_score = optimal_solution["partial_project_score"]
# # This is the uplift score ABS
# uplift_project_score = optimal_solution["total_uplift_score"]
# else:
# # We optimise and then we determine eligibility for funding, based on the measures selected
# optimiser = (
# GainOptimiser(
# input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False
# ) if body.budget else CostOptimiser(input_measures, min_gain=gain)
# )
# optimiser.setup()
# optimiser.solve()
# solution = optimiser.solution
#
# recommendation_types = []
# for measures in input_measures:
# for measure in measures:
# recommendation_types.append(measure["type"])
# recommendation_types = set(recommendation_types)
#
# has_wall_insulation_recommendation = any(
# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in
# WALL_INSULATION_MEASURES
# )
# has_roof_insulation_recommendation = any(
# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in
# ROOF_INSULATION_MEASURES
# )
#
# funding.check_funding(
# measures=solution,
# starting_sap=p.data["current-energy-efficiency"],
# ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]),
# floor_area=p.floor_area,
# mainheat_description=p.main_heating["clean_description"],
# heating_control_description=p.main_heating_controls["clean_description"],
# is_cavity=p.walls["is_cavity_wall"],
# current_wall_uvalue=current_wall_u_value,
# is_partial="partial" in p.walls["clean_description"].lower(),
# existing_li_thickness=li_thickness,
# mainheating=p.main_heating,
# main_fuel=p.main_fuel,
# mainheat_energy_eff=p.data["mainheat-energy-eff"],
# has_wall_insulation_recommendation=has_wall_insulation_recommendation,
# has_roof_insulation_recommendation=has_roof_insulation_recommendation,
# )
#
# # Determine the scheme
# scheme = "none"
# if funding.eco4_eligible:
# scheme = "eco4"
# if scheme == "none" and funding.gbis_eligible:
# scheme = "gbis"
#
# funded_measures = solution if scheme in ["gbis", "eco4"] else []
# project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs
# total_uplift = funding.eco4_uplift
# full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs
# partial_project_score = funding.partial_project_abs
# uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift
#
# selected = {r["id"] for r in solution}
#
# if property_required_measures:
# solution = optimiser_functions.add_required_measures(
# property_id=p.id, property_required_measures=property_required_measures,
# recommendations=recommendations, selected=selected,
# )
#
# # Add best practice measures (ventilation/trickle vents)
# selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected)
# # Final flattening - Don't do this!
# # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults(
# # p.id, recommendations, selected
# # )
#
# # TODO: functionise
# for measure in funded_measures:
# if "+mechanical_ventilation" in measure["type"]:
# measure["type"] = measure["type"].split("+mechanical_ventilation")[0]
#
# p.insert_funding(
# scheme=scheme,
# funded_measures=funded_measures,
# project_funding=project_funding,
# total_uplift=total_uplift,
# full_project_score=full_project_score,
# partial_project_score=partial_project_score,
# uplift_project_score=uplift_project_score
# )

View file

@ -26,7 +26,7 @@ class TestSearchEpcIntegration:
# Test case 2: Another valid address and postcode
# In this case, the newest EPC, does not have a uprn associated to it. If we did a search by
# uprn, we would get an old EPC
("Flat 8, Hainton House", "DN32 9AQ", 10090082018, True,
("Flat 8, Hainton House", "DN32 9AQ", "", True,
"bd1149a20a73397184f07a9955f872424826e70f4870c058d71be887766ee1f8", 2),
# Test case 3: When we make a request to the API for this property, we get back results for
# flats 1, 2 and 3. We have some logic to handle the response so that we get back flat 1
@ -56,7 +56,6 @@ class TestSearchEpcIntegration:
# We check that we have the correct epc
assert epc_searcher.newest_epc["lmk-key"] == lmk_key
assert epc_searcher.newest_epc["uprn"] == uprn
assert len(epc_searcher.older_epcs) == n_old_epcs
def test_search_housenumber(self):

View file

@ -0,0 +1,62 @@
import pandas as pd
from sklearn.linear_model import Ridge
class SAPUpliftTrainer:
    """
    Offline trainer that fits a Ridge regression to discover the SAP uplift
    model coefficients.

    The fitted intercept and per-feature coefficients can be exported as a
    plain dict for use by the runtime scoring code.
    """

    def __init__(self, alpha=1.0):
        # alpha is passed straight through to the Ridge estimator
        self.alpha = alpha
        self.model = Ridge(alpha=self.alpha)
        # Columns of the training frame used as model inputs, in order
        self.feature_names = ["starting SAP", "PV Array size"]

    def prepare_data(self, df):
        """Split a training frame into the feature matrix and the target column."""
        working = df.copy()
        # working["is_electric"] = working["heating"].str.contains(
        #     "Electric", case=False, na=False
        # ).astype(int)
        features = working[self.feature_names]
        target = working["SAP points"]
        return features, target

    def fit(self, df):
        """Fit the underlying model on the given training frame."""
        features, target = self.prepare_data(df)
        self.model.fit(features, target)

    def coefficients(self):
        """Return the intercept plus one entry per feature name, all as floats."""
        fitted = {"intercept": float(self.model.intercept_)}
        for feature, weight in zip(self.feature_names, self.model.coef_):
            fitted[feature] = float(weight)
        return fitted

    def export_runtime_config(self):
        """
        Returns a dict suitable for copy-pasting into the runtime scoring class.
        """
        fitted = self.coefficients()
        runtime_config = {
            "intercept": fitted["intercept"],
            "coef_starting_sap": fitted["starting SAP"],
            "coef_pv_size": fitted["PV Array size"],
            # "coef_is_electric": fitted["is_electric"],
        }
        return runtime_config
# The training data can be found in the Domna sharepoint in Product Development > Solar Battery Recommendations
df = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/SAP Movement data(Sheet1).csv")
trainer = SAPUpliftTrainer(alpha=1.0)
trainer.fit(df)
print(trainer.coefficients())
print(trainer.export_runtime_config())
# Last updated: 9th December 2025
# Coefficients:
# {'intercept': 10.310168559226678, 'starting SAP': -0.16120648633993315, 'PV Array size': 1.0500492005420736}
# The code for scoring with this model can be found in backend/app/BatterySapScorer.py

View file

@ -310,7 +310,7 @@ class KwhData:
False: "N",
None: "N",
"Y": "Y",
"N": "N"
"N": "N",
}
for v in bools_to_remap:
epc[v] = bool_map[epc[v]]

View file

@ -0,0 +1,47 @@
# After going back to Lincs rural, they gave us some additional data that we can use to try to fetch missed UPRNs again
import pandas as pd
# missed = pd.read_excel(
# "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_nov_2025.xlsx",
# sheet_name="Missed Properties"
# )
# missed = missed[~pd.isnull(missed["rrn"])]
# Asset list that has already been standardised for ARA
prepared = pd.read_excel(
    "/Users/khalimconn-kowlessar/Downloads/lincs_rural_standardised_ara_nov_2025.xlsx",
    sheet_name="Standardised Asset List"
)

# The landlord's refreshed master list; rows without a property reference are unusable
updated_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes - Copy.xlsx",
    sheet_name="PROPERTY EPC RATINGS"
)
updated_data = updated_data[updated_data["Property Ref."].notna()]

# Anything in the master list that is not yet in the prepared list was missed
already_prepared_refs = prepared["landlord_property_id"].values.tolist()
missed = updated_data[~updated_data["Property Ref."].isin(already_prepared_refs)].copy()

# missed.to_csv("/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_uprn.csv")
# We'll grab the UPRNs manually and then pull them in, and prepare for ARA
missing_uprns = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_uprn.csv")

# Both ID columns are copies of the landlord's property reference
for id_column in ("landlord_property_id", "domna_property_id"):
    missing_uprns[id_column] = missing_uprns["Property Ref."].copy()

# 'Unnamed: 1' holds the comma-separated address; first part is address line 1, last part the postcode
address_parts = missing_uprns['Unnamed: 1'].str.split(",")
missing_uprns["domna_address_1"] = address_parts.str[0].str.strip()
missing_uprns["postcode"] = address_parts.str[-1].str.strip()
missing_uprns["landlord_property_type"] = "unknown"
missing_uprns["landlord_built_form"] = "unknown"
missing_uprns["domna_full_address"] = missing_uprns['Unnamed: 1'].copy()

ara_columns = [
    'landlord_property_id', 'domna_address_1', 'landlord_property_type', 'landlord_built_form', 'postcode',
    'domna_property_id', 'UPRN'
]
missed_standardised_for_ara = missing_uprns[ara_columns].rename(columns={"UPRN": "epc_os_uprn"})

# Store
missed_standardised_for_ara.to_excel(
    "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_standardised_ara_nov_2025.xlsx",
    index=False,
    sheet_name="Standardised Asset List"
)

View file

@ -0,0 +1,91 @@
"""
Rough script to prepare the data for Lincs Rural project
"""
from tqdm import tqdm
import pandas as pd
import os
from dotenv import load_dotenv
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from backend.SearchEpc import SearchEpc
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
data = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes.xlsx",
sheet_name="PROPERTY EPC RATINGS"
)
# We have property RRNs - we need UPRN
standardised_ara_list = []
missed = []
for _, row in tqdm(data.iterrows(), total=len(data)):
    # Any failure for a property is recorded in `missed` rather than aborting the run
    try:
        rrn = row["EPC Ref."]

        # Fetch from find my epc, identifying the certificate by RRN only
        retriever = RetrieveFindMyEpc(
            address="",
            postcode="",
            rrn=rrn,
            address_postal_town="",
        )
        find_epc_data = retriever.retrieve_newest_find_my_epc_data(rrn=rrn)
        address1 = find_epc_data["address1"]
        address2 = find_epc_data["address2"]
        postcode = find_epc_data["postcode"]

        # Find the UPRN by searching the EPC data with the retrieved address
        epc_searcher = SearchEpc(
            address1=str(address1),
            postcode=str(postcode),
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            property_type=None,
            fast=False,
            full_address=",".join([address1, address2]),
            max_retries=5,
        )
        epc_searcher.find_property(skip_os=True)
        newest_epc = epc_searcher.newest_epc

        # Append in the standardised asset list format
        fallback_address = ", ".join([address1, address2])
        record = {
            "landlord_property_id": row["Property Ref."],
            "domna_address_1": address1,
            "postcode": postcode,
            "landlord_property_type": newest_epc.get("property-type"),
            "landlord_built_form": newest_epc.get("built-form"),
            "landlord_heating_system": newest_epc.get("mainheat-description", ""),
            "epc_os_uprn": newest_epc.get("uprn"),
            "domna_property_id": row["Property Ref."],
            "domna_full_address": newest_epc.get("address", fallback_address),
        }
        standardised_ara_list.append(record)
    except Exception as e:
        failure = {
            "property_ref": row["Property Ref."],
            "rrn": row["EPC Ref."],
            "error": str(e)
        }
        missed.append(failure)
missed_df = pd.DataFrame(missed)
# Store
standardised_ara_df = pd.DataFrame(standardised_ara_list)
standardised_ara_df.to_excel(
"/Users/khalimconn-kowlessar/Downloads/lincs_rural_standardised_ara_nov_2025.xlsx",
index=False,
sheet_name="Standardised Asset List"
)
# Store missed
missed_df.to_excel(
"/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_nov_2025.xlsx",
index=False,
sheet_name="Missed Properties"
)

View file

@ -114,7 +114,7 @@ def app():
"lighting",
"secondary_heating",
"boiler_upgrade",
"high_heat_retention_storage_heater",
"high_heat_retention_storage_heaters",
],
"budget": None,
}

View file

@ -0,0 +1,369 @@
"""
This script prepares the raw data that was sent over by Peabody for production of
a standardised asset list
They have sent over just short of 100,000 properties and so, to make this easier, we will do the following
1) Break the data up into subsets of 25,000
2) Combine the data provided into a single list
"""
import json
import time
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from dotenv import load_dotenv
from asset_list.utils import get_data_for_property
from utils.logger import setup_logger
logger = setup_logger()
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
property_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Properties"
)
sustainability_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
)
# Basic overview:
# 1) We have 10,634 postcodes. If we needed to make requests to the ordnance survey API for
# all of these postcodes, it would cost at least £106, not accounting for double requests for postcodes
# where we have more than 100 properties (WE DONT!)
# 2) This is on average 9.36 properties per postcode
# 3) The UPRN in the property_list matches to the Org Ref in the sustainability data. There
# is an additional UPRN column in sustainability data which appears to be the ordnance survey UPRN
# 4) There appears to be some anomalous records, e.g. a flat with 543 m2 floor area and another flat
# with 6m2 floor area
# 5) Based on the residential indicator, all properties appear to be resi
# 6) We should do some quick calcs on how much it might cost to fetch all of the solar API data
# 7) We have 8785 missing UPRNS, which we should potentially try and fill
# 8) In the backend, we should probably start storing the raw EPC input data to allow for much quicker
# re-runs. All we really need to do is store the find my EPC data, perhaps against UPRN and RRN, as well
# as the raw EPC data, against uprn. This will be useful for scenario re-builds and will be much much
# quicker, as a starting point. Do we store in the database vs s3? TBC
n_postcodes = property_list["Post Code"].nunique()
postcode_summary = property_list.groupby("Post Code")["UPRN"].count().reset_index()
postcode_summary["UPRN"].mean()
def classify_floor_area(x):
    """
    Map a floor area in m2 to its band label.

    Bands are "0-72", "73-97", "98-199" and "200+", with each upper bound
    inclusive (e.g. 72 -> "0-72", anything above 72 up to 97 -> "73-97").

    Returns None for a missing value (None / NaN). Without this check a NaN
    fails every `<=` comparison and would be labelled "200+", which also stops
    a later dropna() from removing the record.
    """
    if pd.isnull(x):
        return None
    if x <= 72:
        return "0-72"
    if x <= 97:
        return "73-97"
    if x <= 199:
        return "98-199"
    return "200+"
sustainability_data["Postal Region"] = sustainability_data["Postcode"].str.split(" ").str[0]
sustainability_data["Floor Area Band"] = sustainability_data["Total Floor Area (m2)"].apply(
lambda x: classify_floor_area(x)
)
# Archetype reductions
# Roof insulation category
# 1) Split roof insulation into > 100mm loft and <= 100mm loft
sustainability_data["Roof Insulation Category"] = sustainability_data["Roof Insulation"].copy()
# sustainability_data["Roof Insulation Category"] = np.where(
# sustainability_data["Roof Insulation Category"].isin(
# ['mm200', 'mm300', 'mm250', 'mm150', 'mm270', 'mm400', 'mm350'],
# ),
# "LI > 100mm",
# sustainability_data["Roof Insulation Category"],
# )
# sustainability_data["Roof Insulation Category"] = np.where(
# sustainability_data["Roof Insulation Category"].isin(
# ['mm100', 'mm50', 'mm75', 'mm25'],
# ),
# "LI <= 100mm",
# sustainability_data["Roof Insulation Category"],
# )
# 2) Group all of the glazed together (e.g. double glazed, secondary glazed, triple glazed)
sustainability_data["Glazing Type"] = sustainability_data["Glazing"].copy()
# sustainability_data["Glazing Type"] = np.where(
# sustainability_data["Glazing Type"].isin(
# ['Double 2002 or later', 'Double before 2002', 'Double but age unknown', 'DoubleKnownData']
# ),
# "Double Glazed",
# sustainability_data["Glazing Type"],
# )
# sustainability_data["Glazing Type"] = np.where(
# sustainability_data["Glazing Type"].isin(['Triple', 'TripleKnownData']),
# "Triple Glazed",
# sustainability_data["Glazing Type"],
# )
# 3) Group up boiler efficiency A, B-D, E - G? or someting like this
sustainability_data["Boiler Efficiency Group"] = sustainability_data["Boiler Efficiency"].copy()
# sustainability_data["Boiler Efficiency Group"] = np.where(
# sustainability_data["Boiler Efficiency Group"].isin(['B', 'C', 'D']),
# "B-D",
# sustainability_data["Boiler Efficiency Group"],
# )
# sustainability_data["Boiler Efficiency Group"] = np.where(
# sustainability_data["Boiler Efficiency Group"].isin(['E', 'F', 'G']),
# "E-G",
# sustainability_data["Boiler Efficiency Group"],
# )
# 4) Group up main fuel into gas, electric, oil, other?
sustainability_data["Main Fuel Group"] = sustainability_data["Main Fuel"].copy()
# sustainability_data["Main Fuel Group"] = np.where(
# sustainability_data["Main Fuel Group"].isin(
# ["SmokelessCoal", "BiomassCommunity", "B30DCommunity"]
# ),
# "Other Fuel",
# sustainability_data["Main Fuel Group"],
# )
# 5) Wall Construction - group up Sandstone and Granite into one category
# sustainability_data["Wall Construction"] = np.where(
# sustainability_data["Wall Construction"].isin(["Sandstone", "Granite"]),
# "Sandstone/Granite",
# sustainability_data["Wall Construction"]
# )
# sustainability_data["Wall Construction"] = np.where(
# sustainability_data["Wall Construction"].isin(["Timber Frame", "System"]),
# "Timber/System",
# sustainability_data["Wall Construction"]
# )
# 6) Reduce or remove floor construction
# sustainability_data["Floor Construction"] = np.where(
# sustainability_data["Floor Construction"].isin(["SuspendedTimber", "SuspendedNotTimber"]),
# "Suspended Floor",
# sustainability_data["Floor Construction"]
# )
# 7) Reduce wall insulation
# sustainability_data["Wall Insulation"] = np.where(
# sustainability_data["Wall Insulation"].isin(
# ["FilledCavityPlusInternal", "FilledCavityPlusExternal", "FilledCavity", "External", "Internal"]
# ),
# "Insulated",
# sustainability_data["Wall Insulation"]
# )
# 8) Fill floor insulation
sustainability_data["Floor Insulation"] = sustainability_data["Floor Insulation"].fillna("Unknown")
# 9) Reduce Age bands
# sustainability_data["Construction Years"] = np.where(
# sustainability_data["Construction Years"].isin(["2003-2006", "2007-2011", "2012 onwards"]),
# "2003 onwards",
# sustainability_data["Construction Years"],
# )
# sustainability_data["Construction Years"] = np.where(
# sustainability_data["Construction Years"].isin(["Before 1900", "1900-1929"]),
# "Before 1929",
# sustainability_data["Construction Years"],
# )
# sustainability_data["Construction Years"] = np.where(
# sustainability_data["Construction Years"].isin(["1983-1990", "1991-1995"]),
# "1983-1995",
# sustainability_data["Construction Years"],
# )
# sustainability_data["Construction Years"] = np.where(
# sustainability_data["Construction Years"].isin(["1950-1966", "1967-1975", "1976-1982"]),
# "1950-1982",
# sustainability_data["Construction Years"],
# )
# Roof
# sustainability_data["Roof Construction"] = np.where(
# sustainability_data["Roof Construction"].isin(
# ["PitchedNormalLoftAccess", "PitchedThatched", "PitchedNormalNoLoftAccess", "PitchedWithSlopingCeiling"]
# ),
# "Pitched Roof",
# sustainability_data["Roof Construction"]
# )
archetype_variables = [
"Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation",
"Roof Construction", "Roof Insulation Category", "Floor Construction", "Floor Insulation",
"Glazing Type", "Heating", "Boiler Efficiency Group", "Main Fuel Group", "Controls Adequacy",
"Floor Area Band"
]
archetypes = sustainability_data[archetype_variables + ["UPRN"]].dropna().groupby(archetype_variables)[
"UPRN"].nunique().reset_index().rename(columns={"UPRN": "Count"}).sort_values(by="Count",
ascending=False).reset_index(
drop=True)
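# Intended: keep the archetypes that together represent 95% of the properties.
# NOTE(review): no cumulative-proportion cut-off is applied below - archetypes_95 is a full copy of archetypes.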
archetypes["Cumulative Count"] = archetypes["Count"].cumsum()
archetypes["Cumulative Proportion"] = archetypes["Cumulative Count"] / archetypes["Count"].sum()
archetypes_95 = archetypes.copy()
archetypes_95["Archetypes_95_reference"] = archetypes_95.index + 1
archetypes_95["Archetypes_95_reference"] = "Archetype_Sample_" + archetypes_95["Archetypes_95_reference"].astype(str)
# For the sample, look for invalid looking UPRNs and remove them.
sample_from = sustainability_data.copy()
# 1) Check for UPRNs that are not numeric or begin with a Zero
sample_from["uprn_not_numeric"] = ~sample_from["UPRN"].apply(lambda x: str(x).isnumeric())
sample_from = sample_from[~sample_from["uprn_not_numeric"]]
sample_from["uprn_has_leading_zero"] = sample_from["UPRN"].apply(lambda x: str(x).startswith("0"))
sample_from = sample_from[~sample_from["uprn_has_leading_zero"]]
sample_from = sample_from[~pd.isnull(sample_from["UPRN"])]
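# Attach the archetype reference to every valid-UPRN property (inner join on the archetype variables).
# NOTE(review): this was described as an 85% sample, but no proportion filter is applied in this merge - confirm.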
sample_from = sample_from.merge(
archetypes_95,
on=archetype_variables,
how="inner"
)
# We take 1 random property, by archetype reference
modelling_sample = sample_from.groupby("Archetypes_95_reference").apply(
lambda x: x.sample(1, random_state=42)
).reset_index(drop=True)
# Checking distributions
def compare_distributions(full_df, sample_df, column):
    """
    Compare how the values of `column` are distributed in two frames.

    Returns a DataFrame indexed by the column's values with two columns,
    'Full' and 'Sample', holding the normalised value counts from each frame.
    Values present in only one frame get 0 for the other.
    """
    proportions = [
        frame[column].value_counts(normalize=True)
        for frame in (full_df, sample_df)
    ]
    side_by_side = pd.concat(proportions, axis=1, keys=['Full', 'Sample'])
    return side_by_side.fillna(0)
# Print, per archetype variable, the full-population distribution next to the sample's.
# NOTE(review): the "sample" passed here is sample_from (all merged valid-UPRN rows),
# not modelling_sample (one row per archetype) - confirm which was intended.
for variable in archetype_variables:
    comparison = compare_distributions(sustainability_data, sample_from, variable)
    print(f"--- {variable} ---")
    print(comparison)
# prepare
modelling_sample["domna_property_id"] = modelling_sample.index + 1
# Rename
modelling_sample = modelling_sample.rename(
columns={
"Org Ref": "landlord_property_id", "Address 1": "domna_address_1",
"Postcode": "postcode", "Type": "landlord_property_type",
"Attachment": "landlord_built_form",
"Heating": "landlord_heating_system",
"UPRN": "epc_os_uprn"
}
)
modelling_sample["landlord_built_form"] = modelling_sample["landlord_built_form"].map(
{
"MidTerrace": "Mid-Terrace",
"EndTerrace": "End-Terrace",
"SemiDetached": "Semi-Detached",
"Detached": "Detached",
"EnclosedEndTerrace": "Enclosed End-Terrace",
"EnclosedMidTerrace": "Enclosed Mid-Terrace",
}
)
if pd.isnull(modelling_sample["landlord_built_form"]).sum():
raise ValueError("Some built forms are null after mapping")
# Placeholder copies
def make_full_address(x):
    """
    Build a single comma-separated address from a row's address components.

    Reads 'domna_address_1', 'Address 2' and 'Address 3' from `x` (a row or
    mapping), skips components that are null or blank, and joins the rest
    with ", ".

    Components are converted to str and stripped before joining, so a
    numeric component (e.g. a house number parsed as an int) no longer makes
    the join raise TypeError, and whitespace-only components are dropped.
    """
    components = [x['domna_address_1'], x['Address 2'], x['Address 3']]
    cleaned = []
    for part in components:
        if pd.isnull(part):
            continue
        part_text = str(part).strip()
        if part_text:
            cleaned.append(part_text)
    return ", ".join(cleaned)
modelling_sample["domna_full_address"] = modelling_sample.apply(lambda x: make_full_address(x), axis=1)
# Save this CSV as input
modelling_sample.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx",
sheet_name="Standardised Asset List"
)
# Save the archetype definitions
archetypes_95.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/archetypes_85.xlsx",
)
# Save the full archetypes
archetypes.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/full_archetypes.xlsx",
)
# Maps the property types to the format recognised by the EPC api
property_type_map = {}
# Maps the build form to the format recognised by the OS api
built_form_map = {}
# Proposed data fetching
# 1) grab properties with UPRN and fetch the associated EPC data & find my EPC data
# Some thoughts:
# S3 is quite cheap to query however we may incur some cost if we're making hundreds of thousands of calls
# to S3 to fetch data out of it. It's cheap to fetch data, if we aren't taking data out of S3, but we
# should consider this. This may influence whether or not we want to store each record individually
# against UPRN, or store against the 10,641 postcodes. We can fetch the data and store in a single
# large dump and then determine later if we want to split it up
# TODO: Handle properties without uprn
# TODO: I think we can json dump all of this, but check if we can load and re-use the page source
# TODO: Create batches?
batch_size = 500
# Start row of every batch: 0, 500, 1000, ...
batch_indexes = list(range(0, len(sustainability_data), batch_size))

# TODO: SET
working_directory = ""
# os.path.join("", name) below resolves against the current directory, but os.listdir("")
# raises FileNotFoundError - list "." when no directory has been set so both agree.
download_contents = os.listdir(working_directory or ".")

for i in batch_indexes:
    batch_name = f"batch_{i}_to_{i + batch_size}"
    # TODO: Check this
    if batch_name in download_contents:
        # Means we already have the data downloaded
        continue

    # Only the rows of this batch are fetched (previously every batch iterated the whole frame)
    batch_rows = sustainability_data.iloc[i:i + batch_size]
    batch_data = {}
    for _, property_data in tqdm(batch_rows.iterrows(), total=len(batch_rows)):
        os_uprn = property_data["UPRN"]
        address1 = property_data["Address 1"]
        postcode = property_data["Postcode"]
        full_address_components = [
            component
            for component in [property_data["Address 1"], property_data["Address 2"], property_data["Address 3"]]
            if not pd.isnull(component)
        ]
        full_address = ", ".join(full_address_components)

        # NOTE(review): property_type_map and built_form_map are defined as empty dicts in this
        # script, so these lookups raise KeyError until the maps are populated.
        fetched_data = get_data_for_property(
            address1=address1,
            postcode=postcode,
            full_address=full_address,
            property_type=property_type_map[property_data["Type"]],
            built_form=built_form_map[property_data["Attachment"]],
            uprn=os_uprn,
            epc_auth_token=EPC_AUTH_TOKEN,
            find_my_epc_return_page=True
        )
        batch_data[property_data["Org Ref"]] = fetched_data

        # TODO: We likely want to do something like this: to slow down
        # TODO: We also perhaps store the data in batches
        if len(batch_data) % 50 == 0 and len(batch_data) > 0:
            logger.info("Sleeping for 10 seconds to avoid hitting API rate limit")
            time.sleep(10)

    # Store the batch data in the wd. json.dump writes str, so the file must be opened in
    # text mode ("wb" raised TypeError on the first write).
    with open(os.path.join(working_directory, batch_name), "w", encoding="utf-8") as f:
        json.dump(batch_data, f)

View file

@ -0,0 +1,147 @@
"""
We have found, within the Peabody data, a large volume of properties with missing or incorrect
UPRNs and incorrect address data. We want to flag these records and also fill in the missing UPRNs where we can.
We also have duplicate UPRNs that should be flagged.
"""
import json
import time
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from dotenv import load_dotenv
from asset_list.utils import get_data_for_property
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
logger = setup_logger()
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
sustainability_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
)
property_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Properties"
)
missing_uprns = sustainability_data[pd.isnull(sustainability_data['UPRN'])].copy()
# Any non-numeric UPRNS or leading with 0s are invalid
non_numeric_uprns = sustainability_data[
~sustainability_data['UPRN'].astype(str).str.match(r'^[1-9][0-9]*$') & ~pd.isnull(sustainability_data['UPRN'])
].copy()
# 70 properties
leading_zero_uprns = sustainability_data[
sustainability_data['UPRN'].astype(str).str.startswith('0')
].copy()
# Flag duplicates
duplicate_uprns = sustainability_data[
sustainability_data.duplicated(subset=['UPRN'], keep=False) & ~pd.isnull(sustainability_data['UPRN'])
].copy()
# Store this data
# missing_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting
# Project/data_validation/missing_uprns.csv", index=False)
# non_numeric_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting
# Project/data_validation/non_numeric_uprns.csv", index=False)
# leading_zero_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting
# Project/data_validation/leading_zero_uprns.csv", index=False)
# duplicate_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting
# Project/data_validation/duplicate_uprns.csv", index=False)
# Take everything remaining
data_needing_validation = sustainability_data[
~sustainability_data["Org Ref"].isin(
missing_uprns["Org Ref"].values.tolist() + non_numeric_uprns["Org Ref"].values.tolist() +
leading_zero_uprns["Org Ref"].values.tolist() + duplicate_uprns["Org Ref"].values.tolist()
)
].copy()
# TODO: We should build a SAL for UPRNS that are missing, invalid or duplicated
# We check UPRN validity against our OS data
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
)
# We're going to:
# 1) Grab a filename
# 2) Read it in
# 3) Check which UPRNS from our data are in that file
# 4) Keep a record of which UPRNS were found where
# The landlord UPRNs do not change between files, so convert them to nullable integers once
# instead of on every iteration.
candidate_uprns = data_needing_validation['UPRN'].astype('Int64')

for uprn_file in tqdm(uprn_filenames['filenames'].values, total=len(uprn_filenames)):
    # Read one OS spatial file and find which of our UPRNs appear in it
    spatial_data = read_dataframe_from_s3_parquet(
        bucket_name="retrofit-data-dev", file_key=f"spatial/{uprn_file}"
    )
    os_uprns = spatial_data['UPRN'].astype('Int64').values
    uprns_in_file = data_needing_validation[candidate_uprns.isin(os_uprns)].copy()
    print(f"Found {len(uprns_in_file)} UPRNS in file {uprn_file}")

    if len(uprns_in_file) > 0:
        # Store the found UPRNS in the validation cache, one CSV per source file
        data_to_store = uprns_in_file[["Org Ref", "UPRN"]].copy()
        data_to_store["Source File"] = uprn_file
        # Store
        data_to_store.to_csv(
            "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
            f"Project/data_validation/validation_cache/{uprn_file.split('.parquet')[0]}_found_uprns.csv",
            index=False
        )
# Get all of the files:
storage_locations = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation/validation_cache")
# List contents
folder_contents = os.listdir(storage_locations)
# Grab files and concatenate
all_found_uprns = []
for file in folder_contents:
if file.endswith("_found_uprns.csv"):
df = pd.read_csv(os.path.join(storage_locations, file))
all_found_uprns.append(df)
all_found_uprns = pd.concat(all_found_uprns)
# We now flag any UPRNS that were not found in any of the OS datasets
os_missed_uprns = data_needing_validation[
~data_needing_validation['Org Ref'].isin(all_found_uprns['Org Ref'].values.tolist())
].copy()
# store
os_missed_uprns.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation/os_missed_uprns.csv",
index=False
)
# Now build a larger table for standardisation
to_standardised = pd.concat(
[missing_uprns, non_numeric_uprns, leading_zero_uprns, duplicate_uprns, os_missed_uprns]
)
to_standardised.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation/to_standardise_uprns.xlsx",
index=False)
# We prepare a finalised dataset to work with, that excludes all problematic properties and leaves us with
# properties for which we have the data we need
finalised_data = sustainability_data[
~sustainability_data["Org Ref"].isin(
to_standardised["Org Ref"].values.tolist()
)
].copy()
# Prepare with the column formats we need, as analogous to a_data_prep where we defined an initial working sample

View file

@ -0,0 +1,95 @@
import pandas as pd
### Prepare
sustainability_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
)
# Data we want to remove:
missing_uprns = sustainability_data[pd.isnull(sustainability_data['UPRN'])].copy()
# Any non-numeric UPRNS or leading with 0s are invalid
non_numeric_uprns = sustainability_data[
~sustainability_data['UPRN'].astype(str).str.match(r'^[1-9][0-9]*$') & ~pd.isnull(sustainability_data['UPRN'])
].copy()
# 70 properties
leading_zero_uprns = sustainability_data[
sustainability_data['UPRN'].astype(str).str.startswith('0')
].copy()
# Flag duplicates
duplicate_uprns = sustainability_data[
sustainability_data.duplicated(subset=['UPRN'], keep=False) & ~pd.isnull(sustainability_data['UPRN'])
].copy()
# Read in the UPRNs that were not valid based on the OS data
os_missed_uprns = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation/os_missed_uprns.csv",
)
modelling_data = sustainability_data[
~sustainability_data["Org Ref"].isin(
missing_uprns["Org Ref"].unique().tolist() + non_numeric_uprns["Org Ref"].unique().tolist() +
leading_zero_uprns["Org Ref"].unique().tolist() + duplicate_uprns["Org Ref"].unique().tolist() +
os_missed_uprns["Org Ref"].unique().tolist()
)
].copy()
# Need to prepare for upload
# Variables:
modelling_data["landlord_property_id"] = sustainability_data["Org Ref"].copy()
modelling_data["domna_property_id"] = sustainability_data["Org Ref"].copy()
modelling_data = modelling_data.rename(
columns={
"Address 1": "domna_address_1",
"Postcode": "postcode",
"Type": "landlord_property_type",
"Attachment": "landlord_built_form",
"Heating": "landlord_heating_system",
"UPRN": "epc_os_uprn"
}
)
def make_full_address(x):
    """
    Build a single comma-separated address from a row's address components.

    Reads 'domna_address_1', 'Address 2' and 'Address 3' from `x` (a row or
    mapping), skips components that are null or blank, and joins the rest
    with ", ".

    Components are converted to str and stripped before joining, so a
    numeric component (e.g. a house number parsed as an int) no longer makes
    the join raise TypeError, and whitespace-only components are dropped.
    """
    components = [x['domna_address_1'], x['Address 2'], x['Address 3']]
    cleaned = []
    for part in components:
        if pd.isnull(part):
            continue
        part_text = str(part).strip()
        if part_text:
            cleaned.append(part_text)
    return ", ".join(cleaned)
modelling_data["domna_full_address"] = modelling_data.apply(lambda x: make_full_address(x), axis=1)
# Keep only the columns needed for the exported asset list.
# NOTE(review): "landlord_property_id" is assigned earlier in this script but is not in this
# selection, so it is dropped from every exported sheet - confirm this is intended.
modelling_data = modelling_data[
[
"domna_address_1", "Address 2", "Address 3", "postcode", "landlord_property_type",
"landlord_built_form", "landlord_heating_system", "epc_os_uprn", "Total Floor Area (m2)",
"domna_property_id", "domna_full_address"
]
]
modelling_data["landlord_built_form"] = modelling_data["landlord_built_form"].map(
{
"MidTerrace": "Mid-Terrace",
"EndTerrace": "End-Terrace",
"SemiDetached": "Semi-Detached",
"Detached": "Detached",
"EnclosedEndTerrace": "Enclosed End-Terrace",
"EnclosedMidTerrace": "Enclosed Mid-Terrace",
}
)
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx")
with pd.ExcelWriter(filename) as writer:
modelling_data.to_excel(writer, sheet_name="Standardised Asset List", index=False)
# Store the three sections
modelling_data[0:30000].to_excel(writer, sheet_name="Part 1", index=False)
modelling_data[30000:60000].to_excel(writer, sheet_name="Part 2", index=False)
modelling_data[60000:].to_excel(writer, sheet_name="Part 3", index=False)
modelling_data.sample(60).to_excel(writer, sheet_name="Random testing sample", index=False)

View file

@ -0,0 +1,162 @@
"""
For the Peabody project, there were a number of subtasks that failed due to issues, with the most
prominent being errors with the property address and ID data.
This script will fetch those failed subtasks, get the associated properties and delete the properties
from the database so that the subtasks can be restarted cleanly.
Additionally, we will find the problematic records and remove them
Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan
or recommendations in case something went wrong
"""
import pandas as pd
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session
def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]:
    """
    Return the UPRN of every property row in the given portfolio.

    Rows whose UPRN is NULL are left out of the result.
    """
    rows = (
        session.query(PropertyModel.uprn)
        .filter(PropertyModel.portfolio_id == portfolio_id)
        .all()
    )
    found_uprns = []
    for (uprn,) in rows:
        if uprn is None:
            continue
        found_uprns.append(uprn)
    return found_uprns
with db_session() as session:
completed_uprns = get_uprns_for_portfolio(session, 419)
# We now find the portfolio of the SAL, which we did not set off
sal = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
"data.xlsx",
sheet_name="Standardised Asset List"
)
missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)]
# Store
missed_properties.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
"d_failed_properties_to_restart_20260102.xlsx",
sheet_name="Standardised Asset List",
index=False
)
# Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios:
scenario_id = None
from sqlalchemy import select, func
from sqlalchemy.orm import Session
from backend.app.db.models.recommendations import Plan
def count_plans_for_scenario(session: Session, scenario_id: int) -> int:
    """Return the number of Plan rows whose scenario_id equals `scenario_id`."""
    count_query = (
        select(func.count())
        .select_from(Plan)
        .where(Plan.scenario_id == scenario_id)
    )
    result = session.execute(count_query)
    return result.scalar_one()
with db_session() as session:
n_plans = count_plans_for_scenario(session, scenario_id)
def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]:
    """List the IDs of all plans stored against one scenario.

    Args:
        session: Open SQLAlchemy session used to run the query.
        scenario_id: Scenario whose plan IDs are wanted.

    Returns:
        The ``id`` of every ``Plan`` row whose ``scenario_id`` matches.
    """
    id_query = select(Plan.id).where(Plan.scenario_id == scenario_id)
    collected = []
    for record in session.execute(id_query):
        collected.append(record.id)
    return collected
# Collect the IDs of the scenario's plans; this list drives the batched deletion at the end of the script.
with db_session() as session:
    plan_ids = get_plan_ids_for_scenario(session, scenario_id)
from sqlalchemy import text
from sqlalchemy.orm import Session
def chunked(iterable, size):
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items each.

    Args:
        iterable: A sliceable sequence that supports ``len()`` (list, tuple, str, ...).
        size: Maximum number of items per slice.

    Yields:
        Slices of ``iterable`` in order; the last one may be shorter than ``size``.
    """
    starts = range(0, len(iterable), size)
    yield from (iterable[start:start + size] for start in starts)
from sqlalchemy import text
from sqlalchemy.orm import Session
def delete_plan_batch(session: Session, plan_ids: list[int]):
    """Delete a batch of plans together with their recommendation data.

    Rows are removed child-first: recommendation materials, the
    plan/recommendation link rows, the recommendations themselves and finally
    the plans. This function issues no commit of its own.

    Args:
        session: Open SQLAlchemy session on which every statement is executed.
        plan_ids: IDs of the plans to delete. An empty list is a no-op.
    """
    if not plan_ids:
        return
    # Fail fast instead of queueing behind another transaction's locks; SET LOCAL limits the setting to the
    # current transaction.
    session.execute(text("SET LOCAL lock_timeout = '5s'"))
    params = {"plan_ids": plan_ids}
    # Capture the linked recommendation IDs up front. Once the plan_recommendations rows are deleted they can no
    # longer be looked up, which previously turned the recommendation delete into a no-op.
    recommendation_ids = [
        row[0]
        for row in session.execute(
            text("""
                SELECT DISTINCT recommendation_id
                FROM plan_recommendations
                WHERE plan_id = ANY(:plan_ids)
            """),
            params,
        )
    ]
    # ----------------------------
    # recommendation_materials
    # ----------------------------
    session.execute(
        text("""
            DELETE FROM recommendation_materials rm
            USING plan_recommendations pr
            WHERE rm.recommendation_id = pr.recommendation_id
            AND pr.plan_id = ANY(:plan_ids)
        """),
        params,
    )
    # ----------------------------
    # plan_recommendations
    # ----------------------------
    session.execute(
        text("""
            DELETE FROM plan_recommendations
            WHERE plan_id = ANY(:plan_ids)
        """),
        params,
    )
    # ----------------------------
    # recommendations (only those used by these plans)
    # ----------------------------
    if recommendation_ids:
        session.execute(
            text("""
                DELETE FROM recommendation r
                WHERE r.id = ANY(:recommendation_ids)
            """),
            {"recommendation_ids": recommendation_ids},
        )
    # ----------------------------
    # plans LAST
    # ----------------------------
    session.execute(
        text("""
            DELETE FROM plan
            WHERE id = ANY(:plan_ids)
        """),
        params,
    )
# Delete the plans in batches of 25, opening a fresh db_session for every batch.
batch_size = 25
# Ceiling division: number of batches needed to cover every plan ID.
total = (len(plan_ids) + batch_size - 1) // batch_size
for i, batch in enumerate(chunked(plan_ids, batch_size), start=1):
    print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)")
    with db_session() as session:
        delete_plan_batch(session, batch)
    # NOTE(review): assumes db_session commits when its block exits - confirm against its implementation.
    print(f"Batch {i} committed")

View file

@ -0,0 +1,145 @@
# We look to match the missed properties to the UPRNS that were sent over by Peabody
from tqdm import tqdm
import pandas as pd
import os
from utils.s3 import read_dataframe_from_s3_parquet
# Workbook of cleaned UPRN matches; the merge below joins on its "reference" column.
cleaned_uprns = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
    "Project/PeabodyPropertymatched_Dec25_propref_UPRN.xlsx"
)
# Grab the problematic records
problematic_records = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
    "Project/data_validation/to_standardise_uprns.xlsx"
)
# Remove dupe on Org Ref (the first occurrence of each Org Ref is kept)
problematic_records = problematic_records.drop_duplicates("Org Ref")
# Attach the cleaned UPRN data to each problematic record. merge() defaults to an inner join, so records whose
# "Org Ref" has no matching "reference" are dropped here.
df = problematic_records.merge(
    cleaned_uprns,
    left_on="Org Ref",
    right_on="reference"
)
# df_had_uprn = df[~pd.isnull(df["UPRN"])]
# We prepare the data for analysis: both ID columns are copies of the landlord's "Org Ref"
df["landlord_property_id"] = df["Org Ref"].copy()
df["domna_property_id"] = df["Org Ref"].copy()
# Rename the source columns to the names used in the output sheet written at the end of the script.
df = df.rename(
    columns={
        "Address 1": "domna_address_1",
        "Postcode": "postcode",
        "Type": "landlord_property_type",
        "Attachment": "landlord_built_form",
        "Heating": "landlord_heating_system",
        "out_uprn": "epc_os_uprn"
    }
)
def make_full_address(x):
    """Join the populated address parts of a record into one comma-separated string.

    Args:
        x: Mapping-like record (e.g. a DataFrame row) exposing the keys
            ``domna_address_1``, ``Address 2`` and ``Address 3``.

    Returns:
        The non-missing, non-empty parts joined with ``", "``, or an empty
        string when every part is missing.
    """
    parts = (x['domna_address_1'], x['Address 2'], x['Address 3'])
    # Cells read from Excel can be numeric (e.g. a bare house number); str() keeps join() from raising
    # TypeError on those values.
    return ", ".join(str(part) for part in parts if not pd.isnull(part) and part != '')
# Build the single-line address for every row from its address parts.
df["domna_full_address"] = df.apply(lambda x: make_full_address(x), axis=1)
# Keep only the columns wanted in the output sheet. "landlord_property_id" is not in this list, so it is
# dropped here.
df = df[
    [
        "domna_address_1", "Address 2", "Address 3", "postcode", "landlord_property_type",
        "landlord_built_form", "landlord_heating_system", "epc_os_uprn", "Total Floor Area (m2)",
        "domna_property_id", "domna_full_address"
    ]
]
# Convert the landlord's run-together built-form labels to hyphenated ones. Series.map returns NaN for any
# value that is not a key of this dict.
# NOTE(review): df is a column selection of the earlier frame, so this assignment may raise pandas'
# SettingWithCopyWarning - confirm whether a .copy() is wanted.
df["landlord_built_form"] = df["landlord_built_form"].map(
    {
        "MidTerrace": "Mid-Terrace",
        "EndTerrace": "End-Terrace",
        "SemiDetached": "Semi-Detached",
        "Detached": "Detached",
        "EnclosedEndTerrace": "Enclosed End-Terrace",
        "EnclosedMidTerrace": "Enclosed Mid-Terrace",
    }
)
# We have a lot of dupes - remove them
# The count below is neither assigned nor printed, so it only shows up in an interactive session.
df["epc_os_uprn"].duplicated().sum()
# UPRN values that occur more than once
dupe_uprns = df[df["epc_os_uprn"].duplicated()]["epc_os_uprn"].values
# All rows sharing a duplicated UPRN, sorted for inspection; not used again in the code visible here.
dupe_df = df[df["epc_os_uprn"].isin(dupe_uprns)]
dupe_df = dupe_df.sort_values("epc_os_uprn", ascending=True)
# Remove clear duplicate UPRNs because of unreliability: every row with a duplicated UPRN is dropped, not
# just the repeat occurrences.
# NOTE(review): if several rows have a missing UPRN they probably count as duplicates of each other and are
# dropped too - confirm this is intended.
df = df[~df["epc_os_uprn"].isin(dupe_uprns)]
filename = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
    "UPRNS.xlsx"
)
# Write the remaining rows to a workbook with a single "Standardised Asset List" sheet.
with pd.ExcelWriter(filename) as writer:
    df.to_excel(writer, sheet_name="Standardised Asset List", index=False)
# Check these are valid
# We check UPRN validity against our OS data
# uprn_filenames = read_dataframe_from_s3_parquet(
# bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet"
# )
#
# # We're going to:
# # 1) Grab a filename
# # 2) Read it in
# # 3) Check which UPRNS from our data are in that file
# # 4) Keep a record of which UPRNS were found where
#
# for uprn_file in tqdm(uprn_filenames['filenames'].values, total=len(uprn_filenames)):
# spatial_data = read_dataframe_from_s3_parquet(
# bucket_name="retrofit-data-dev", file_key=f"spatial/{uprn_file}"
# )
#
# uprns_in_file = df[
# df['out_uprn'].astype('Int64').isin(spatial_data['UPRN'].astype('Int64').values)
# ].copy()
#
# print("Found {} UPRNS in file {}".format(len(uprns_in_file), uprn_file))
# if len(uprns_in_file) > 0:
# # Store the found UPRNS in the validation cache
# data_to_store = uprns_in_file[["Org Ref", "UPRN"]].copy()
# data_to_store["Source File"] = uprn_file
# # Store
# data_to_store.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
# f"Project/data_validation/missing_uprn_validation_cache/{uprn_file.split('.parquet')[0]}_found_uprns.csv",
# index=False
# )
#
# storage_locations = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
# "Project/data_validation/missing_uprn_validation_cache")
# # List contents
# folder_contents = os.listdir(storage_locations)
# # Grab files and concatenate
# all_found_uprns = []
# for file in folder_contents:
# if file.endswith("_found_uprns.csv"):
# df = pd.read_csv(os.path.join(storage_locations, file))
# all_found_uprns.append(df)
#
# all_found_uprns = pd.concat(all_found_uprns)
#
# invalid = df[
# ~df["Org Ref"].isin(all_found_uprns["Org Ref"].values)
# ]
#
# uprn_example = 10095401237
# eg = uprn_filenames[
# (uprn_filenames["upper"] >= uprn_example) & (uprn_filenames["lower"] <= uprn_example)
# ]
# eg2 = read_dataframe_from_s3_parquet(
# bucket_name="retrofit-data-dev", file_key=f"spatial/{eg['filenames'].values[0]}"
# )
#
# eg2[eg2["UPRN"] == uprn_example]

View file

@ -0,0 +1,246 @@
"""
This script performs a deep dive into the various scenarios and checks fundamental things
This includes:
1) Do properties that should have a plan actually have one? E.g. if the property is EPC D and the scenario targets
   EPC C, there should be a plan
2) If the scenario is fabric first, make sure the plans are actually fabric first
"""
import pandas as pd
# Scenario ID -> scenario name. The name is also the stem of that scenario's Excel file read further down.
scenario_names = {
    871: "EPC C, fabric first, no solid floor, ashp 3.0",
    863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
    862: "EPC B, No solid floor, ASHP COP 3.0",
    861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP",
    859: "EPC C, no solid floor, ashp 3.0",
}
# Scenario ID -> SAP score target: 69 for the EPC C scenarios and 81 for the EPC B scenarios listed above.
scenario_sap_targets = {
    871: 69,
    863: 81,
    862: 81,
    861: 69,
    859: 69,
}
# Accumulates, per scenario, one frame of properties with no measures and one frame of zero-cost anomalies.
problems = []
for scenario_id, scenario_name in scenario_names.items():
    # Read in the recommended measures
    print("Reading")
    df = pd.read_excel(
        f"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
        f"{scenario_name}.xlsx"
    )
    # find properties that are below the scenario sap target, but have no recommended measures
    # These flag columns are added to df itself, so they are carried into the frames appended to `problems`.
    df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id]
    df["no_recommended_measures"] = df["sap_points"] == 0
    df["zero_cost"] = df["total_retrofit_cost"] == 0
    df["sap_points_above_zero"] = df["sap_points"] > 0
    # Also look for zero cost and SAP points > 0
    problematic_properties = df[
        (df["below_scenario_target"] & df["no_recommended_measures"])
    ].copy()
    # For the scenarios with a target of 81, flats are left out of the problem list.
    # NOTE(review): presumably because flats are not expected to reach that target - confirm.
    if scenario_sap_targets[scenario_id] == 81:
        problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"]
    # Properties that gain SAP points even though the retrofit cost is zero
    zero_cost_above_zero_sap = df[
        (df["sap_points_above_zero"] & df["zero_cost"])
    ].copy()
    # show all columns
    # Source - https://stackoverflow.com/a
    # Posted by YOLO, modified by community. See post 'Timeline' for change history
    # Retrieved 2026-01-06, License - CC BY-SA 4.0
    # pd.set_option('display.max_rows', 500)
    # pd.set_option('display.max_columns', 500)
    # pd.set_option('display.width', 1000)
    # problematic_properties.head(len(problematic_properties))
    print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
    print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")
    problems.append(problematic_properties)
    problems.append(zero_cost_above_zero_sap)
# plan_input = [
# {
# "uprn": 100022725126,
# "address": "FLAT 5 Daveys Court",
# "postcode": "WC2N 4BW"
# }
# ]
# plan_input = [
# {
# "uprn": 100120966352,
# "address": "FLAT 11 Kingsgate",
# "postcode": "OX18 2BP"
# }
# ]
# Single-property example input (UPRN, postcode, address). It is not referenced again in the code visible
# here; the commented-out variants above are alternative example properties.
plan_input = [
    {
        "uprn": 200003371857,
        "postcode": "SE1 5SJ",
        "address": "39 BUTTERMERE CLOSE",
    }
]
# Combine the problem frames from every scenario and keep one row per UPRN (the first occurrence).
all_problems = pd.concat(problems)
all_problems = all_problems.drop_duplicates(subset=["uprn"])
# Original Standardised Asset List
sal = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
    "data.xlsx",
    sheet_name="Standardised Asset List"
)
# Additional UPRNs workbook, read from the same sheet name
sal2 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
    "UPRNS.xlsx",
    sheet_name="Standardised Asset List"
)
sal = pd.concat([sal, sal2])
# Asset-list rows whose UPRN appears among the problem properties
retry = sal[sal["epc_os_uprn"].isin(all_problems["uprn"])]
# Store
retry.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
    "d_problematic_properties_to_review_20260106.xlsx",
    sheet_name="Standardised Asset List",
    index=False
)
# Delete associated plans
# 1) Get the property IDs for these UPRNS, for this portfolio
portfolio_id = 419
uprns = retry["epc_os_uprn"].tolist()
# TODO: Delete all plans for these properties and re-build
from sqlalchemy.orm import Session
from backend.app.db.models.portfolio import PropertyModel
from backend.app.db.connection import db_session
from backend.app.db.models.recommendations import Plan
from sqlalchemy import select, delete
from sqlalchemy.exc import NoResultFound
from sqlalchemy.orm import sessionmaker
def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]:
    """Return the IDs of a portfolio's properties whose UPRN is in ``uprns``.

    Args:
        session: Open SQLAlchemy session used to run the query.
        portfolio_id: Portfolio the properties must belong to.
        uprns: UPRNs to look up.

    Returns:
        The ``id`` of every ``PropertyModel`` row matching both filters.
    """
    # Select only the id column instead of loading whole PropertyModel entities just to read `.id`.
    matching_rows = (
        session.query(PropertyModel.id)
        .filter(
            PropertyModel.portfolio_id == portfolio_id,
            PropertyModel.uprn.in_(uprns),
        )
        .all()
    )
    return [property_id for (property_id,) in matching_rows]
# Resolve the retry UPRNs to property IDs within the portfolio.
with db_session() as session:
    property_ids_to_delete = get_property_ids_for_uprns(session, portfolio_id, uprns)
# Get all and delete plans for these property IDs
def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]:
    """Fetch every plan attached to any of the given properties.

    Args:
        session: Open SQLAlchemy session used to run the query.
        property_ids: IDs of the properties whose plans are wanted.

    Returns:
        All ``Plan`` rows whose ``property_id`` is in ``property_ids``.
    """
    belongs_to_property = Plan.property_id.in_(property_ids)
    plan_query = session.query(Plan).filter(belongs_to_property)
    return plan_query.all()
def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]:
    """Return the IDs of every plan attached to any of the given properties.

    Args:
        session: Open SQLAlchemy session used to run the query.
        property_ids: IDs of the properties whose plans are to be deleted.

    Returns:
        The ``id`` of every ``Plan`` row whose ``property_id`` is in ``property_ids``.
    """
    # Select just the id column rather than loading whole Plan entities only to read `.id`.
    id_stmt = select(Plan.id).where(Plan.property_id.in_(property_ids))
    return [plan_id for (plan_id,) in session.execute(id_stmt)]
# Collect the IDs of all plans belonging to those properties; this list drives the batched deletion below.
with db_session() as session:
    plan_ids_to_delete = get_ids_of_plans_for_deletion(session, property_ids_to_delete)
def chunked(iterable, size):
    """Split a sequence into consecutive pieces of at most ``size`` items.

    Args:
        iterable: A sliceable sequence that supports ``len()`` (list, tuple, str, ...).
        size: Maximum number of items per piece.

    Yields:
        Slices of ``iterable`` in order; the final piece may be shorter than ``size``.
    """
    stop = len(iterable)
    for lower in range(0, stop, size):
        upper = lower + size
        yield iterable[lower:upper]
from sqlalchemy import text
from sqlalchemy.orm import Session
def delete_plan_batch(session: Session, plan_ids: list[int]):
    """Delete a batch of plans together with their recommendation data.

    Rows are removed child-first: recommendation materials, the
    plan/recommendation link rows, the recommendations themselves and finally
    the plans. This function issues no commit of its own.

    Args:
        session: Open SQLAlchemy session on which every statement is executed.
        plan_ids: IDs of the plans to delete. An empty list is a no-op.
    """
    if not plan_ids:
        return
    # Fail fast instead of queueing behind another transaction's locks; SET LOCAL limits the setting to the
    # current transaction.
    session.execute(text("SET LOCAL lock_timeout = '5s'"))
    params = {"plan_ids": plan_ids}
    # Capture the linked recommendation IDs up front. Once the plan_recommendations rows are deleted they can no
    # longer be looked up, which previously turned the recommendation delete into a no-op.
    recommendation_ids = [
        row[0]
        for row in session.execute(
            text("""
                SELECT DISTINCT recommendation_id
                FROM plan_recommendations
                WHERE plan_id = ANY(:plan_ids)
            """),
            params,
        )
    ]
    # ----------------------------
    # recommendation_materials
    # ----------------------------
    session.execute(
        text("""
            DELETE FROM recommendation_materials rm
            USING plan_recommendations pr
            WHERE rm.recommendation_id = pr.recommendation_id
            AND pr.plan_id = ANY(:plan_ids)
        """),
        params,
    )
    # ----------------------------
    # plan_recommendations
    # ----------------------------
    session.execute(
        text("""
            DELETE FROM plan_recommendations
            WHERE plan_id = ANY(:plan_ids)
        """),
        params,
    )
    # ----------------------------
    # recommendations (only those used by these plans)
    # ----------------------------
    if recommendation_ids:
        session.execute(
            text("""
                DELETE FROM recommendation r
                WHERE r.id = ANY(:recommendation_ids)
            """),
            {"recommendation_ids": recommendation_ids},
        )
    # ----------------------------
    # plans LAST
    # ----------------------------
    session.execute(
        text("""
            DELETE FROM plan
            WHERE id = ANY(:plan_ids)
        """),
        params,
    )
# Delete the plans in batches of 25, opening a fresh db_session for every batch.
batch_size = 25
# Ceiling division: number of batches needed to cover every plan ID.
total = (len(plan_ids_to_delete) + batch_size - 1) // batch_size
for i, batch in enumerate(chunked(plan_ids_to_delete, batch_size), start=1):
    print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)")
    with db_session() as session:
        delete_plan_batch(session, batch)
    # NOTE(review): assumes db_session commits when its block exits - confirm against its implementation.
    print(f"Batch {i} committed")

Some files were not shown because too many files have changed in this diff Show more