diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 50e1c8ee..3d681ca3 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,4 +1,8 @@ +<<<<<<< HEAD FROM python:3.11.10-slim-bullseye +======= +FROM python:3.11.10-bullseye +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d ARG USER=vscode @@ -10,6 +14,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential pkg-config automake autoconf libtool \ && rm -rf /var/lib/apt/lists/* +<<<<<<< HEAD # 2) Build and install libpostal from source RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \ && cd /tmp/libpostal \ @@ -19,12 +24,24 @@ RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \ && make install \ && ldconfig \ && rm -rf /tmp/libpostal +======= +# # 2) Build and install libpostal from source +# RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \ +# && cd /tmp/libpostal \ +# && ./bootstrap.sh \ +# && ./configure --datadir=/usr/local/share/libpostal \ +# && make -j"$(nproc)" \ +# && make install \ +# && ldconfig \ +# && rm -rf /tmp/libpostal +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d # 3) Create the user and grant sudo privileges RUN useradd -m -s /usr/bin/bash ${USER} \ && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \ && chmod 0440 /etc/sudoers.d/${USER} +<<<<<<< HEAD # 4) Python deps - if you want to run assest list ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 ADD asset_list/requirements.txt requirements.txt @@ -36,10 +53,28 @@ RUN pip install -r requirements.txt # ADD backend/app/requirements/requirements.txt requirements2.txt # RUN cat requirements1.txt requirements2.txt > requirements.txt # RUN pip install -r requirements.txt +======= +# # 4) Python deps - if you want to run assest list +# ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 +# ADD asset_list/requirements.txt requirements.txt +# RUN pip install -r 
requirements.txt + +# +ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 +ADD backend/engine/requirements.txt requirements1.txt +ADD backend/app/requirements/requirements.txt requirements2.txt +ADD .devcontainer/requirements.txt requirements3.txt +RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt +RUN pip install -r requirements.txt +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d # 5) Workdir WORKDIR /workspaces/model # 6) Make Python find your package # Add project root to PYTHONPATH for all processes +<<<<<<< HEAD ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} +======= +ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 49bd6f83..80a56bf2 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -23,8 +23,18 @@ "4ops.terraform", "fabiospampinato.vscode-todo-plus", "jgclark.vscode-todo-highlight", +<<<<<<< HEAD "corentinartaud.pdfpreview" ] } +======= + "corentinartaud.pdfpreview", + "ms-python.vscode-python-envs" + ] + } + }, + "containerEnv": { + "PYTHONFLAGS": "-Xfrozen_modules=off" +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d } } diff --git a/.devcontainer/post-install.sh b/.devcontainer/post-install.sh index d9fc3a9e..5c8f2324 100644 --- a/.devcontainer/post-install.sh +++ b/.devcontainer/post-install.sh @@ -1,3 +1,4 @@ +<<<<<<< HEAD # #!/bin/bash # poetry install; @@ -25,3 +26,19 @@ # "$SETTINGS_FILE" > "$SETTINGS_FILE.tmp" && mv "$SETTINGS_FILE.tmp" "$SETTINGS_FILE" # echo "✅ Updated VS Code to use Poetry environment: $VENV_PATH" +======= +mkdir -p ~/.ipython/profile_default/startup + +cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py +from dotenv import load_dotenv +import os + +# Adjust path as needed +env_path = "/workspaces/model/backend/.env" +if os.path.exists(env_path): + load_dotenv(env_path) + print("✔ Loaded .env into Jupyter kernel") 
+else: + print("⚠ No .env file found to load") +EOF +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt new file mode 100644 index 00000000..300b86b0 --- /dev/null +++ b/.devcontainer/requirements.txt @@ -0,0 +1,20 @@ +# fastapi +fastapi==0.115.2 +sqlalchemy==2.0.36 +pydantic-settings==2.6.0 +psycopg2-binary==2.9.10 +python-jose==3.3.0 +cryptography==43.0.3 +mangum==0.19.0 +# AWS +boto3==1.35.44 +# Data +openpyxl==3.1.2 +# Basic +pytz +uvicorn[standard] +sqlmodel +# Testing +pytest==9.0.2 +pytest-cov==7.0.0 +ipykernel>=6.25,<7 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 5e247d77..6268360b 100644 --- a/.gitignore +++ b/.gitignore @@ -242,6 +242,8 @@ fabric.properties local_data/* /local_data/* etl/epc/local_data/* +/backend/condition/sample_data/lbwf/* +/backend/condition/sample_data/peabody/* *.DS_Store infrastructure/terraform/.terraform* @@ -275,4 +277,6 @@ cache/ */.idea *.png -*.pptx \ No newline at end of file +*.pptx + +local_data* \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..6b76b4fa --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 27782c10..3d4c6b42 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,6 +9,12 @@ "path": "/bin/bash" } }, +<<<<<<< HEAD +======= + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "python.testing.pytestArgs": ["-s", "-q", "--no-cov"] +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d // Hot reload setting that needs to be in user settings // "jupyter.runStartupCommands": [ diff --git a/BaseUtility.py b/BaseUtility.py index 1a31c5d0..fb5d3d67 100644 --- a/BaseUtility.py +++ b/BaseUtility.py @@ -1,5 +1,4 @@ from etl.epc.settings import DATA_ANOMALY_MATCHES as data_anon_matches -from etl.epc.settings import DATA_ANOMALY_MATCHES as data_anon_matches class Definitions: diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index b66475ac..ea4d8b34 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -310,6 +310,17 @@ class AssetList: 'NAME OF SURVEYOR' ] + # Solar non-intrusive fields + NON_INTRUSIVES_SOLAR_COLNAMES = [ + 'PV, ACCESS ISSUE, SEE NOTES', 'ROOF ORIENTATION', + 'AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ', 'SHADING', + 'Roof Tiles - CONCRETE/SLATE/ROSEMARY', + 'NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)', + 'SCAFFOLD REQUIRED? 
IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE', + 'IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW', + 'DATE', 'NAME OF SURVEYOR' + ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] @@ -462,6 +473,8 @@ class AssetList: self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns + self.solar_non_intrusives_present = "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -775,6 +788,9 @@ class AssetList: if self.new_format_non_insturives_present_v2: non_intrusive_columns += self.NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 + if self.solar_non_intrusives_present: + non_intrusive_columns += self.NON_INTRUSIVES_SOLAR_COLNAMES + if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ @@ -947,7 +963,7 @@ class AssetList: if self.phase: # We filter on just the properties that have had an inspection - if self.new_format_non_insturives_present_v2: + if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: self.standardised_asset_list = self.standardised_asset_list[ ~self.standardised_asset_list['NAME OF SURVEYOR'].isin( ["YET TO BE SURVEYED", "", None] @@ -982,7 +998,15 @@ class AssetList: # Keep a record of duplicates self.duplicated_addresses = self.standardised_asset_list[ self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated() - ][[self.DOMNA_PROPERTY_ID, self.address1_colname, self.postcode_colname]].copy() + ][[self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname, self.postcode_colname]].copy() + + df = self.standardised_asset_list[ + self.standardised_asset_list[self.DOMNA_PROPERTY_ID].isin( + 
self.duplicated_addresses[self.DOMNA_PROPERTY_ID]) + ][[self.landlord_property_id, self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname, + self.postcode_colname]].copy() + + df = df.sort_values(by=[self.DOMNA_PROPERTY_ID]) self.standardised_asset_list = self.standardised_asset_list[ ~self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated() @@ -1346,10 +1370,10 @@ class AssetList: # for identifying cavity jobs if self.non_intrusives_present and not self.old_format_non_intrusives_present: - if self.new_format_non_insturives_present_v2: + if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: existing_solar_non_intrusives_check = ( self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"].str.strip().isin( - ["ALREADY HAS SOLAR PV"] + ["ALREADY HAS SOLAR PV", "ALREADY HAS PV"] ) ) else: @@ -1788,9 +1812,16 @@ class AssetList: ) ) - not_a_flat = ( - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat" - ) + # Determine if the client gave us property type in the first place + if all(self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "unknown"): + # Use EPC + not_a_flat = ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["property-type"]] != "Flat" + ) + else: + not_a_flat = ( + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat" + ) solar_roof_meets_criteria = ( self.standardised_asset_list["solar_epc_roof_insulated"] | @@ -3457,7 +3488,13 @@ class AssetList: raise ValueError("No installer column found in master data") measure_mix_col = "MEASURE COMBO" - town_colname = "TOWN" if "TOWN" in master_data.columns else 'Town/Area' + + if "TOWN" in master_data.columns: + town_colname = "TOWN" + elif 'Town/Area' in master_data.columns: + town_colname = 'Town/Area' + else: + town_colname = "Town/City" logger.info("Matching master data to asset list") matched = [] diff --git a/asset_list/app.py b/asset_list/app.py index 44cefa39..a832784c 100644 --- a/asset_list/app.py 
+++ b/asset_list/app.py @@ -57,6 +57,7 @@ def app(): EPC recommendations Property UPRN """ +<<<<<<< HEAD data_folder = ("/workspaces/model/asset_list") data_filename = "assets.xlsx" sheet_name = "Sheet1" @@ -70,11 +71,31 @@ def app(): landlord_os_uprn = None landlord_property_type = None landlord_built_form = None +======= + + data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Warmfront/SCIS") + data_filename = "SCIS_Historic_Deemed_Combined_Workings.xlsx" + sheet_name = "SCIS" + postcode_column = 'POSTCODE' + address1_column = "NO" + address1_method = None + fulladdress_column = None + address_cols_to_concat = ["NO", "Street / Block Name", "Town/Area"] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "PROPERTY TYPE As per table emailed" + landlord_built_form = "PROPERTY TYPE As per table emailed" +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None +<<<<<<< HEAD landlord_property_id = "LLUPRN" +======= + landlord_property_id = "Row ID" +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -90,6 +111,77 @@ def app(): asset_list_header = 0 landlord_block_reference = None +<<<<<<< HEAD +======= + # Peabody data for cleaning + data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation") + data_filename = "to_standardise_uprns.xlsx" + sheet_name = "Sheet1" + postcode_column = 'Postcode' + address1_column = "Address 1" + address1_method = None + fulladdress_column = None + address_cols_to_concat = ["Address 1", "Address 2", "Address 3"] + missing_postcodes_method = None + landlord_year_built = None + landlord_os_uprn = None + landlord_property_type = "Type" + landlord_built_form = "Attachment" + landlord_wall_construction = None + 
landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Org Ref" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Lambeth: + # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th" + # data_filename = "lambeth_sw2_leigham court estate.xlsx" + # sheet_name = "Sheet1" + # postcode_column = 'Postcode' + # address1_column = "Address" + # address1_method = None + # fulladdress_column = None + # address_cols_to_concat = ["Address"] + # missing_postcodes_method = None + # landlord_year_built = None + # landlord_os_uprn = None + # landlord_property_type = None + # landlord_built_form = None + # landlord_wall_construction = None + # landlord_roof_construction = None + # landlord_heating_system = None + # landlord_existing_pv = None + # landlord_property_id = "row_id" + # landlord_sap = None + # outcomes_filename = None + # outcomes_sheetname = None + # outcomes_postcode = None + # outcomes_houseno = None + # outcomes_id = None + # outcomes_address = None + # master_filepaths = [] + # master_id_colnames = [] + # master_to_asset_list_filepath = None + # phase = False + # ecosurv_landlords = None + # asset_list_header = 0 + # landlord_block_reference = None +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d # Maps addresses to uprn in problematic cases manual_uprn_map = {} @@ -228,22 +320,22 @@ def app(): ) # We now retrieve any failed properties - chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)] - epc_data_failed, _, _ = get_data( - df=chunk_failed, - row_id_name=asset_list.DOMNA_PROPERTY_ID, - 
uprn_column=AssetList.STANDARD_UPRN, - fulladdress_column=AssetList.STANDARD_FULL_ADDRESS, - address1_column=AssetList.STANDARD_ADDRESS_1, - postcode_column=AssetList.STANDARD_POSTCODE, - property_type_column=AssetList.STANDARD_PROPERTY_TYPE, - built_form_column=AssetList.STANDARD_BUILT_FORM, - manual_uprn_map=manual_uprn_map, - epc_api_only=epc_api_only, - epc_auth_token=EPC_AUTH_TOKEN - ) - - epc_data_chunk.extend(epc_data_failed) + # chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)] + # epc_data_failed, _, _ = get_data( + # df=chunk_failed, + # row_id_name=asset_list.DOMNA_PROPERTY_ID, + # uprn_column=AssetList.STANDARD_UPRN, + # fulladdress_column=AssetList.STANDARD_FULL_ADDRESS, + # address1_column=AssetList.STANDARD_ADDRESS_1, + # postcode_column=AssetList.STANDARD_POSTCODE, + # property_type_column=AssetList.STANDARD_PROPERTY_TYPE, + # built_form_column=AssetList.STANDARD_BUILT_FORM, + # manual_uprn_map=manual_uprn_map, + # epc_api_only=epc_api_only, + # epc_auth_token=EPC_AUTH_TOKEN + # ) + # + # epc_data_chunk.extend(epc_data_failed) # Append the failed data to the main data # Store the chunk locally as a csv @@ -385,6 +477,26 @@ def app(): filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" # Store the data in two tabs. 
One for the asset list with the EPC data and the second with the flat data + # Determine inspections priority + # solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][ + # "domna_postcode"].unique() + # asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( + # solar_jobs + # ) + # # Same for cav + # cavity_jobs = asset_list.standardised_asset_list[ + # ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"]) + # ]["domna_postcode"].unique() + # asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( + # cavity_jobs + # ) + # # We prioritise properties that are in solar areas and cavity areas + # import numpy as np + # asset_list.standardised_asset_list["inspection_priority"] = np.where( + # asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"], + # 1, 2 + # ) + with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) if asset_list.block_analysis_df is not None: @@ -404,4 +516,11 @@ def app(): if not asset_list.geographical_areas.empty: asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) +<<<<<<< HEAD print("done") +======= + + # Store dupes + if not asset_list.duplicated_addresses.empty: + asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False) +>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index 0dc51129..a9defdef 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -438,6 +438,86 @@ BUILT_FORM_MAPPINGS = { 'Maisonette - Mid Terrace': 'mid-terrace', 'Chalet - Wheelchair': 'unknown', 'Studio Flat': 'unknown', - 'Bungalow - Attached': 
'semi-detached' + 'Bungalow - Attached': 'semi-detached', + 'ND': 'unknown', + + 'Maisonette: Mid Terrace: Mid Floor': 'mid-floor', + 'Maisonette: Semi Detached: Ground Floor': 'semi-detached', + 'Maisonette: Enclosed Mid Terrace: Ground Floor': 'enclosed mid-terrace', + 'Maisonette: Enclosed End Terrace: Ground Floor': 'end-terrace', + 'Maisonette: Mid Terrace: Ground Floor': 'mid-terrace', + 'Flat: Semi Detached: Basement': 'semi-detached', + 'Maisonette: Semi Detached: Top Floor': 'semi-detached', + 'Maisonette: Enclosed Mid Terrace: Mid Floor': 'enclosed mid-terrace', + 'Flat: Detached: Basement': 'detached', + 'Maisonette: Enclosed Mid Terrace: Top Floor': 'enclosed mid-terrace', + + 'Maisonette: End Terrace: Top Floor': 'top-floor', + 'House: Mid Terrace: Ground Floor': 'ground floor', + 'Maisonette: Semi Detached: Mid Floor': 'detached', + 'Maisonette: Detached: Mid Floor': 'detached', + 'Bungalow: EnclosedMidTerrace': 'enclosed mid-terrace', + + 'House: EnclosedMidTerrace': 'enclosed mid-terrace', + + 'EnclosedMidTerrace': 'enclosed mid-terrace', + 'EnclosedEndTerrace': 'enclosed end-terrace', + 'EndTerrace': 'end-terrace', + 'SemiDetached': 'semi-detached', + 'MidTerrace': 'mid-terrace', + + '1st FLOOR FLAT': 'mid-floor', + 'END TERRACE HOUSE': 'end-terrace', + 'BUNGALOW-END TERRACE': 'end-terrace', + 'BUNGALOW END TERRACE': 'end-terrace', + 'END-TERRACE': 'end-terrace', + 'SEMI DETACHED': 'semi-detached', + 'Mid flat Ground Floor': 'ground floor', + 'MID TERRACED': 'mid-terrace', + 'Mid Terrace bungalow': 'mid-terrace', + 'BUNGLAOW SEMI DETACHED': 'detached', + 'Bungalow ENd Terrace': 'end-terrace', + 'Bungalow Semi detached': 'detached', + 'BUNGALOW - SEMI DETACHED': 'detached', + 'Bungalow mid terrace': 'mid-terrace', + 'BUNGALOW - MID TERRACED': 'mid-terrace', + 'BUNGALOW - MID TERRACE': 'mid-terrace', + 'Bungalow end terrace': 'end-terrace', + 'BUNGALOW SEMI-DETACHED': 'detached', + 'MID TERR': 'mid-terrace', + 'Bungalow - mid terrace': 'mid-terrace', 
+ 'MID-TERRACE': 'mid-terrace', + 'Bunagalow Semi Detached': 'semi-detached', + 'SEMI DETACHED BUNGALOW': 'semi-detached', + 'MID TERRACE HOUSE': 'mid-terrace', + 'END - TERRACE': 'end-terrace', + 'BUNGALOW-SEMI DETACHED': 'semi-detached', + 'Semi-Detached': 'semi-detached', + 'End-Terrace house': 'end-terrace', + 'BUNGALOW MID TERRACE': 'mid-terrace', + 'SEMI DETACHED HOUSE': 'semi-detached', + 'BUNGALOW SEMI DETACHED': 'detached', + 'MID - TERRACE': 'mid-terrace', + '3 EXT WALL FLAT': 'end-terrace', + '3 Ext wall flat': 'end-terrace', + '3 EX WALL FLAT': 'end-terrace', + '2 ext wall flats': 'mid-terrace', + '2 EXT WALLS': 'mid-terrace', + '3.EXT.WALL FLAT': 'end-terrace', + 'FLAT 3 WALLS': 'end-terrace', + '2 Ext Wall flat': 'mid-terrace', + 'DETATCHED HOUSE': 'detached', + '3 EXT. WALL FLAT': 'end-terrace', + '3 ext wall flat': 'end-terrace', + '3 EXT WALLS': 'end-terrace', + '3 EXT WALL - NOW 2 EXT': 'unknown', + '3 EXT-WALL FLAT': 'end-terrace', + 'FLAT 2 WALLS': 'mid-terrace', + '3 EX WALL MAISONETTE': 'end-terrace', + '3 Ext Wall Flat': 'end-terrace', + 'Semi Bungalow': 'semi-detached', + '2 EXT WALL FLAT': 'mid-terrace', + '2.EXT.WALL FLAT': 'mid-terrace', + '2 EXT. 
WALL FLAT': 'mid-terrace', } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 424b9b46..ffd1b198 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -473,5 +473,27 @@ HEATING_MAPPINGS = { 'Boiler and radiators, oil': 'oil boiler', 'Boiler and radiators, electric': 'electric boiler', 'No system present: electric heaters assumed': 'electric radiators', - 'Boiler and radiators, anthracite': 'solid fuel' + 'Boiler and radiators, anthracite': 'solid fuel', + + 'Heat networks Heat networks (mains gas)': 'communal heating', + 'ND Oil': 'oil fuel', + 'Boiler Biofuel': 'boiler - other fuel', + + 'Electric (direct acting) room heaters: Water- or oil-filled radiators': 'room heaters', + 'Other: Electric ceiling heating': 'electric ceiling', + 'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump', + 'Oil room heaters: Room heater, 2000 or later': 'room heaters', + 'Electric Underfloor Heating: In screed above insulation (standard or off peak)': 'electric underfloor', + 'Heat Pump: Electric Heat pumps: Air source heat pump in other cases': 'air source heat pump', + 'Electric Storage Systems: Old (large volume) storage heaters': 'electric storage heaters', + + 'Gas (including LPG) room heaters: Condensing gas fire': 'room heaters', + 'Solid fuel room heaters: Open fire in grate': 'solid fuel', + 'Solid fuel room heaters: Open fire with back boiler (no radiators)': 'solid fuel', + 'Community Heating Systems: Community heat pump (RdSAP)': 'communal heating', + 'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, ' + 'and sealed to, fireplace opening': 'room heaters', + 'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel', + 'Boiler: G rated Combi': 'gas condensing combi' + } diff --git a/asset_list/mappings/property_type.py 
b/asset_list/mappings/property_type.py index 290e172a..1f251598 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -343,5 +343,90 @@ PROPERTY_MAPPING = { 'bungalow': 'bungalow', 'flat': 'flat', 'FLA': 'flat', - 'HOU': 'house' + 'HOU': 'house', + + 'Maisonette: Mid Terrace: Mid Floor': 'maisonette', + 'Maisonette: Semi Detached: Ground Floor': 'maisonette', + 'Maisonette: Enclosed Mid Terrace: Ground Floor': 'maisonette', + 'Maisonette: Enclosed End Terrace: Ground Floor': 'maisonette', + 'Maisonette: Mid Terrace: Ground Floor': 'maisonette', + 'Flat: Semi Detached: Basement': 'flat', + 'Maisonette: Semi Detached: Top Floor': 'maisonette', + 'Maisonette: Enclosed Mid Terrace: Mid Floor': 'maisonette', + 'Flat: Detached: Basement': 'flat', + 'Maisonette: Enclosed Mid Terrace: Top Floor': 'maisonette', + + 'Maisonette: End Terrace: Top Floor': 'maisonette', + 'House: Mid Terrace: Ground Floor': 'house', + 'Bungalow: EnclosedMidTerrace': 'bungalow', + 'Maisonette: Semi Detached: Mid Floor': 'maisonette', + 'Maisonette: Detached: Mid Floor': 'maisonette', + + 'House: EnclosedMidTerrace': 'house', + + '3 EXT WALL FLAT': 'flat', + '1st FLOOR FLAT': 'flat', + '3 Ext wall flat': 'flat', + '3 EX WALL FLAT': 'flat', + 'END TERRACE HOUSE': 'house', + 'BUNGALOW-END TERRACE': 'bungalow', + 'BUNGALOW END TERRACE': 'bungalow', + '2 ext wall flats': 'flat', + 'Mid flat Ground Floor': 'flat', + '3.EXT.WALL FLAT': 'flat', + 'FLAT 3 WALLS': 'flat', + 'Mid Terrace bungalow': 'bungalow', + 'Bungalow ENd Terrace': 'bungalow', + '2 Ext Wall flat': 'flat', + 'DETATCHED HOUSE': 'house', + 'Bungalow Semi detached': 'bungalow', + 'BUNGALOW - SEMI DETACHED': 'bungalow', + 'Bungalow mid terrace': 'bungalow', + 'BUNGALOW - MID TERRACED': 'bungalow', + 'BUNGALOW - MID TERRACE': 'bungalow', + 'Bungalow end terrace': 'bungalow', + '3 EXT. 
WALL FLAT': 'flat', + '3 ext wall flat': 'flat', + 'BUNGALOW SEMI-DETACHED': 'bungalow', + '3 EXT-WALL FLAT': 'flat', + 'Bungalow - mid terrace': 'bungalow', + 'SEMI DETACHED BUNGALOW': 'bungalow', + 'FLAT 2 WALLS': 'flat', + 'MID TERRACE HOUSE': 'house', + '3 EX WALL MAISONETTE': 'maisonette', + 'BUNGALOW-SEMI DETACHED': 'bungalow', + '3 Ext Wall Flat': 'flat', + 'Semi Bungalow': 'bungalow', + 'End-Terrace house': 'house', + 'BUNGALOW MID TERRACE': 'bungalow', + 'Mid-terrace house': 'house', + 'SEMI DETACHED HOUSE': 'house', + 'Semi-detached house': 'house', + '2 EXT WALL FLAT': 'flat', + '2.EXT.WALL FLAT': 'flat', + 'BUNGALOW SEMI DETACHED': 'bungalow', + '2 EXT. WALL FLAT': 'flat', + 'END-TERRACE': 'unknown', + 'SEMI DETACHED': 'unknown', + '2 EXT WALLS': 'unknown', + 'MID TERRACED': 'unknown', + 'BUNGLAOW SEMI DETACHED': 'bungalow', + 'END TERRACE': 'unknown', + '3 EXT WALLS': 'unknown', + 'Mid Terrace': 'unknown', + '3 EXT WALL - NOW 2 EXT': 'unknown', + 'MID TERR': 'unknown', + 'DETACHED': 'unknown', + 'MID-TERRACE': 'unknown', + 'Bunagalow Semi Detached': 'bungalow', + 'End-terrace': 'unknown', + 'END - TERRACE': 'unknown', + 'SEMI-DETACHED': 'unknown', + 'Semi-Detached': 'unknown', + 'MID TERRACE': 'unknown', + 'End Terrace': 'unknown', + 'Detached': 'unknown', + 'Mid-terrace': 'unknown', + 'MID - TERRACE': 'unknown' + } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 60f0473c..0857b046 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -246,4 +246,59 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'Pitched, 150 mm loft insulation': 'pitched insulated', 'Flat, limited insulation (assumed)': 'flat uninsulated', + 'Pitched (no access to loft) 350mm': 'pitched insulated', + 'Pitched (no access to loft) 200mm': 'pitched insulated', + 'Pitched (access to loft) 200mm': 'pitched insulated', + 'Pitched (no access to loft) 250mm': 'pitched insulated', + 'Pitched (access to loft) 100mm': 'pitched insulated', + 'Another 
dwelling above ND (inferred)': 'another dwelling above', + 'Pitched (no access to loft) N/A': 'pitched no access to loft', + 'Pitched (no access to loft) ND (inferred)': 'pitched no access to loft', + 'Pitched (no access to loft) 150mm': 'pitched insulated', + 'Pitched (access to loft) 400mm+': 'pitched insulated', + 'Pitched (no access to loft) 300mm': 'pitched insulated', + 'Pitched (access to loft) <25mm': 'pitched less than 100mm insulation', + 'Pitched (access to loft) None': 'pitched less than 100mm insulation', + 'Pitched (access to loft) 300mm': 'pitched insulated', + 'Pitched (access to loft) 50mm': 'pitched less than 100mm insulation', + 'Pitched (access to loft) 270mm': 'pitched insulated', + 'Pitched (access to loft) Non-joist': 'pitched access to loft', + 'Pitched (access to loft) 250mm': 'pitched insulated', + 'Another dwelling above N/A': 'another dwelling above', + 'Pitched (access to loft) 150mm': 'pitched insulated', + 'Pitched (access to loft) ND (inferred)': 'pitched access to loft', + 'Pitched (access to loft) 350mm': 'pitched insulated', + 'Pitched (access to loft) NR': 'pitched unknown insulation', + 'Pitched (access to loft) 75mm': 'pitched less than 100mm insulation', + 'Pitched (access to loft) N/A': 'pitched access to loft', + 'ND (inferred) 250mm': 'unknown insulated', + 'Pitched (vaulted ceiling) Non-joist': 'pitched unknown insulation', + 'ND (inferred) ND (inferred)': 'unknown', + 'Flat Non-joist': 'flat insulated', + 'Same dwelling above N/A': 'another dwelling above', + + 'Flat: As Built, PitchedNormalLoftAccess: Unknown': 'flat unknown insulation', + 'PitchedNormalLoftAccess: Unknown, PitchedNormalNoLoftAccess: Unknown': 'pitched unknown insulation', + 'PitchedNormalLoftAccess: 400mm+': 'pitched insulated', + 'AnotherDwellingAbove: 150mm': 'another dwelling above', + 'Flat: 150mm': 'flat insulated', + 'AnotherDwellingAbove: 50mm': 'another dwelling above', + 'PitchedNormalNoLoftAccess: As Built': 'pitched no access to loft', + 
'PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated', + 'PitchedNormalLoftAccess: 200mm, PitchedWithSlopingCeiling: As Built': 'pitched insulated', + 'PitchedNormalLoftAccess: 350mm': 'pitched insulated', + 'PitchedNormalNoLoftAccess: 270mm': 'pitched no access to loft', + 'AnotherDwellingAbove: 100mm': 'another dwelling above', + + 'PitchedWithSlopingCeiling: Unknown': 'piched unknown insulation', + 'AnotherDwellingAbove: Unknown, Flat: As Built': 'another dwelling above', + 'Flat: Unknown, PitchedNormalLoftAccess: 25mm': 'flat unknown insulation', + 'SameDwellingAbove: Unknown': 'another dwelling above', + 'Flat: Unknown': 'flat unknown insulation', + 'Flat: 50mm, PitchedNormalLoftAccess: 100mm': 'flat insulated', + 'Flat: As Built, PitchedNormalLoftAccess: 250mm, PitchedWithSlopingCeiling: As Built': 'flat unknown insulation', + 'Flat: As Built, PitchedNormalLoftAccess: 400mm+': 'flat unknown insulation', + 'PitchedWithSlopingCeiling: As Built': 'pitched insulated', + 'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation', + } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 14e4565c..418ae9f8 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -342,5 +342,18 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Solid brick, as built, partial insulation (assumed)': 'insulated solid brick', 'Sandstone, as built, no insulation (assumed)': 'uninsulated sandstone or limestone', 'System built, as built, partial insulation (assumed)': 'system built unknown insulation', - 'Timber frame, with external insulation': 'insulated timber frame' + 'Timber frame, with external insulation': 'insulated timber frame', + + 'Cob As-built': 'cob', + 'System built Unknown insulation': 'system built unknown insulation', + 'Solid brick Unknown insulation': 'solid brick unknown insulation', + 'Timber frame Internal': 'insulated timber frame', + 'System built External': 'insulated system built', + 
'Stone As-built': 'uninsulated sandstone or limestone', + 'System built As-built': "uninsulated system built", + 'System built Internal': 'insulated system built', + + 'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation', + 'Cavity: FilledCavityPlusExternal': 'filled cavity' + } diff --git a/asset_list/utils.py b/asset_list/utils.py index fe2b7d14..8746c03a 100644 --- a/asset_list/utils.py +++ b/asset_list/utils.py @@ -1,6 +1,7 @@ import time import random import pandas as pd + from backend.SearchEpc import SearchEpc from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc from tqdm import tqdm @@ -9,6 +10,132 @@ from utils.logger import setup_logger logger = setup_logger() +def get_data_for_property( + address1: str, + postcode: str, + full_address: str, + property_type: [str | None], + built_form: [str | None], + uprn: [str | float | None], + epc_auth_token: str, + find_my_epc_return_page: bool +): + """ + Utility function that will fetch the data for a single property + :return: + """ + + if property_type == "block of flats": + return None + + house_number = str(address1).strip() + full_address = full_address.strip() + house_no = SearchEpc.get_house_number(address=str(house_number), postcode=postcode) + if house_no is None: + house_no = house_number + + if pd.isnull(uprn): + uprn = None + + searcher = SearchEpc( + address1=str(house_no), + postcode=postcode, + auth_token=epc_auth_token, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address, + max_retries=5, + uprn=uprn + ) + # Force the skipping of estimating the EPC + # We check if the property was split + + searcher.ordnance_survey_client.property_type = property_type + searcher.ordnance_survey_client.built_form = built_form + searcher.find_property(skip_os=True) + + # Check if we have a flat or appartment + if searcher.newest_epc is None and uprn is None: + # Try again: + if SearchEpc.get_house_number(address=str(house_number), postcode=postcode) is 
None: + # Backup + add1 = full_address.split(",") + if len(add1) > 1: + add1 = add1[1].strip() + else: + # Try splitting on space + add1 = full_address.split(" ")[0].strip() + else: + add1 = str(house_number) + searcher = SearchEpc( + address1=add1, + postcode=postcode, + auth_token=epc_auth_token, + os_api_key="", + property_type=None, + fast=True, + full_address=full_address, + max_retries=5 + ) + + if ( + "flat" in house_number.lower() or "apartment" in house_number.lower() or "apt" in + house_number.lower() + ): + searcher.ordnance_survey_client.property_type = "Flat" + + searcher.find_property(skip_os=True) + + # As a final resort, we estimate the EPC + if property_type is not None and searcher.newest_epc is None: + searcher.ordnance_survey_client.property_type = property_type + searcher.ordnance_survey_client.built_form = built_form + searcher.find_property(skip_os=True) + + if searcher.newest_epc is None: + return None + + # Retrieve data from FindMyEPC + try: + find_epc_searcher = RetrieveFindMyEpc( + address=searcher.newest_epc["address"], + postcode=searcher.newest_epc["postcode"] + ) + find_epc_response = find_epc_searcher.retrieve_newest_find_my_epc_data( + return_page=find_my_epc_return_page + ) + + except ValueError as e: + if "No EPC found" in str(e) and "address1" in searcher.newest_epc: + try: + find_epc_searcher = RetrieveFindMyEpc( + address=searcher.newest_epc["address1"], postcode=searcher.newest_epc["postcode"] + ) + find_epc_response = find_epc_searcher.retrieve_newest_find_my_epc_data() + except ValueError as e: + if "No EPC found" in str(e): + find_epc_response = ({}, None) if find_my_epc_return_page else ({}) + else: + logger.error(f"Error retrieving FindMyEPC data: {e}") + raise Exception(f"Error retrieving FindMyEPC data: {e}") + else: + find_epc_response = ({}, None) if find_my_epc_return_page else ({}) + except Exception as e: + raise Exception(f"Error retrieving FindMyEPC data: {e}") + + newest_epc = searcher.newest_epc + older_epcs = 
searcher.older_epcs + + find_my_epc_page = None + if find_my_epc_return_page: + find_my_epc_data, find_my_epc_page = find_epc_response + else: + find_my_epc_data = find_epc_response + + return newest_epc, older_epcs, find_my_epc_data, find_my_epc_page + + def get_data( df, manual_uprn_map, diff --git a/backend/Funding.py b/backend/Funding.py index 33c94e11..4ec57f8a 100644 --- a/backend/Funding.py +++ b/backend/Funding.py @@ -1,11 +1,14 @@ from enum import Enum from typing import List import pandas as pd +from utils.logger import setup_logger from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes from backend.app.plan.schemas import VALID_HOUSING_TYPES, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, \ MEASURE_MAP +logger = setup_logger(__name__) + class EligibilityCaveats(Enum): EPC_RATING = "epc_rating" # EPC requirements not met @@ -365,6 +368,8 @@ class Funding: starting_str = "1.7" elif closest_starting == 1: starting_str = "1.0" + elif closest_starting == 0.6: + starting_str = "0.6" else: starting_str = f"{closest_starting:.2f}" @@ -539,6 +544,8 @@ class Funding: pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == measure_code] if pps.shape[0] != 1: + if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 raise ValueError(f"Invalid IWI category: {measure_code}") return pps.squeeze()["Cost Savings"] @@ -551,6 +558,8 @@ class Funding: pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == measure_code] if pps.shape[0] != 1: + if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 raise ValueError(f"Invalid EWI category: {measure_code}") return pps.squeeze()["Cost Savings"] @@ -559,6 +568,8 @@ class Funding: pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == measure_code] if pps.shape[0] != 1: + if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", 
"High_A"]: + return 0 raise ValueError(f"Invalid CWI category: {measure_code}") return pps.squeeze()["Cost Savings"] @@ -578,6 +589,11 @@ class Funding: return pps.squeeze()["Cost Savings"] if measure_type == "flat_roof_insulation": + + # Not funding for properties starting at C or above + if self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 + pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == "FRI"] if pps.shape[0] != 1: raise ValueError("Invalid FRI category") @@ -589,6 +605,8 @@ class Funding: code = "RIRI_res_unin" pps = filtered_pps_matrix[filtered_pps_matrix["Measure_Type"] == code] if pps.shape[0] != 1: + if pps.empty and self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: + return 0 raise ValueError(f"Invalid RIRI category: {code}") return pps.squeeze()["Cost Savings"] @@ -632,13 +650,25 @@ class Funding: if self.starting_sap_band in ["Low_C", "High_C", "Low_B", "High_B", "Low_A", "High_A"]: return 0 - pps = filtered_pps_matrix[ - (filtered_pps_matrix["Pre_Main_Heating_Source"] == pre_heating_system) & - (filtered_pps_matrix["Post_Main_Heating_Source"] == "Air to Water ASHP") & - (filtered_pps_matrix["Measure_Type"] == "B_Upgrade_nopreHCs") + pps_data = filtered_pps_matrix[ + filtered_pps_matrix["Post_Main_Heating_Source"] == "Air to Water ASHP" + ] + + if pre_heating_system not in pps_data["Pre_Main_Heating_Source"].values: + logger.info( + f"No PPS data for ASHP upgrade from {pre_heating_system}, returning 0" + ) + return 0 + + pps = pps_data[ + (pps_data["Pre_Main_Heating_Source"] == pre_heating_system) & + (pps_data["Measure_Type"] == "B_Upgrade_nopreHCs") # We assume we'll be making a heating system upgrade ] + # Not every pre heating system will result in PPS, e.g. a ground source heat pump to ASHP upgrade + # won't have a PPS. 
+ if pps.shape[0] != 1: raise ValueError("something went wrong, more than one pps for ashp") return pps.squeeze()["Cost Savings"] @@ -789,7 +819,7 @@ class Funding: if not has_eligibile_heating: # We check if there is a recommendation for an ASHP or HHRSH if ("air_source_heat_pump" not in measure_types) and ( - "high_heat_retention_storage_heater" not in measure_types): + "high_heat_retention_storage_heaters" not in measure_types): return True, False, True # 2) We check if there is a wall insulation measure for this property. If so, we make sure diff --git a/backend/Property.py b/backend/Property.py index bd968e9f..0df29405 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -22,8 +22,8 @@ from recommendations.recommendation_utils import ( ) from backend.ml_models.AnnualBillSavings import AnnualBillSavings from backend.app.utils import sap_to_epc -from backend.Funding import Funding import backend.app.assumptions as assumptions +from backend.app.db.models.portfolio import rating_lookup ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev") DATA_BUCKET = os.environ.get( @@ -65,6 +65,7 @@ class Property: # Surplus information, that can be provided as optional inputs, by a customer n_bathrooms = None n_bedrooms = None + landlord_property_id = None # unique reference for the property as recognised by the landlord building_id = None # Used to group properties together into a single building # Contains the solar panel optimisation results from the Google Solar API @@ -80,12 +81,14 @@ class Property: postcode, address, epc_record, + uprn=None, # Pass as an optional input property_valuation=None, already_installed=None, non_invasive_recommendations=None, measures=None, energy_assessment=None, is_new=True, + inspections=None, **kwargs ): @@ -106,7 +109,7 @@ class Property: # of the non-invasive surveys. 
We reflect that this has been installed in the recommendations, but remove the # cost and instead, provide a message that the measure has already been installed - self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else [] + self.already_installed = already_installed self.non_invasive_recommendations = ( non_invasive_recommendations['recommendations'] if non_invasive_recommendations else [] @@ -119,7 +122,7 @@ class Property: self.valuation = property_valuation - self.uprn = epc_record.get("uprn") + self.uprn = uprn if uprn is not None else epc_record.get("uprn") self.uprn_source = self.data.get("uprn-source") self.full_sap_epc = epc_record.get("full_sap_epc") @@ -209,6 +212,9 @@ class Property: self.energy_assessment_condition_data = energy_assessment["condition"] self.energy_assessment_is_newer = energy_assessment["energy_assessment_is_newer"] + # Store inspections + self.inspections = inspections + # TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data self.parse_kwargs(kwargs) @@ -265,8 +271,9 @@ class Property: "number_of_floors": number_of_floors, "insulation_floor_area": insulation_floor_area, "insulation_wall_area": insulation_wall_area, - "building_id": kwargs.get("building_id", None), - "floor_area": floor_area + "building_id": kwargs.get("building_id", kwargs.get("landlord_block_reference", None)), + "floor_area": floor_area, + "landlord_property_id": kwargs.get("landlord_property_id"), } def parse_kwargs(self, kwargs): @@ -295,9 +302,18 @@ class Property: if k in fixed_data_col_names } - difference_record = self.epc_record.create_EPCDifferenceRecord( - self.epc_record, fixed_data - ) + difference_record = self.epc_record.create_EPCDifferenceRecord(self.epc_record, fixed_data) + + # We have rare cases where entire description columns are missing. EpcRecords will convert this to None. 
+ # Due to the sensitivity of the EPCDifferenceRecord creation to missing data, we will fill in these missing + # descriptions with an empty string, for the purpose of creating this scoring record + description_cols = [ + x for x in difference_record.difference_record if + "_description" in x and difference_record.difference_record[x] is None + ] + if description_cols: + for col in description_cols: + difference_record.difference_record[col] = "" self.base_difference_record = TrainingDataset(datasets=[difference_record], cleaned_lookup=cleaned_lookup) @@ -325,7 +341,6 @@ class Property: recommendation_record=recommendation_record, recommendations=property_representative_recommendations, primary_recommendation_id=self.id, - non_invasive_recommendations=self.non_invasive_recommendations, ) return scoring_dict @@ -454,10 +469,8 @@ class Property: # It means we've recommended HHR with electric immersion, and shouldn't overwrite # the hot water description continue + # Set the new value otherwise as it's due to already installed measures - do nothing - raise NotImplementedError( - "Already have this key in the phase_epc_transformation - implement me" - ) phase_epc_transformation[k] = v simulation_epc.update(phase_epc_transformation) self.simulation_epcs[rec["recommendation_id"]] = simulation_epc @@ -596,7 +609,13 @@ class Property: if self.data[description] in self.DATA_ANOMALY_MATCHES: template = cleaned[description][0] - fill_dict = dict(zip(template.keys(), [None] * len(template))) + # Handling edge case for walls + fill_with = False if description == "walls-description" else None + fill_dict = dict(zip(template.keys(), [fill_with] * len(template))) + if description == "walls-description": + fill_dict["thermal_transmittance_unit"] = None + fill_dict["insulation_thickness"] = "none" + fill_dict.update( { "original_description": self.data[description], @@ -721,11 +740,12 @@ class Property: self.energy_cost_estimates = { "unadjusted": unadjusted_heating_costs, - "epc":
{ - "heating": float(self.data["heating-cost-current"]), - "hot_water": float(self.data["hot-water-cost-current"]), - "lighting": float(self.data["lighting-cost-current"]), - } + # Don't think we need the EPC + # "epc": { + # "heating": float(self.data["heating-cost-current"]), + # "hot_water": float(self.data["hot-water-cost-current"]), + # "lighting": float(self.data["lighting-cost-current"]), + # } } self.energy_consumption_estimates = { @@ -778,13 +798,19 @@ class Property: to_update[k] = None return to_update - def get_full_property_data(self, current_valuation=None): + def get_full_property_data(self, current_valuation=None, needs_rebaselining=False, rebaselining_sap=0): """ This method extracts the data which is pushed to the database, containing core information, from the EPC about a property :return: """ + current_sap_rating = float(self.data["current-energy-efficiency"]) + if needs_rebaselining: + current_sap_rating += rebaselining_sap + + current_epc_rating = sap_to_epc(current_sap_rating) + property_data = { "creation_status": "READY", "uprn": int(self.data["uprn"]), @@ -801,9 +827,12 @@ class Property: "number_of_rooms": self.number_of_rooms, "year_built": self.year_built, "tenure": self.data["tenure"], - "current_epc_rating": self.data["current-energy-rating"], - "current_sap_points": self.data["current-energy-efficiency"], + "current_epc_rating": current_epc_rating, + "current_sap_points": current_sap_rating, "current_valuation": current_valuation, + "original_sap_points": self.data["current-energy-efficiency"], + "is_sap_points_adjusted_for_installed_measures": needs_rebaselining, + "installed_measures_sap_point_adjustment": rebaselining_sap, } property_data = self._clean_upload_data(property_data) @@ -811,7 +840,7 @@ class Property: return property_data @classmethod - def _prepare_rating_field(cls, field, rating_lookup): + def _prepare_rating_field(cls, field): """ Utility function for usage in the lambda, for preparing the _rating fields """ @@ 
-821,48 +850,68 @@ class Property: else None ) - def get_property_details_epc(self, portfolio_id: int, rating_lookup): + def get_property_details_epc( + self, portfolio_id: int, needs_rebaselining: bool = False, rebaselining_carbon: float = 0, + rebaselining_heat_demand: float = 0, rebaselining_kwh: float = 0, rebaselining_bills: float = 0 + ): if self.current_energy_bill is None: raise ValueError("Current energy bill has not been set") + # IF we have a SAP05 overwrite, we pull out the relevant information + sap_05_overwritten = self.data.get("sap-05-overwritten", False) + + sap_05_score, sap_05_epc_rating = None, None + if sap_05_overwritten: + if not self.old_data: + # Trying to fetch SAP05 EPC but no data + raise ValueError("Trying to fetch SAP05 EPC but no old data available") + # We get the last rating from the old data + newest_old_epc = max(self.old_data, key=lambda d: pd.to_datetime(d["lodgement-date"])) + # Get the rating and score + sap_05_score = int(newest_old_epc["current-energy-efficiency"]) + sap_05_epc_rating = newest_old_epc["current-energy-rating"] + + lodgement_date = self.data["lodgement-date"] + # We check if the lodgement date is more than 10 years old + is_expired = (datetime.now() - pd.to_datetime(lodgement_date)) > timedelta(days=3650) + + # Handle re-baselining + co2_emissions = self.energy["co2_emissions"] + primary_energy_consumption = self.energy["primary_energy_consumption"] + current_kwh_demand = self.current_energy_consumption + current_kwh_heating_hotwater = self.current_energy_consumption_heating_hotwater + if needs_rebaselining: + # Carbon will be reduced + co2_emissions -= rebaselining_carbon + # Heat demand will be reduced + primary_energy_consumption -= rebaselining_heat_demand + current_kwh_demand -= rebaselining_kwh + current_kwh_heating_hotwater -= rebaselining_kwh + property_details_epc = { "property_id": self.id, "portfolio_id": portfolio_id, + "lodgement_date": datetime.fromisoformat(lodgement_date), + "is_expired": 
is_expired, "full_address": self.data["address"], "total_floor_area": float(self.data["total-floor-area"]), "walls": self.walls["clean_description"], - "walls_rating": self._prepare_rating_field( - self.data["walls-energy-eff"], rating_lookup - ), + "walls_rating": self._prepare_rating_field(self.data["walls-energy-eff"]), "roof": self.roof["clean_description"], - "roof_rating": self._prepare_rating_field( - self.data["roof-energy-eff"], rating_lookup - ), + "roof_rating": self._prepare_rating_field(self.data["roof-energy-eff"]), "floor": self.floor["clean_description"], - "floor_rating": self._prepare_rating_field( - self.data["floor-energy-eff"], rating_lookup - ), + "floor_rating": self._prepare_rating_field(self.data["floor-energy-eff"]), "windows": self.windows["clean_description"], - "windows_rating": self._prepare_rating_field( - self.data["windows-energy-eff"], rating_lookup - ), + "windows_rating": self._prepare_rating_field(self.data["windows-energy-eff"]), "heating": self.main_heating["clean_description"], - "heating_rating": self._prepare_rating_field( - self.data["mainheat-energy-eff"], rating_lookup - ), + "heating_rating": self._prepare_rating_field(self.data["mainheat-energy-eff"]), "heating_controls": self.main_heating_controls["clean_description"], - "heating_controls_rating": self._prepare_rating_field( - self.data["mainheatc-energy-eff"], rating_lookup - ), + "heating_controls_rating": self._prepare_rating_field(self.data["mainheatc-energy-eff"]), "hot_water": self.hotwater["clean_description"], - "hot_water_rating": self._prepare_rating_field( - self.data["hot-water-energy-eff"], rating_lookup - ), + "hot_water_rating": self._prepare_rating_field(self.data["hot-water-energy-eff"]), "lighting": self.lighting["clean_description"], - "lighting_rating": self._prepare_rating_field( - self.data["lighting-energy-eff"], rating_lookup - ), + "lighting_rating": self._prepare_rating_field(self.data["lighting-energy-eff"]), "mainfuel": 
self.main_fuel["clean_description"], "ventilation": self.ventilation["ventilation"], "solar_pv": self.solar_pv["solar_pv"], @@ -871,19 +920,30 @@ class Property: "floor_height": self.floor_height, "heat_loss_corridor": self.heat_loss_corridor["heat_loss_corridor_boolean"], "unheated_corridor_length": self.heat_loss_corridor["length"], - "number_of_open_fireplaces": self.number_of_open_fireplaces[ - "number_of_open_fireplaces" - ], + "number_of_open_fireplaces": self.number_of_open_fireplaces["number_of_open_fireplaces"], "number_of_extensions": self.number_of_extensions["number_of_extensions"], "number_of_storeys": self.number_of_storeys["number_of_storeys"], "mains_gas": self.mains_gas, "energy_tariff": self.data["energy-tariff"], - "primary_energy_consumption": self.energy["primary_energy_consumption"], - "co2_emissions": self.energy["co2_emissions"], - "current_energy_demand": self.current_energy_consumption, - "current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater, + "primary_energy_consumption": primary_energy_consumption, + "co2_emissions": co2_emissions, + "current_energy_demand": current_kwh_demand, # This is kwh - naming is confusing + "current_energy_demand_heating_hotwater": current_kwh_heating_hotwater, # This is kwh "estimated": self.data.get("estimated", False), - **self.current_energy_bill + # We indicate if we've overwritten a SAP 05 EPC + "sap_05_overwritten": sap_05_overwritten, + "sap_05_score": sap_05_score, + "sap_05_epc_rating": sap_05_epc_rating, + **self.current_energy_bill, + "original_co2_emissions": self.energy["co2_emissions"], + "original_primary_energy_consumption": self.energy["primary_energy_consumption"], + "original_current_energy_demand": self.current_energy_consumption, # Bad naming, this is kwh + "original_current_energy_demand_heating_hotwater": self.current_energy_consumption_heating_hotwater, # kwh + "installed_measures_co2_adjustment": rebaselining_carbon, + 
"installed_measures_energy_demand_adjustment": rebaselining_kwh, # kwh + "installed_measures_total_energy_bill_adjustment": rebaselining_bills, + "installed_measures_heat_demand_adjustment": rebaselining_heat_demand, + "is_epc_adjusted_for_installed_measures": needs_rebaselining, } return property_details_epc @@ -1070,7 +1130,12 @@ class Property: elif self.floor["thermal_transmittance"] is not None: self.floor_type = "solid" else: - raise NotImplementedError("Implement this floor type") + # in this case, it's not super clear what the floor type is, so we default - this is a temp + logger.warning( + f"Could not determine floor type, given: '{self.floor['original_description']}', defaulting to " + f"suspended for property {self.uprn}" + ) + self.floor_type = "suspended" @staticmethod def _extract_component( @@ -1154,6 +1219,7 @@ class Property: 'has_community_scheme': 'Varied (Community Scheme)', "has_dual_fuel_mineral_and_wood": 'Wood Logs', "has_electricaire": 'Electricity', + "has_wood_chips": 'Wood Logs' } # Hot water @@ -1185,6 +1251,19 @@ class Property: 'oil range cooker': 'Oil' } + fuel_map = { + None: "Natural Gas (Community Scheme)", + "mains gas": "Natural Gas (Community Scheme)", + "biomass": "Smokeless Fuel", + "electricity": "Electricity", + "biogas": "Smokeless Fuel", + "heat network": "Natural Gas (Community Scheme)", + "lpg": 'LPG', + "biodiesel": "Smokeless Fuel", + "b30d": "B30K Biofuel", + "coal": "Coal", + } + self.heating_energy_source = list({ fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False) }) @@ -1197,6 +1276,12 @@ class Property: else: self.heating_energy_source = ['Electricity'] + if set(self.heating_energy_source) == {'Electricity', 'LPG'}: + if self.main_fuel["clean_description"] in ["Lpg not community", "Lpg community"]: + self.heating_energy_source = ['LPG'] + else: + self.heating_energy_source = ['Electricity'] + if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}: # It means they 
have mixed heating so we take the primary one, based on main fuel # This will probably happen in the case of an extension @@ -1205,21 +1290,50 @@ class Property: else: self.heating_energy_source = ['Wood Logs'] - if len(self.heating_energy_source) == 0 or len(self.heating_energy_source) > 1: + if len(self.heating_energy_source) > 1 and "Varied (Community Scheme)" not in self.heating_energy_source: + # We might have something like heating energy source equal to ['Natural Gas', 'Varied (Community Scheme)'] + # so we treat this as community heating raise Exception("Investigate me") + if len(self.heating_energy_source) == 0: + heating_flags = { + v for k, v in self.main_heating.items() if k not in ["original_description", "clean_description"] + } + hotwater_flags = { + v for k, v in self.hotwater.items() if k not in ["original_description", "clean_description"] + } + + # If all flags are zero, we have a no data example + if (heating_flags == {False} or hotwater_flags == {None}) and ( + hotwater_flags == {False} or hotwater_flags == {None}): + # We have nodata so we try and rely on main fuel + if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown + mapped_fuel = fuel_map[self.main_fuel["fuel_type"]] + self.heating_energy_source = mapped_fuel + self.hot_water_energy_source = mapped_fuel + return + else: + raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") + + # We handle edge case where no heating system is indicated + if self.main_fuel["fuel_type"] in fuel_map: + mapped_fuel = fuel_map[self.main_fuel["fuel_type"]] + self.heating_energy_source = mapped_fuel + self.hot_water_energy_source = mapped_fuel + return + + if len(self.heating_energy_source) > 1: + # We treat this as a community scheme + self.heating_energy_source = ["Varied (Community Scheme)"] + self.heating_energy_source = self.heating_energy_source[0] if self.heating_energy_source == "Varied (Community Scheme)": - fuel_map = { - None: "Natural Gas (Community 
Scheme)", - "mains gas": "Natural Gas (Community Scheme)", - "biomass": "Smokeless Fuel", - } + if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]] else: - raise Exception("Implement me") + raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") if self.hotwater["heater_type"] is not None: self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]] @@ -1241,7 +1355,7 @@ class Property: secondary_heating = self.data["secondheat-description"] self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"] else: - raise Exception("Investiage me") + raise NotImplementedError(f"Investiage me - unhandled hot water fuel {fuel}") else: self.hot_water_energy_source = hotwater_appliance_to_fuel[self.hotwater["appliance"]] @@ -1294,9 +1408,17 @@ class Property: self.roof["is_flat"] or self.roof["is_pitched"] or self.roof["is_roof_room"] ) # If there is no existing solar PV, the photo-supply field will be None or a missing value - has_no_existing_solar_pv = self.data["photo-supply"] in [ - None, 0, self.DATA_ANOMALY_MATCHES - ] + + # We use inspections data to tell us this + + if getattr(self.inspections, "roof_orientation", None): + has_no_existing_solar_pv = self.inspections.roof_orientation.value not in [ + "already has solar pv", "roof too small", "no roof" + ] + else: + has_no_existing_solar_pv = self.data["photo-supply"] in [ + None, 0, self.DATA_ANOMALY_MATCHES + ] return is_valid_property_type and is_valid_roof_type and has_no_existing_solar_pv @@ -1325,29 +1447,14 @@ class Property: if not self.is_ashp_valid(measures=["air_source_heat_pump"]): return self.current_energy_consumption - # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain - remap_fuel_sources = [ - "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel", - 
"Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal" - ] - - heating_energy_source = self.heating_energy_source - hot_water_energy_source = self.hot_water_energy_source heating_consumption = self.energy_consumption_estimates["unadjusted"]["heating"] hotwater_consumption = self.energy_consumption_estimates["unadjusted"]["hot_water"] - if (heating_energy_source not in remap_fuel_sources) or ( - hot_water_energy_source not in remap_fuel_sources + ["Electricity + Solar Thermal"] - ): - raise NotImplementedError("Have not implemented estimating electrical consumption for this fuel type") + # Adjust the heating consumption to reflect the expected efficiency of an ASHP - broadly 3.0 COP + heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100) - if heating_energy_source in remap_fuel_sources: - # Adjust the heating consumption to reflect the expected efficiency of an ASHP - heating_consumption = heating_consumption / (assumed_ashp_efficiency / 100) - - if hot_water_energy_source in remap_fuel_sources: - # Adjust the hot water consumption to reflect the expected efficiency of an ASHP - hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100) + # Adjust the hot water consumption to reflect the expected efficiency of an ASHP + hotwater_consumption = hotwater_consumption / (assumed_ashp_efficiency / 100) electric_consumption = ( heating_consumption + diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 16dd8f04..a633176e 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -144,6 +144,11 @@ class SearchEpc: "error": None } + # Keys that we check for missing values to determine if the EPC is incomplete + CHECK_MISSING_KEYS = [ + "lighting-cost-current", "heating-cost-current", "hot-water-cost-current", "energy-consumption-potential" + ] + def __init__( self, address1: str, @@ -156,6 +161,8 @@ class SearchEpc: size=None, property_type=None, fast=False, + heating_system: [str, None] = 
None, + associated_uprns: [List[int] | None] = None ): """ Address lines 1 and postcode are mandatory fields. The other address lines are optional @@ -171,6 +178,11 @@ class SearchEpc: :param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's default :param property_type: str, optional, the property type of the property, if known before hand + :param fast: bool, optional, if true, the extract_epc_data method will skip some processing to return + results faster + :param heating_system: str, optional, the heating system of the property, if known before hand + :param associated_uprns: list of int, optional, list of associated uprns for the property. E.g. other + units in a block of flats """ self.address1 = address1 @@ -179,6 +191,10 @@ class SearchEpc: self.uprn = uprn self.house_number = self.get_house_number(self.address1) self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number) + self.associated_uprns = associated_uprns if associated_uprns is not None else [] + + # property attributes + self.heating_system = heating_system self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES @@ -188,7 +204,7 @@ class SearchEpc: ) self.data = None - self.newest_epc = None + self.newest_epc = {} self.older_epcs = None self.full_sap_epc = None self.metadata = None @@ -197,12 +213,19 @@ class SearchEpc: # These are the address and postcode values, which we store in the database self.address_clean = None self.postcode_clean = None + self.address_postal_town = None self.size = size if size is not None else 25 self.property_type = property_type self.fast = fast + # By default, this is set to false. This flag indicates whether we should overwrite SAP 2005 entries. + self.overwrite_sap05 = False + # By default, this is set to false.
This flag indicates whether we should take the existing EPC, but use + # the estimated EPC to clean missing values + self.clean_missing_on_expired = False + def set_strict_property_type_search(self): """ This method sets the strict property type search flag to True. When this flag is set, the search will @@ -347,7 +370,8 @@ class SearchEpc: # We update the data with the correct uprn if self.uprn: for x in api_response["response"]["rows"]: - x["uprn"] = self.uprn + if pd.isnull(x["uprn"]): + x["uprn"] = self.uprn data["rows"].extend(api_response["response"]["rows"]) @@ -357,6 +381,8 @@ class SearchEpc: row for row in data["rows"] if row["lmk-key"] not in seen and not seen.add(row["lmk-key"]) ] + # Overwrite the data + self.data = data if data["rows"]: api_response["msg"] = self.SUCCESS @@ -415,12 +441,33 @@ class SearchEpc: address, [", ".join([r["address"]]) for r in rows], score_cutoff=0 ) # Pick the largest score - if best_match1[1] >= best_match2[1]: - # Get all of the scores - rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]] + if best_match1[1] == best_match2[1]: + # if they're the same, we'll work under the assumption that the addresses are the same and we'll + # take whichever has the newest EPC + rows_filtered = [ + r for r in rows + if (", ".join([r["address"], r["posttown"]]) == best_match1[0]) or + (r["address"] == best_match2[0]) + ] + rows_filtered = [ + r for r in rows_filtered + if r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in rows_filtered]) + ] + + elif best_match1[1] > best_match2[1]: + # Get all of the scores - make sure we keep uprn + rows_filtered = [ + r for r in rows if + ( + (", ".join([r["address"], r["posttown"]]) == best_match1[0]) or + (str(r["uprn"]) == str(self.uprn)) + ) + ] else: # Get all of the scores - rows_filtered = [r for r in rows if r["address"] == best_match2[0]] + rows_filtered = [ + r for r in rows if (r["address"] == best_match2[0]) or (str(r["uprn"]) ==
str(self.uprn)) + ] # If we have multiple, we filter on newest lodgment date if len(rows_filtered) > 1: @@ -460,7 +507,11 @@ class SearchEpc: postcode = postcode.upper() - return address, postcode + # We also return a "postal town variant - useful for edge cases when fetching from find my EPC + address_postal_town = ", ".join( + [newest_epc["address1"], newest_epc["address2"], newest_epc["posttown"]]).strip().title() + + return address, postcode, address_postal_town def extract_epc_data(self, address=None): @@ -489,7 +540,7 @@ class SearchEpc: newest_epc, older_epcs = self.filter_newest_epc(list_of_epcs=rows) # Ge the uprn from the newest record for this home - uprns = {r["uprn"] for r in rows if r["uprn"]} + uprns = {str(r["uprn"]) for r in rows if r["uprn"]} # We can sometimes have no uprn for a property if (len(uprns) == 0) and len(rows) > 0: logger.warning("Found data but missing uprn") @@ -500,21 +551,42 @@ class SearchEpc: # Take the uprn from the most recent uprns = {newest_epc["uprn"]} else: - raise ValueError("Multiple UPRNs found - investigate me") + # We check if we have UPRNs that match the one we're given and if so, filter on those + if self.uprn is not None: + uprns = {u for u in uprns if int(u) == self.uprn} + if len(uprns) == 1: + logger.info( + f"Multiple UPRNs found but one matches provided UPRN {self.uprn}, using this UPRN" + ) + else: + raise ValueError("Multiple UPRNs found - investigate me") - if uprns: - uprn = uprns.pop() - else: - newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED - uprn = hash(self.address1 + self.postcode) + # if uprns: + # epc_uprn = uprns.pop() + # # Convert to int + # if not pd.isnull(epc_uprn): + # uprn = int(epc_uprn) + # else: + # newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED + # uprn = hash(self.address1 + self.postcode) + + if self.uprn is not None and uprns: + epc_uprn = uprns.pop() + if int(epc_uprn) != self.uprn: + logger.warning( + f"Provided UPRN {self.uprn} does not match EPC UPRN {epc_uprn}, 
using provided UPRN" + ) + # We overwrite but in this instance, we've likely got the wrong EPC data + # Insert as a string - same format as the raw data + newest_epc["uprn"] = str(self.uprn) if self.fast: - return newest_epc, [], {}, "", "", None + return newest_epc, [], {}, "", "", "" # Retrieve postcode and address - address_epc, postcode_epc = self.format_address(newest_epc=newest_epc) + address_epc, postcode_epc, address_postal_town = self.format_address(newest_epc=newest_epc) - return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn + return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, address_postal_town @staticmethod def filter_newest_epc(list_of_epcs: List): @@ -555,7 +627,9 @@ class SearchEpc: lmks_to_drop: list[str] | None = None, built_form: str = "", property_type: str = "", - exclude_old: bool = False + exclude_old: bool = False, + heating_system: [str, None] = None, + associated_uprns: [List[int] | None] = None ): """ Fetches and processes EPC data for a given initial postcode, applying successive trimming @@ -575,9 +649,13 @@ class SearchEpc: :param built_form: The 'built-form' value to be used for filtering the EPC data. :param property_type: The 'property-type' value to be used for filtering the EPC data. :param exclude_old: Flag to exclude EPC data older than 10 years. + :param heating_system: Optional heating system type for additional filtering. + :param associated_uprns: Optional list of associated UPRNs for additional filtering. 
:return: """ + associated_uprns_to_apply = [] if associated_uprns is None else associated_uprns.copy() + property_type_api_map = { "Bungalow": "bungalow", "Flat": "flat", @@ -594,7 +672,10 @@ class SearchEpc: params["property-type"] = property_type_api_map[property_type] # We take the 20 nearest homes of the relevant type, so not to pull in too many irrelevant homes - epc_response = self.get_epc(params=params, size=100) + # If we get to the final iteration, we fetch more + + size = 1000 if len(postcode) <= 2 else 100 + epc_response = self.get_epc(params=params, size=size) if epc_response["status"] == 200: epc_data = pd.DataFrame(self.data["rows"]) @@ -616,6 +697,17 @@ class SearchEpc: epc_data["lodgement-datetime"] > (pd.Timestamp.now() - pd.DateOffset(years=10)) ] + # Regardless of whether or not we exclude old, we drop any SAP05 entries, which will be problematic + # if we include them + if not epc_data.empty: + epc_data = epc_data[~epc_data["mainheat-description"].str.lower().str.contains("sap05:")] + + if not epc_data.empty and heating_system is not None: + # If we arrive at the final iteration, we allow ourselves to be less strict on heating system + epc_data = epc_data[ + epc_data["mainheat-description"] == heating_system + ] + if not epc_data.empty: # Further processing of the EPC data @@ -661,6 +753,31 @@ class SearchEpc: estimation_built_form = "End-Terraced" elif (built_form == "") or (pd.isnull(built_form)): estimation_built_form = epc_built_form + elif built_form == "Enclosed Mid-Terrace": + # We check if we have any enclosed and if not, we fall back to mid-terrace + if sum(epc_data["built-form"] == "Enclosed Mid-Terrace") > 0: + estimation_built_form = "Enclosed Mid-Terrace" + else: + estimation_built_form = "Mid-Terrace" + elif built_form == "Enclosed End-Terrace": + # An enclosed end terrace has two external facing walls so we fall back to mid-terrace + if sum(epc_data["built-form"] == "Enclosed End-Terrace") > 0: + estimation_built_form =
"Enclosed Mid-Terrace" + else: + estimation_built_form = "Mid-Terrace" + elif built_form == "Detached" and property_type == "Flat": + # We add in a fallback to detached flats, where it can be rarer to see properties of this type + if len(postcode) <= 2: + if sum(epc_data["built-form"] == built_form) > 0: + estimation_built_form = built_form + elif sum(epc_data["built-form"] == "Semi-Detached") > 0: + estimation_built_form = "Semi-Detached" + elif sum(epc_data["built-form"] == "End-Terrace") > 0: + estimation_built_form = "End-Terrace" + else: + estimation_built_form = "Mid-Terrace" + else: + estimation_built_form = built_form else: estimation_built_form = built_form @@ -679,7 +796,16 @@ class SearchEpc: has_missing_built_form = not estimation_built_form - if is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form: + # If we have associated UPRNS, we just filter as such, otherwise + # we filter with built form and property type + if any(str(x) in epc_data["uprn"].astype(str).values for x in associated_uprns_to_apply): + # We check at least one UPRN is in the data + epc_data = epc_data[epc_data["uprn"].isin(associated_uprns_to_apply)] + # After we run this, we empty associated_uprns_to_apply. 
+ # That ensures we don't keep re-applying this filter if we shorten the postcode again + # since we'll keep ending up in the same results + associated_uprns_to_apply = [] + elif is_maisonette_with_bad_built_form or is_park_home_without_built_form or has_missing_built_form: epc_data = epc_data[epc_data["property-type"] == estimation_property_type] else: epc_data = epc_data[ @@ -696,7 +822,10 @@ class SearchEpc: # If loop finishes without a valid response, raise an exception raise Exception("Unable to find postcode data after trimming - investigate me") - def estimate_epc(self, property_type, built_form, lmks_to_drop=None, exclude_old=False): + def estimate_epc( + self, property_type, built_form, lmks_to_drop=None, exclude_old=False, heating_system=None, + associated_uprns=None + ): """ For a property that does not have an EPC, we retrieve the EPC data for the closest properties and estimate the EPC for the property in question. @@ -710,6 +839,9 @@ class SearchEpc: :param lmks_to_drop: This is a list of LMK keys that should be dropped from the estimation process. This is used as an override for testing, to drop EPCs for the property we are testing :param exclude_old: Used to drop any expired EPCs (more than 10 years old) + :param heating_system: The heating system of the property we are estimating, if known. Will aim to filter EPCs + to matching heating systems + :param associated_uprns: List of associated UPRNs for the property. E.g. other units in a block of flats :return: """ @@ -720,17 +852,20 @@ class SearchEpc: lmks_to_drop=lmks_to_drop, built_form=built_form, property_type=property_type, - exclude_old=exclude_old + exclude_old=exclude_old, + heating_system=heating_system, + associated_uprns=associated_uprns ) # Check if it's a new build EPC. 
A property that doesn't have an EPC is not going to be a new build # so we avoid comparing it to new builds - # TODO - this is experimental + # TODO - this is experimental - if we have the year the property was built, we should utilise that + # here newer_age_bands = [ "England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011", "England and Wales: 2012 onwards" ] - + # We also remove EPCs that are for new dwellings if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum(): # We have some older age bands, so we need to filter them out epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy() @@ -823,7 +958,7 @@ class SearchEpc: @staticmethod def calculate_weighted_lodgement_datetime(epc_data): - numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).view('int64') + numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).astype('int64') # Calculate the weighted sum of dates weighted_sum = (numeric_dates * epc_data['weight']).sum() @@ -862,7 +997,7 @@ class SearchEpc: return agg[key].values[0] - def find_property(self, skip_os=False): + def find_property(self, skip_os=False, api_data=None, overwrite_sap05=False): """ This method will attempt to identify a property. It will, at first, use the EPC api to try and find the EPC for the property and the associated UPRN. If this fails, it will use the Ordnance Survey API to @@ -873,27 +1008,95 @@ class SearchEpc: as a final check to see if there is any EPC data. If there is no EPC data, the epc data will be estimated based on the surrounding properties + + :param skip_os: If True, the ordnance survey api will be skipped and only the EPC api will be used + :param api_data: If provided, this data will be used instead of querying the EPC api + :param overwrite_sap05: For extremely old, SAP05 EPCs, we may wish to overwrite them with an estimated EPC.
+ This is because the SAP05 EPCs will have missing information such as the main heating + will be described as SAP05:Main-Heating, which isn't particularly useful for the + purpose of providing recommendations. """ # Step 1: use the epc api to find the property and uprn - response = self.get_epc() + if api_data: + self.data = api_data + response = {"status": 200} + else: + response = self.get_epc() if response["status"] == 200: ( - self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn + self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, + self.address_postal_town ) = self.extract_epc_data(address=self.full_address) + + # Before we return, we check if we need to overwrite a SAP05 EPC + # ---- SAP 05 overwriting logic ---- + is_sap_05 = "SAP05:" in self.newest_epc.get("mainheat-description", "") + + needs_sap_05_overwrite = is_sap_05 and (response["status"] == 200) and overwrite_sap05 + + # ---- Cleaning expired EPC logic ---- + epc_is_expired = (pd.Timestamp.now() - pd.Timestamp( + self.newest_epc.get("lodgement-date", pd.Timestamp.now()))).days > 3650 + + epc_has_missing_key_data = any([self.newest_epc.get(k) in [None, ""] for k in self.CHECK_MISSING_KEYS]) + + epc_needs_cleaning = epc_is_expired and epc_has_missing_key_data + + # ---- We don't have an epc ---- + no_epc = response["status"] != 200 + + # If we don't have to overwrite SAP05, or we don't have missing data on an expired EPC, we return + if not needs_sap_05_overwrite and not epc_needs_cleaning and not no_epc: + # If the data is fine, or we're preventing SAP05 overwrites, we just exit here return + # By default, we don't exclude old but we will do, when we are estimating to overwrite a SAP05 EPC + lmks_to_drop, exclude_old = [], False + if needs_sap_05_overwrite or epc_needs_cleaning: + self.overwrite_sap05 = needs_sap_05_overwrite + self.clean_missing_on_expired = epc_needs_cleaning + lmks_to_drop = 
[self.newest_epc["lmk-key"]] + exclude_old = True + self.heating_system = ( + self.newest_epc["mainheat-description"] if + self.clean_missing_on_expired and self.heating_system is None else self.heating_system + ) + self.ordnance_survey_client.property_type = self.newest_epc["property-type"] + self.ordnance_survey_client.built_form = self.newest_epc["built-form"] + # Step 2: If we don't have an EPC, we use the ordnance survey api to find the uprn if skip_os: if self.ordnance_survey_client.property_type is not None: # We can try and estimate estimated_epc = self.estimate_epc( property_type=self.ordnance_survey_client.property_type, - built_form=self.ordnance_survey_client.built_form + built_form=self.ordnance_survey_client.built_form, + heating_system=self.heating_system, + associated_uprns=self.associated_uprns, + lmks_to_drop=lmks_to_drop, + exclude_old=exclude_old ) - self.newest_epc = estimated_epc - self.older_epcs = [] + + # If we have overwritten a SAP05 EPC, we need to update older_epcs too + if self.overwrite_sap05: + # We keep a record of the fact that we have performed a SAP05 overwrite + estimated_epc["sap_05_overwritten"] = True + self.older_epcs = [self.newest_epc.copy()] + self.newest_epc = estimated_epc + elif self.clean_missing_on_expired: + # We perform the cleaning + for k in self.CHECK_MISSING_KEYS: + if self.newest_epc[k] in ["", None]: + self.newest_epc[k] = estimated_epc[k] + + self.newest_epc["estimated"] = True + self.older_epcs = [] + else: + self.older_epcs = [] + self.newest_epc = estimated_epc + self.full_sap_epc = {} # Finally, set a standardised address 1 and postcode @@ -917,7 +1120,8 @@ class SearchEpc: response = self.get_epc() if response["status"] == 200: ( - self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn + self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, + self.address_postal_town ) = self.extract_epc_data() return @@ -936,6 
+1140,22 @@ class SearchEpc: self.postcode_clean = self.ordnance_survey_client.postcode_os return + def set_uprn_source(self, file_format): + """ + Utility function to set the uprn source based on the file format. Only works for domna_asset_lists + and this is very much placeholder until we standardised our input data formats + :param file_format: + :return: + """ + + if not self.newest_epc: + raise ValueError("No EPC data available to set UPRN source - run find_property first") + + if (self.newest_epc.get("estimated") and + (file_format == "domna_asset_list") and + (float(self.newest_epc["uprn"]) < 0)): + self.newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED + def check_attribute_variations(self): attribute_map = { "walls-description": { @@ -993,7 +1213,7 @@ class SearchEpc: return "ground" def get_metadata(self): - if self.newest_epc is None: + if not self.newest_epc: raise ValueError("No EPC data available") # We check if the property has ever been downgraded on SAP diff --git a/backend/addresses/Address.py b/backend/addresses/Address.py new file mode 100644 index 00000000..9b95f5e0 --- /dev/null +++ b/backend/addresses/Address.py @@ -0,0 +1,67 @@ +from dataclasses import dataclass +from typing import Optional + + +@dataclass(slots=True) +class Address: + uprn: Optional[int] + landlord_property_id: Optional[str] + address: Optional[str] + full_address: Optional[str] + postcode: str + property_type: Optional[str] + built_form: Optional[str] + estimated: bool + + # Additional address data, associated to a standardised asset list + domna_full_address: Optional[str] + domna_address_1: Optional[str] + landlord_heating_system: Optional[str] = None + solar_reason: Optional[str] = None + cavity_reason: Optional[str] = None + + @property + def address1(self): + + if self.domna_address_1 is not None: + address1 = self.domna_address_1 + else: + address1 = self.address + + # Format + address1 = str(int(address1)) if isinstance(address1, float) else str(address1) + 
return address1 + + @property + def request_data(self) -> dict[str, Optional[str]]: + """ + Canonical request payload for downstream services. + """ + data = { + "uprn": self.uprn, + "landlord_property_id": self.landlord_property_id, + "postcode": self.postcode, + "address1": self.address1, + "full_address": self.full_address, + } + + # Drop nulls + return {k: v for k, v in data.items() if v is not None} + + @property + def heating_system(self): + """ + Helper function to extract a heating system, which can be used to estimate EPC. This is a very limited, + placeholder function to cover some initial immediate cases. + :return: + """ + + ll_heating = self.landlord_property_id + if not ll_heating: + return None + + if ll_heating == "electric storage heaters": + # Return with the same format at the EPC + return "Electric storage heaters" + + return None diff --git a/backend/addresses/Addresses.py b/backend/addresses/Addresses.py new file mode 100644 index 00000000..22822c6b --- /dev/null +++ b/backend/addresses/Addresses.py @@ -0,0 +1,84 @@ +from backend.addresses.Address import Address + + +class Addresses: + def __init__(self, addresses: list[Address]): + self._addresses = addresses + # self._identity_index = self._build_identity_index() + + def __getitem__(self, index: int) -> Address: + return self._addresses[index] + + def __len__(self) -> int: + return len(self._addresses) + + @classmethod + def from_plan_input(cls, plan_input: list[dict], body) -> "Addresses": + addresses = [] + for row in plan_input: + addresses.append(cls._parse_row(row, body)) + return cls(addresses) + + def get_uprns(self): + return [x.uprn for x in self._addresses if x.uprn is not None] + + def get_landlord_ids(self): + return [x.landlord_property_id for x in self._addresses if x.landlord_property_id is not None] + + def get_unique_postcodes(self): + return list({x.postcode for x in self._addresses}) + + def get_postcodes_for_flats(self): + # Method to extract all of the postcodes 
associated to a flat, which is used for remote assessments + # on flats + return [x.postcode for x in self._addresses if x.property_type in ["Flat", "flat"]] + + def get_property_requests(self): + return [x.request_data for x in self._addresses] + + @staticmethod + def _parse_row(row: dict, body) -> Address: + def clean_uprn(v): + try: + return int(float(v)) + except (TypeError, ValueError): + return None + + uprn = clean_uprn(row.get("uprn")) + + address = row.get("address") + if not address and body.file_format == "domna_asset_list": + address = row.get("domna_address_1") + + full_address = ( + row.get("domna_full_address") + if body.file_format == "domna_asset_list" + else None + ) + if not isinstance(full_address, str): + full_address = None + + postcode = str(row["postcode"]).strip().upper() + + return Address( + uprn=uprn, + landlord_property_id=str(row["landlord_property_id"]) + if row.get("landlord_property_id") else None, + address=str(address).strip() if address else None, + full_address=str(full_address).strip() if full_address else None, + postcode=postcode, + property_type=row.get("property_type"), + built_form=row.get("built_form"), + estimated=bool(row.get("estimated", False)), + domna_full_address=row.get("domna_full_address"), + domna_address_1=row.get("domna_address_1"), + ) + + # def _build_identity_index(self) -> dict: + # index = {} + # for addr in self._addresses: + # key = addr.identity_key() + # if key in index: + # raise ValueError(f"Duplicate address identity detected: {key}") + # index[key] = addr + # return index diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index 043f41a9..f7aa311f 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -332,7 +332,6 @@ class GoogleSolarApi: ) if solar_product is None: - logger.info("No suitable solar product found for the configuration with %d panels.", total_panels) continue total_cost = Costs.solar_pv( @@ -480,9 +479,7 @@ class 
GoogleSolarApi: roi_results = pd.DataFrame(roi_results) - panel_performance = panel_performance.merge( - roi_results, how="left", on="n_panels" - ) + panel_performance = panel_performance.merge(roi_results, how="left", on="n_panels") # We want max roi, minimal generation deficit, and max generation value - we create a ranking score # Assign equal weights to each metric @@ -707,7 +704,7 @@ class GoogleSolarApi: # We set the target rating to EPC C, which is the typical EPC rating we would expect the # property to achieve post retrofit of just the fabric "energy_consumption": cls.estimate_new_consumption( - current_energy_efficiency=p.data["current-energy-efficiency"], + current_energy_efficiency=min(p.data["current-energy-efficiency"], 100), target_efficiency="69", current_consumption=p.estimate_electrical_consumption( assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions @@ -726,7 +723,7 @@ class GoogleSolarApi: # We set the target rating to EPC C, which is the typical EPC rating we would expect the # property to achieve post retrofit of just the fabric "energy_consumption": cls.estimate_new_consumption( - current_energy_efficiency=p.data["current-energy-efficiency"], + current_energy_efficiency=min(int(p.data["current-energy-efficiency"]), 100), target_efficiency="69", current_consumption=p.estimate_electrical_consumption( assumed_ashp_efficiency=assumptions.AVERAGE_ASHP_EFFICIENCY, exclusions=body.exclusions @@ -743,7 +740,7 @@ class GoogleSolarApi: @classmethod def building_solar_analysis( cls, building_solar_config: List, input_properties: List[Property], session, google_solar_api_key: str, - solar_materials: list + solar_materials: list, ): """ Perform the solar analysis for the building level @@ -827,9 +824,21 @@ class GoogleSolarApi: @classmethod def unit_solar_analysis( cls, unit_solar_config: List, input_properties: List[Property], session, body, google_solar_api_key: str, - solar_materials: list + solar_materials: list, 
inspections_map: dict ): + """ + Perform the solar analysis for the unit level + :param unit_solar_config: List of unit solar configurations + :param input_properties: List of properties + :param session: Database session + :param body: PlanTriggerRequest instance + :param google_solar_api_key: Google Solar API key + :param solar_materials: List of solar materials + :param inspections_map: Dictionary mapping property IDs to inspection data + :return: + """ + if not unit_solar_config: return input_properties @@ -855,18 +864,21 @@ class GoogleSolarApi: ): continue + solar_api_client = cls(api_key=google_solar_api_key, solar_materials=solar_materials) + if unit["longitude"] is None or unit["latitude"] is None: # At this point, we've checked that solar PV is valid, and so we provide some defaults property_instance.set_solar_panel_configuration( solar_panel_configuration={ "insights_data": None, - "panel_performance": cls.default_panel_performance(property_instance=property_instance), + "panel_performance": solar_api_client.default_panel_performance( + property_instance=property_instance + ), "unit_share_of_energy": 1 }, ) continue - solar_api_client = cls(api_key=google_solar_api_key, solar_materials=solar_materials) solar_api_client.get( longitude=unit["longitude"], latitude=unit["latitude"], @@ -877,6 +889,15 @@ class GoogleSolarApi: property_instance=property_instance, ) + property_inspections = inspections_map.get(property_instance.id, {}) + + if property_inspections: + # If we have some inspections data, we check if we have some data which indicates solar cannot + # be installed. 
We're loose about this now since this is post review + if solar_api_client.panel_performance.empty: + # We assume solar is a suitable option + solar_api_client.panel_performance = solar_api_client.default_panel_performance(property_instance) + # Store the data in the database solar_api_client.save_to_db( session=session, @@ -921,12 +942,43 @@ class GoogleSolarApi: None ) - if material_1_6 is None or material_3_2 is None: + material_4_35 = next( + (m for m in self.solar_materials if m["type"] == "solar_pv" and + abs(m["size"] - 4.35) < 0.1 and not m["includes_battery"]), + None + ) + + if material_1_6 is None or material_3_2 is None or material_4_35 is None: raise ValueError("No suitable solar product found for the default configuration.") # We return a 1.6 and 3.2 kwp system panel_performance = pd.DataFrame( [ + { + 'n_panels': 10, + 'yearly_dc_energy': 4350 * assumptions.MEDIAN_WATTAGE_TO_DC, + 'total_cost': cost_instance.solar_pv( + solar_product=material_4_35, + scaffolding_options=[ + {"total_cost": 1000, "size": property_instance.number_of_floors}, + {"total_cost": 1000, "size": 3} + ], + n_floors=property_instance.number_of_floors + )["total"], + 'weighted_ratio': None, + 'panneled_roof_area': 9 * assumptions.RDSAP_AREA_PER_PANEL, + 'array_wattage': 4350, + 'initial_ac_kwh_per_year': 4350 * assumptions.MEDIAN_WATTAGE_TO_AC, + 'lifetime_ac_kwh': None, + 'lifetime_dc_kwh': None, + 'roi': None, + 'generation_value': None, + 'generation_deficit': None, + 'expected_payback_years': None, + 'surplus': None, + 'combined_score': None, + 'rank': None + }, { 'n_panels': 8, 'yearly_dc_energy': 3200 * assumptions.MEDIAN_WATTAGE_TO_DC, @@ -979,4 +1031,22 @@ class GoogleSolarApi: }, ] ) + + # We add the key elements that are required for the database + panel_performance['lifetime_ac_kwh'] = panel_performance.apply( + self.lifetime_production_kwh, + axis=1, + efficiency_depreciation_factor=self.efficiency_depreciation_factor, + 
installation_life_span=self.installation_life_span, + column_name="initial_ac_kwh_per_year" + ) + + panel_performance['lifetime_dc_kwh'] = panel_performance.apply( + self.lifetime_production_kwh, + axis=1, + efficiency_depreciation_factor=self.efficiency_depreciation_factor, + installation_life_span=self.installation_life_span, + column_name="yearly_dc_energy", + ) + return panel_performance diff --git a/backend/app/BatterySapScorer.py b/backend/app/BatterySapScorer.py new file mode 100644 index 00000000..923c5498 --- /dev/null +++ b/backend/app/BatterySapScorer.py @@ -0,0 +1,30 @@ +import numpy as np + + +class BatterySAPScorer: + """ + Lightweight production scorer — no sklearn dependency. + Uses hard-coded coefficients discovered offline. The code for discovering the coefficients + can be found in etl/battery_model/train.py + We're only concerned with SAP, as we already have a method for carbon and bill savings. + """ + + INTERCEPT = 10.310168559226678 + COEF_STARTING_SAP = -0.16120648633993315 + COEF_PV_SIZE = 1.0500492005420736 + + @classmethod + def score(cls, starting_sap, pv_size): + """ + Return the integer SAP uplift, clamped to [1, 5], computed linearly from starting_sap and pv_size + """ + + sap_uplift = ( + cls.INTERCEPT + + cls.COEF_STARTING_SAP * starting_sap + + cls.COEF_PV_SIZE * pv_size + ) + + # Round + clamp to [1,5] - there are only a small number of cases with 0 points + sap_uplift = int(np.round(np.clip(sap_uplift, 1, 5))) + return sap_uplift diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index a0234f75..19263bb3 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -77,7 +77,24 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Electric ceiling heating, electric": {"fuel": "Electricity", "cop": 1}, "Air source heat pump, warm air, electric": { "fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100 - } + }, + "Electric heat pump for water heating only": {"fuel": "Electricity", "cop": 1}, + "Ground source heat pump, warm air, electric": {"fuel":
"Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Room heaters, mains gas, Electric storage heaters": {"fuel": "Natural Gas", "cop": 0.85}, + "Water source heat pump, radiators, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Air source heat pump, Systems with radiators, electric": {"fuel": "Electricity", + "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Ground source heat pump, underfloor, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Electric ceiling heating": {"fuel": "Electricity", "cop": 1}, + "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85}, + "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85}, + "Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Boiler and underfloor heating, electric": {"fuel": "Electricity", "cop": 1}, + "Community scheme with CHP, mains gas": {"fuel": "Natural Gas", "cop": 0.85}, + "Air source heat pump, radiators and underfloor, electric": {"fuel": "Electricity", + "cop": AVERAGE_ASHP_EFFICIENCY / 100}, + "Electric ceiling heating, radiators, electric": {"fuel": "Electricity", "cop": 1}, + "Boiler and underfloor heating, mains gas, Boiler and radiators, mains gas": {"fuel": "Natural Gas", "cop": 0.85}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it @@ -88,3 +105,12 @@ measures_needing_ventilation = [ # If we have a property beyond this size, we assume it's likely large enough to have an ASHP ASHP_FLOOR_AREA_THRESHOLD = 120 # m2 + +# Is a placeholder, used for cleaning data. Is a flat average based on the estimated +AVERAGE_LIGHTING_COST = 100 + +# Average bill, based on british gas is #1,838.71. Subtract 100 for lighting, 228 for hot water. 
This will include +# appliances so appliances should be removed when this is used +AVERAGE_HEATING_AND_APPLIANCE_COST = 1510.71 +# Based on https://energysavingtrust.org.uk/sites/default/files/reports/AtHomewithWater%287%29.pdf +AVERAGE_HOT_WATER_COST = 228 diff --git a/backend/app/config.py b/backend/app/config.py index b53d5223..dd3f5db1 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,5 +1,6 @@ from functools import lru_cache from pydantic_settings import BaseSettings +from typing import Optional class Settings(BaseSettings): @@ -35,6 +36,11 @@ class Settings(BaseSettings): # Other S3 buckts ENERGY_ASSESSMENTS_BUCKET: str + # Optional AWS creds (only required in local) + AWS_ACCESS_KEY_ID: Optional[str] = None + AWS_SECRET_KEY_ID: Optional[str] = None + AWS_DEFAULT_REGION: Optional[str] = None + class Config: env_file = "backend/.env" diff --git a/backend/app/db/connection.py b/backend/app/db/connection.py index 9efdfd25..74f3bd2e 100644 --- a/backend/app/db/connection.py +++ b/backend/app/db/connection.py @@ -1,5 +1,7 @@ from sqlalchemy import create_engine +from contextlib import contextmanager from backend.app.config import get_settings +from sqlmodel import Session connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}" db_string = connection_string.format( @@ -11,4 +13,42 @@ db_string = connection_string.format( dbname=get_settings().DB_NAME, ) -db_engine = create_engine(db_string, pool_size=5, max_overflow=5) +# db_engine = create_engine(db_string, pool_size=5, max_overflow=5) + +# Adjusted database connection to decrease pool size for serverless environments (from lambda) so that +# each lambda doesn't hog all connections +db_engine = create_engine( + db_string, + pool_size=3, + max_overflow=5, # Limit the number of extra connections.
Together with pool_size=3, this allows at most 8 concurrent connections per lambda + pool_pre_ping=True, + pool_recycle=300, # Forces SQLAlchemy to close and reopen any connection older than 300 seconds +) + + +def get_db_session(): + if db_engine is None: + raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.") + return Session(db_engine) + + +@contextmanager +def db_session(): + session = Session(db_engine) + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() + + +@contextmanager +def db_read_session(): + session = Session(db_engine, expire_on_commit=False) + try: + yield session + finally: + session.close() diff --git a/backend/app/db/functions/__init__.py b/backend/app/db/functions/__init__.py new file mode 100644 index 00000000..8e7495bf --- /dev/null +++ b/backend/app/db/functions/__init__.py @@ -0,0 +1,13 @@ +from .epc_functions import * +from .address_functions import * +from .portfolio_functions import * +from .energy_assessment_functions import * +from .property_functions import * +from .recommendations_functions import * +from .solar_functions import * +from .funding_functions import * +from .materials_functions import * +from .inspections_functions import * +from .non_intrusive_surveys import * +from .whlg_functions import * +from .already_installed_functions import * diff --git a/backend/app/db/functions/address_functions.py b/backend/app/db/functions/address_functions.py new file mode 100644 index 00000000..4b8ad5f2 --- /dev/null +++ b/backend/app/db/functions/address_functions.py @@ -0,0 +1,114 @@ +from sqlalchemy.orm import Session +from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy import func +from backend.app.db.models.addresses import PostcodeSearch +from utils.logger import setup_logger + +logger = setup_logger() + + +def _get_associated_records(results, uprn, uprn_key="UPRN"): + matched_record = [] + for x in results: + if "DPA" in x: + if
x["DPA"].get(uprn_key) == str(uprn): + matched_record.append(x["DPA"]) + else: + if x["LPI"].get(uprn_key) == str(uprn): + matched_record.append(x["LPI"]) + + return matched_record + + +def get_associated_uprns(postcode_search: PostcodeSearch, uprn: str | int): + """ + Given a postcode and UPRN, for a remote assessment, fetch all associated UPRNs, based + on parent UPRN. This will be properties in the same building + + Parent UPRN is referenced in the following docs: + https://static.geoplace.co.uk/downloads/GeoPlace-Data-Entry-Conventions-Best-Practice-for-Addresses.pdf + + :param PostcodeSearch postcode_search: The postcode search record + :param uprn: The UPRN string to match + :return: List of the other UPRNs sharing the same parent UPRN, or an empty list if none can be determined + """ + + if not postcode_search: + return [] + + if isinstance(uprn, int): + # For this, coerce to string + uprn = str(uprn) + + matched_record = _get_associated_records(results=postcode_search.result_data["results"], uprn=uprn) + + if len(matched_record) != 1: + return [] + + if not matched_record[0].get("PARENT_UPRN"): + logger.info("No parent UPRN found, cannot get associated records") + return [] + + associated_records = _get_associated_records( + results=postcode_search.result_data["results"], uprn=matched_record[0]["PARENT_UPRN"], uprn_key="PARENT_UPRN" + ) + # We now fetch all UPRNS with the same parent UPRN + associated_uprns = [int(x["UPRN"]) for x in associated_records if x["UPRN"] != str(uprn)] + + return associated_uprns + + +def get_by_postcodes(session: Session, postcodes: list[str]) -> dict[str, PostcodeSearch]: + """ + Given a list of postcodes, retrieves postcode data from the PostcodeSearch table, keyed by upper-cased postcode + :param session: + :param postcodes: + :return: + """ + if not postcodes: + return {} + + normalised = {p.upper() for p in postcodes if p} + + records = ( + session.query(PostcodeSearch) + .filter(func.upper(PostcodeSearch.postcode).in_(normalised)) + .all() + ) + + return {r.postcode.upper():
r for r in records} + + +def get_associated_uprns_from_record(record: PostcodeSearch, uprn: str) -> list[int]: + """ + Given a postcode search record and a UPRN, return the other UPRNs that share the same parent UPRN + :param record: + :param uprn: + :return: + """ + if not record: + return [] + + matched_record = _get_associated_records( + results=record.result_data["results"], + uprn=uprn + ) + + if len(matched_record) != 1: + return [] + + parent_uprn = matched_record[0].get("PARENT_UPRN") + if not parent_uprn: + return [] + + associated_records = _get_associated_records( + results=record.result_data["results"], + uprn=parent_uprn, + uprn_key="PARENT_UPRN" + ) + + return [ + int(x["UPRN"]) + for x in associated_records + if x["UPRN"] != str(uprn) + ] diff --git a/backend/app/db/functions/already_installed_functions.py b/backend/app/db/functions/already_installed_functions.py new file mode 100644 index 00000000..351419b0 --- /dev/null +++ b/backend/app/db/functions/already_installed_functions.py @@ -0,0 +1,40 @@ +from backend.app.db.models.recommendations import InstalledMeasure +from typing import Dict, List, Set +from collections import defaultdict + + +def get_installed_measure_types_by_uprns( + session, + uprns: List[int], +) -> Dict[int, Set[str]]: + """ + Returns installed measure types per UPRN. 
+ + { + uprn: {"cavity_wall_insulation", "mechanical_ventilation", ...} + } + """ + + if not uprns: + return {} + + rows = ( + session.query( + InstalledMeasure.uprn, + InstalledMeasure.measure_type, + ) + .filter(InstalledMeasure.is_active.is_(True)) + .filter(InstalledMeasure.uprn.in_(uprns)) + .all() + ) + + out: Dict[int, Set[str]] = defaultdict(set) + + for uprn, measure_type in rows: + out[uprn].add( + measure_type.value + if hasattr(measure_type, "value") + else measure_type + ) + + return out diff --git a/backend/app/db/functions/energy_assessment_functions.py b/backend/app/db/functions/energy_assessment_functions.py index bbdaaac7..c9e40b3f 100644 --- a/backend/app/db/functions/energy_assessment_functions.py +++ b/backend/app/db/functions/energy_assessment_functions.py @@ -1,3 +1,4 @@ +from typing import Iterable from backend.app.db.models.energy_assessments import ( EnergyAssessment, EnergyAssessmentScenarios, EnergyAssessmentDocuments, DocumentTypeEnum ) @@ -63,27 +64,48 @@ def bulk_insert_energy_assessments(session: Session, data_list: List[dict]) -> D return uprn_to_assessment_id -def get_latest_assessment_by_uprn(session: Session, uprn: int) -> Optional[EnergyAssessment]: +def get_latest_assessments_for_uprns( + session: Session, + uprns: Iterable[int], +) -> dict[int, dict]: """ - Retrieve the latest energy assessment for a given UPRN based on the inspection date. + Fetch the latest energy assessment per UPRN in a single query. 
- :param session: The database session - :param uprn: The unique property reference number - :return: The latest EnergyAssessment object or None if not found + Returns a dict: + uprn -> assessment_dict | empty_response """ - if not uprn: - return EnergyAssessment.empty_response() + uprns = [u for u in uprns if u] + if not uprns: + return {} - try: - # Query the EnergyAssessment model, filter by uprn, order by inspection_date in descending order - latest_assessment = session.query(EnergyAssessment).filter_by(uprn=uprn).order_by( - desc(EnergyAssessment.inspection_date)).first() + # DISTINCT ON requires matching ORDER BY + records = ( + session.query(EnergyAssessment) + .filter(EnergyAssessment.uprn.in_(uprns)) + .order_by( + EnergyAssessment.uprn, + desc(EnergyAssessment.inspection_date), + ) + .distinct(EnergyAssessment.uprn) + .all() + ) - return latest_assessment.to_dict() if latest_assessment else EnergyAssessment.empty_response() - except Exception as e: - logger.info(f"An error occurred: {e}") - return None + result: dict[int, dict] = {} + + for record in records: + result[record.uprn] = record.to_dict() + + # Fill missing uprns with empty response + uprn_set = set(uprns) + found_set = set(result.keys()) + + missing_uprns = uprn_set - found_set + + for uprn in missing_uprns: + result[uprn] = EnergyAssessment.empty_response() + + return result def create_scenarios_for_documents(session: Session, document_list: List[dict], uprn_to_assessment_id: dict): diff --git a/backend/app/db/functions/epc_functions.py b/backend/app/db/functions/epc_functions.py new file mode 100644 index 00000000..defc24c9 --- /dev/null +++ b/backend/app/db/functions/epc_functions.py @@ -0,0 +1,229 @@ +from typing import List +from datetime import datetime, timedelta, timezone +from sqlalchemy.exc import SQLAlchemyError +from backend.app.db.models.epc import EpcStore +from sqlmodel import Session +from sqlalchemy.dialects.postgresql import insert + + +class EpcStoreService: + """ + Service 
layer for EPC data lookup and persistence. + """ + + FRESHNESS_DAYS = 30 + + # status labels + FRESH = "fresh" + EXPIRED = "expired" + MISSING = "missing" + + @classmethod + def get_epc_for_uprn(cls, session: Session, uprn: int): + """ + Query EPC data for a given UPRN and return a dict describing: + - epc_api: only if within last 30 days + - epc_page: only if epc_api exists + - status: 'fresh', 'expired', or 'missing' + """ + + record = session.query(EpcStore).filter(EpcStore.uprn == uprn).first() + + if not record: + return {"status": cls.MISSING, "epc_api": None, "epc_page": None} + + if not record.epc_api_created_at: + # API data missing → treat as missing even if page data exists + return {"status": cls.MISSING, "epc_api": None, "epc_page": None} + + # check freshness + cutoff = datetime.now(timezone.utc) - timedelta(days=EpcStoreService.FRESHNESS_DAYS) + + if record.epc_api_created_at.date() < cutoff.date(): + return {"status": cls.EXPIRED, "epc_api": None, "epc_page": None} + + # Fresh API → include page only if present + return { + "status": cls.FRESH, + "epc_api": record.epc_api, + "epc_page": record.epc_page if record.epc_page else None, + "epc_page_rrn": record.epc_page_rrn, + "epc_api_created_at": record.epc_api_created_at, + "epc_page_created_at": record.epc_page_created_at, + } + + @classmethod + def get_epcs_for_uprns(cls, session: Session, uprns: List[int]) -> dict[int, dict]: + """ + Given a list of uprns, return a dict mapping each uprn to its EPC data status and content. 
+ :param session: + :param uprns: + :return: + """ + if not uprns: + return {} + + cutoff = datetime.now(timezone.utc) - timedelta(days=cls.FRESHNESS_DAYS) + + records = ( + session.query(EpcStore) + .filter(EpcStore.uprn.in_(uprns)) + .all() + ) + + result: dict[int, dict] = {} + + for record in records: + if not record.epc_api_created_at: + result[record.uprn] = { + "status": cls.MISSING, + "epc_api": None, + "epc_page": None, + "epc_page_rrn": None, + "epc_api_created_at": None, + "epc_page_created_at": None, + } + continue + + if record.epc_api_created_at.date() < cutoff.date(): + # We only expose epc_page when epc_api is fresh. + result[record.uprn] = { + "status": cls.EXPIRED, + "epc_api": None, + "epc_page": None, + "epc_page_rrn": None, + "epc_api_created_at": None, + "epc_page_created_at": None, + } + continue + + result[record.uprn] = { + "status": cls.FRESH, + "epc_api": record.epc_api, + "epc_page": record.epc_page, + "epc_page_rrn": record.epc_page_rrn, + "epc_api_created_at": record.epc_api_created_at, + "epc_page_created_at": record.epc_page_created_at, + } + + # For the uprns not found in records, mark them as missing + requested = set(uprns) + found = set(result.keys()) + + missing = requested - found + for uprn in missing: + result[uprn] = { + "status": cls.MISSING, + "epc_api": None, + "epc_page": None, + "epc_page_rrn": None, + "epc_api_created_at": None, + "epc_page_created_at": None, + } + + return result + + @classmethod + def check_insert_needed(cls, epc_cache, epc_estimated, uprn): + """ + Check if an insert is needed based on existing data. 
+ :return: + """ + no_existing_epc_cache = epc_cache.get("epc_api") is None + existing_cache_expired = ( + epc_cache.get("status") == cls.EXPIRED + ) + + needs_insert = bool((no_existing_epc_cache or existing_cache_expired) and not epc_estimated and uprn) + + return needs_insert + + @staticmethod + def upsert_epc_data( + session: Session, + uprn: int, + epc_api: dict | None, + epc_page: str | None, + epc_page_rrn: str | None, + epc_api_created_at: datetime | None = None, + epc_page_created_at: datetime | None = None, + ): + """ + Insert or update EPC data for a UPRN. + + Rules: + - If record exists → update it + - If record does not exist → create new + """ + + try: + record = session.query(EpcStore).filter(EpcStore.uprn == uprn).first() + + if record: + # update path + if epc_api is not None: + record.epc_api = epc_api + if epc_api_created_at is None: + epc_api_created_at = datetime.now(timezone.utc) + record.epc_api_created_at = epc_api_created_at + + # update page data only if BOTH: + # 1) the caller passed page data + # 2) epc_api is not None (page only allowed when API exists) + if epc_page is not None and epc_api is not None: + record.epc_page = epc_page + record.epc_page_rrn = epc_page_rrn + if epc_page_created_at is None: + epc_page_created_at = datetime.now(timezone.utc) + record.epc_page_created_at = epc_page_created_at + else: + # insert path + record = EpcStore( + uprn=uprn, + epc_api=epc_api, + epc_api_created_at=epc_api_created_at, + epc_page=epc_page if epc_api is not None else None, + epc_page_rrn=epc_page_rrn if epc_api is not None else None, + epc_page_created_at=epc_page_created_at if epc_api is not None else None, + ) + session.add(record) + + return record + + except SQLAlchemyError as e: + raise e + + @classmethod + def bulk_upsert_epc_data(cls, session: Session, rows_to_insert: list[dict]): + if not rows_to_insert: + return + + now = datetime.now(timezone.utc) + + values = [ + { + "uprn": row["uprn"], + "epc_api": row["epc_api"], + 
"epc_api_created_at": now, + "epc_page": row["epc_page"], + "epc_page_rrn": row["epc_page_rrn"], + "epc_page_created_at": now if row["epc_page"] else None, + } + for row in rows_to_insert + ] + + insert_stmt = insert(EpcStore).values(values) + + stmt = insert_stmt.on_conflict_do_update( + index_elements=["uprn"], + set_={ + "epc_api": insert_stmt.excluded.epc_api, + "epc_api_created_at": insert_stmt.excluded.epc_api_created_at, + "epc_page": insert_stmt.excluded.epc_page, + "epc_page_rrn": insert_stmt.excluded.epc_page_rrn, + "epc_page_created_at": insert_stmt.excluded.epc_page_created_at, + }, + ) + + session.execute(stmt) + session.commit() diff --git a/backend/app/db/functions/funding_functions.py b/backend/app/db/functions/funding_functions.py index 51dffa21..df36d308 100644 --- a/backend/app/db/functions/funding_functions.py +++ b/backend/app/db/functions/funding_functions.py @@ -1,5 +1,6 @@ from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError +from sqlalchemy import insert from backend.app.db.models.funding import FundingPackage, FundingPackageMeasures @@ -69,3 +70,72 @@ def upload_funding(session: Session, p, plan_id, recommendations_to_upload): session.rollback() print(f"An error occurred: {e}") return False + + +def bulk_upload_funding_packages( + session: Session, + funding_payload: list[dict], +): + """ + Bulk upload: + - funding_package + - funding_package_measures + + Assumes caller manages the transaction. + """ + + if not funding_payload: + return + + # --------------------------------------------------------- + # 1. 
Prepare funding package rows + # --------------------------------------------------------- + funding_rows = [] + measures_by_index = [] + + for f in funding_payload: + funding_rows.append({ + "plan_id": f["plan_id"], + "scheme": f["scheme"], + "project_funding": f["project_funding"], + "total_uplift": f["total_uplift"], + "full_project_score": f["full_project_score"], + "partial_project_score": f["partial_project_score"], + "uplift_project_score": f["uplift_project_score"], + }) + + measures_by_index.append(f.get("measures", [])) + + # --------------------------------------------------------- + # 2. Insert funding packages and get IDs + # --------------------------------------------------------- + result = session.execute( + insert(FundingPackage) + .values(funding_rows) + .returning(FundingPackage.id) + ) + + funding_package_ids = [row[0] for row in result] + + # --------------------------------------------------------- + # 3. Insert funding package measures + # --------------------------------------------------------- + measures_rows = [] + + for funding_package_id, measures in zip( + funding_package_ids, measures_by_index + ): + for m in measures: + measures_rows.append({ + "funding_package_id": funding_package_id, + "measure": m["measure"], + "material_id": m["material_id"], + "innovation_uplift": m["innovation_uplift"], + "partial_project_score": m["partial_project_score"], + "uplift_project_score": m["uplift_project_score"], + }) + + if measures_rows: + session.execute( + insert(FundingPackageMeasures).values(measures_rows) + ) diff --git a/backend/app/db/functions/inspections_functions.py b/backend/app/db/functions/inspections_functions.py new file mode 100644 index 00000000..b1c1eeb5 --- /dev/null +++ b/backend/app/db/functions/inspections_functions.py @@ -0,0 +1,212 @@ +import re +from dataclasses import dataclass, asdict +from typing import Optional, Dict, Any, Type, TypeVar +from sqlalchemy.orm import Session + +from enum import Enum +from datetime 
import datetime, timedelta +import math +import pytz +import enum + +from backend.app.db.models.inspections import ( + InspectionModel, + InspectionArchetype, + InspectionArchetype2, + InspectionsWallConstruction, + InspectionsWallInsulation, + InspectionsInsulationMaterial, + InspectionBorescoped, + InspectionsRoofOrientation, + InspectionsTileHung, + InspectionsRendered, + InspectionsCladding, + InspectionsAccessIssues, +) + +NON_INTRUSIVE_PREFIX = "non-intrusives:" + + +@dataclass +class InspectionData: + archetype: Optional[InspectionArchetype] = None + archetype_2: Optional[InspectionArchetype2] = None + wall_construction: Optional[InspectionsWallConstruction] = None + insulation: Optional[InspectionsWallInsulation] = None + insulation_material: Optional[InspectionsInsulationMaterial] = None + borescoped: Optional[InspectionBorescoped] = None + roof_orientation: Optional[InspectionsRoofOrientation] = None + tile_hung: Optional[InspectionsTileHung] = None + rendered: Optional[InspectionsRendered] = None + cladding: Optional[InspectionsCladding] = None + access_issues: Optional[InspectionsAccessIssues] = None + date: Optional[datetime] = None # Reflects the date when the survey was actually conducted + notes: Optional[str] = None + surveyor_name: Optional[str] = None + + +def _clean_string(value: Any) -> Optional[str]: + """Normalize strings for enum matching, tolerant of NaN/None.""" + if value is None: + return None + if isinstance(value, float) and math.isnan(value): + return None + if not isinstance(value, str): + return None + + v = ( + value.strip() + .lower() + .replace("“", '"') + .replace("”", '"') + .replace("’", "'") + ) + return re.sub(r"\s+", " ", v) + + +E = TypeVar("E", bound=Enum) + + +def _match_enum(value: Any, enum_cls: Type[E]) -> Optional[E]: + """Case-insensitive fuzzy matching for enums, tolerant of NaN/None.""" + v = _clean_string(value) + if not v: + return None + + for e in enum_cls: + if v == e.value.lower(): + return e + + for e in 
enum_cls: + if v in e.value.lower() or e.value.lower() in v: + return e + + return None + + +def _lower_key_dict(d: dict) -> dict: + """Convert all keys to lowercase for case-insensitive lookup.""" + return {str(k).lower(): v for k, v in d.items() if isinstance(k, str)} + + +def extract_inspection_data(config: Dict[str, Any]) -> Optional[InspectionData]: + """Extract and map inspection data from a config row.""" + config_lower = _lower_key_dict(config) + + non_intrusive_fields = { + k: v for k, v in config_lower.items() + if k.startswith(NON_INTRUSIVE_PREFIX) + } + + if not non_intrusive_fields: + return None + + data = InspectionData() + + data.archetype = _match_enum( + config_lower.get("non-intrusives: archetype"), InspectionArchetype + ) + data.archetype_2 = _match_enum( + config_lower.get("non-intrusives: archetype 2"), InspectionArchetype2 + ) + data.wall_construction = _match_enum( + config_lower.get("non-intrusives: construction"), InspectionsWallConstruction + ) + data.insulation = _match_enum( + config_lower.get("non-intrusives: insulated"), InspectionsWallInsulation + ) + data.insulation_material = _match_enum( + config_lower.get("non-intrusives: material"), InspectionsInsulationMaterial + ) + data.borescoped = _match_enum( + config_lower.get("non-intrusives: boroscoped?"), InspectionBorescoped + ) + data.roof_orientation = _match_enum( + config_lower.get("non-intrusives: roof orientation"), InspectionsRoofOrientation + ) + data.tile_hung = _match_enum( + config_lower.get("non-intrusives: tile hung"), InspectionsTileHung + ) + data.rendered = _match_enum( + config_lower.get("non-intrusives: rendered"), InspectionsRendered + ) + data.cladding = _match_enum( + config_lower.get("non-intrusives: cladding"), InspectionsCladding + ) + data.access_issues = _match_enum( + config_lower.get("non-intrusives: access issues"), InspectionsAccessIssues + ) + + data.date = config_lower.get("non-intrusives: date") + data.notes = config_lower.get("non-intrusives: further 
surveyor notes") + # convert surveyor name to title case if present + data.surveyor_name = config_lower.get("non-intrusives: name of surveyor").title() if config_lower.get( + "non-intrusives: name of surveyor") else None + + return data + + +def bulk_upsert_inspections_pg(session: Session, inspections_map): + """ + Bulk insert/update inspection records: + - 'created_at' = actual survey date + - 'uploaded_at' = time of upload or update + - If an inspection exists for the same property on the same date → overwrite + - Otherwise → insert a new record + """ + + if not inspections_map: + return + + now = datetime.now(pytz.utc) + + for property_id, data in inspections_map.items(): + # Extract survey date from the data + record = asdict(data) + survey_date = getattr(data, "survey_date", None) or record.get("survey_date") + + if not survey_date: + continue # skip if no survey date available + + # Convert to UTC datetime if needed + if hasattr(survey_date, "to_pydatetime"): + survey_date = survey_date.to_pydatetime() + if survey_date.tzinfo is None: + survey_date = survey_date.replace(tzinfo=pytz.utc) + + record["property_id"] = property_id + record["created_at"] = survey_date + record["uploaded_at"] = now + + # Normalize enums and NaNs + for key, value in record.items(): + if isinstance(value, enum.Enum): + record[key] = value.value + elif isinstance(value, float) and math.isnan(value): + record[key] = None + + # Find existing inspection *for same property on same day* + start_of_day = survey_date.replace(hour=0, minute=0, second=0, microsecond=0) + end_of_day = start_of_day + timedelta(days=1) + + existing_inspection = ( + session.query(InspectionModel) + .filter( + InspectionModel.property_id == property_id, + InspectionModel.created_at >= start_of_day, + InspectionModel.created_at < end_of_day, + ) + .first() + ) + + if existing_inspection: + # Overwrite existing record (same survey day) + for field, value in record.items(): + setattr(existing_inspection, field, value) 
+ existing_inspection.uploaded_at = now + else: + # Create new inspection for new day + new_inspection = InspectionModel(**record) + session.add(new_inspection) + + session.flush() diff --git a/backend/app/db/functions/property_functions.py b/backend/app/db/functions/property_functions.py index b17d8e53..99cc8ed7 100644 --- a/backend/app/db/functions/property_functions.py +++ b/backend/app/db/functions/property_functions.py @@ -3,16 +3,20 @@ ### import datetime import pytz +from sqlalchemy import select, or_, bindparam, update from sqlalchemy.orm import Session +from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy.dialects.postgresql import insert + +from backend.addresses.Address import Address from backend.app.db.models.portfolio import ( PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel, PropertyDetailsSpatial ) -from sqlalchemy.orm.exc import NoResultFound def create_property(session: Session, portfolio_id: int, address: str, postcode: str, uprn: str, - energy_assessment: dict) -> (int, bool): + energy_assessment: dict, landlord_property_id: str | None = None) -> (int, bool): """ This function will create a record for the property in the database if it does not exist. If it does exist, it will just update the updated_at field. 
@@ -20,6 +24,9 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode: :param portfolio_id: The ID of the portfolio the property belongs to :param address: The address of the property :param postcode: The postcode of the property + :param uprn: The UPRN of the property + :param energy_assessment: The energy assessment data for the property + :param landlord_property_id: The landlord property ID if available :return: The ID of the property and a boolean indicating whether it was created or not """ @@ -49,6 +56,7 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode: postcode=postcode, portfolio_id=portfolio_id, uprn=uprn, + landlord_property_id=landlord_property_id, creation_status=PropertyCreationStatus.LOADING, status=status, has_pre_condition_report=False, @@ -63,6 +71,30 @@ def create_property(session: Session, portfolio_id: int, address: str, postcode: return new_property.id, True +def ensure_property_exists(session, body, epc_searcher, energy_assessment, landlord_property_id=None): + """ + Wrapper function around create_property; returns (None, False) if the property already exists and multi_plan is not set, otherwise (property_id, is_new) + :param session: + :param body: + :param epc_searcher: + :param energy_assessment: + :param landlord_property_id: + :return: + """ + property_id, is_new = create_property( + session=session, + portfolio_id=body.portfolio_id, + address=epc_searcher.address_clean, + postcode=epc_searcher.postcode_clean, + uprn=epc_searcher.uprn, + energy_assessment=energy_assessment, + landlord_property_id=str(landlord_property_id) if landlord_property_id is not None else None + ) + if not is_new and not body.multi_plan: + return None, False + return property_id, is_new + + def create_property_targets( session: Session, property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None ): @@ -175,3 +207,162 @@ def update_or_create_property_spatial_details(session: Session, uprn: int, prope session.flush() return True + + +def
get_existing_properties(session, portfolio_id, uprns, landlord_ids): + """ + Bulk method for checking for existing properties + :param session: + :param portfolio_id: + :param uprns: + :param landlord_ids: + :return: + """ + return ( + session.exec( + select(PropertyModel) + .where(PropertyModel.portfolio_id == portfolio_id) + .where( + or_( + PropertyModel.uprn.in_(uprns), + PropertyModel.landlord_property_id.in_(landlord_ids), + ) + ) + ) + .scalars() + .all() + ) + + +def bulk_create_properties( + session, + body, + addresses: list[Address], # these are *new* addresses + energy_assessment_by_uprn: dict[int, dict], +): + rows = [] + + for addr in addresses: + energy_assessment = energy_assessment_by_uprn.get(addr.uprn, {}) + status = ( + PortfolioStatus.ASSESSMENT.value + if not energy_assessment.get("epc") + else PortfolioStatus.SURVEY.value + ) + + rows.append( + { + "address": addr.address1, + "postcode": addr.postcode, + "portfolio_id": body.portfolio_id, + "uprn": addr.uprn, + "landlord_property_id": addr.landlord_property_id, + "creation_status": PropertyCreationStatus.LOADING, + "status": status, + "has_pre_condition_report": False, + "has_recommendations": False, + } + ) + + if not rows: + return [] + + stmt = ( + insert(PropertyModel) + .values(rows) + .on_conflict_do_nothing( + index_elements=["portfolio_id", "uprn"], + index_where=PropertyModel.uprn.isnot(None), + ) + .returning( + PropertyModel.id, + PropertyModel.uprn, + PropertyModel.landlord_property_id, + ) + ) + + result = session.execute(stmt) + session.flush() + + return result.fetchall() + + +def bulk_update_properties(session: Session, property_updates: list[dict]): + if not property_updates: + return + + now = datetime.datetime.now(pytz.utc) + + stmt = ( + update(PropertyModel.__table__) + .where( + PropertyModel.id == bindparam("b_id"), + PropertyModel.portfolio_id == bindparam("b_portfolio_id"), + ) + .values( + **{k: bindparam(k) for k in property_updates[0]["data"].keys()}, + 
updated_at=now, + ) + ) + + payload = [ + { + "b_id": row["property_id"], # renamed bind param + "b_portfolio_id": row["portfolio_id"], + **row["data"], + } + for row in property_updates + ] + + session.execute( + stmt, + payload, + execution_options={"synchronize_session": False}, + ) + + +def bulk_upsert_property_details_epc(session: Session, rows: list[dict]): + if not rows: + return + + insert_stmt = insert(PropertyDetailsEpcModel).values(rows) + + update_cols = { + col.name: insert_stmt.excluded[col.name] + for col in PropertyDetailsEpcModel.__table__.columns + if col.name not in ("id",) + } + + stmt = insert_stmt.on_conflict_do_update( + index_elements=["portfolio_id", "property_id"], + set_=update_cols, + ) + + session.execute(stmt) + + +def bulk_upsert_property_spatial(session: Session, rows: list[dict]): + if not rows: + return + + values = [] + for row in rows: + values.append({ + "uprn": row["uprn"], + **row["data"], + }) + + insert_stmt = insert(PropertyDetailsSpatial).values(values) + + update_cols = { + col.name: insert_stmt.excluded[col.name] + for col in PropertyDetailsSpatial.__table__.columns + if col.name not in ("id", "uprn") + } + + stmt = insert_stmt.on_conflict_do_update( + index_elements=["uprn"], + set_=update_cols, + ) + + session.execute(stmt) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index f42f66e1..51562f55 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,13 +1,93 @@ +from sqlalchemy import text from sqlalchemy import insert, delete from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario ) -from backend.app.db.models.portfolio import ( - PropertyModel, PropertyTargetsModel, PropertyDetailsEpcModel -) -from 
backend.app.db.models.funding import FundingPackageMeasures, FundingPackage +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session, db_read_session + + +def prepare_plan_data( + p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations, + rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0, +): + """ + Utility function to prepare the data that goes into the production of a plan. Is a fairly rough and unstructured + function that will need improving in the future + :param p: Instantiated property + :param body: request body, PlanTriggerRequest + :param scenario_id: unique identifier for the scenario + :param eco_packages: Pre-constructed eco packages for a property + :param valuations: valuation improvement data + :param new_sap_points: sap points, post default recommendations + :param new_epc: new epc rating, post default recommendations + :param default_recommendations: list of default recommendations for a property + :param rebaselining_carbon: carbon emissions adjustment for rebaselining + :param rebaselining_heat_demand: heat demand adjustment for rebaselining + :param rebaselining_kwh: kwh consumption adjustment for rebaselining + :param rebaselining_bills: energy bill adjustment for rebaselining + :return: + """ + # Plan carbon savings + co2_savings = sum( + [r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)] + ) + post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings + + # Plan bill savings + energy_bill_savings = sum( + [r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)] + ) + post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings + + # energy consumption + energy_consumption_savings = sum( + [r["kwh_savings"] for r in default_recommendations if not 
r.get("already_installed", False)] + ) + post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings + + valuation_post_retrofit, valuation_increase = None, None + if valuations["current_value"]: + valuation_increase = valuations["average_increase"] + valuation_post_retrofit = valuations["average_increased_value"] + + # plan costing data + cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)]) + contingency_cost = sum( + [r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)] + ) + + return { + "portfolio_id": body.portfolio_id, + "property_id": p.id, + "scenario_id": scenario_id, + "is_default": True if p.is_new else False, + "name": body.scenario_name, + "valuation_increase_lower_bound": ( + valuations["lower_bound_increased_value"] - valuations["current_value"] + ), + "valuation_increase_upper_bound": ( + valuations["upper_bound_increased_value"] - valuations["current_value"] + ), + "valuation_increase_average": ( + valuations["average_increased_value"] - valuations["current_value"] + ), + "post_sap_points": float(new_sap_points), + "post_epc_rating": new_epc, + "post_co2_emissions": float(post_co2_emissions), + "co2_savings": float(co2_savings), + "post_energy_bill": float(post_energy_bill), + "energy_bill_savings": float(energy_bill_savings), + "post_energy_consumption": float(post_energy_consumption), + "energy_consumption_savings": float(energy_consumption_savings), + "valuation_post_retrofit": valuation_post_retrofit, + "valuation_increase": valuation_increase, + "cost_of_works": float(cost_of_works), + "contingency_cost": float(contingency_cost), + "plan_type": eco_packages.get(p.id, (None, None, None))[2] + } def create_plan(session: Session, plan): @@ -27,27 +107,47 @@ def create_plan(session: Session, plan): raise e -def create_scenario(session: Session, scenario): - """ - This function will create a record for the 
scenario in the database if it does not exist. - :param session: The database session - :param scenario: dictionary of data representing a scenario to be created - """ - try: +def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int, int]: + if not plans_to_create: + return {} - # Before creating a new scenario, we check if there is a scenario for this portfolio id already - # If there is, it means that any new scnario created will NOT be the default scenario - existing_scenario = session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first() - scenario["is_default"] = True if not existing_scenario else False + payload = [ + { + "property_id": p["property_id"], + **p["plan_data"], + } + for p in plans_to_create + ] - new_scenario = Scenario(**scenario) - session.add(new_scenario) - session.flush() - session.commit() - return new_scenario - except SQLAlchemyError as e: - session.rollback() - raise e + stmt = ( + insert(Plan) + .values(payload) + .returning(Plan.id, Plan.property_id) + ) + + result = session.execute(stmt).all() + + # property_id -> plan_id + return {row.property_id: row.id for row in result} + + +def create_scenario(session: Session, scenario: dict) -> int: + existing_scenario = ( + session.query(Scenario) + .filter_by(portfolio_id=scenario["portfolio_id"]) + .first() + ) + + scenario["is_default"] = not bool(existing_scenario) + + new_scenario = Scenario(**scenario) + session.add(new_scenario) + session.flush() # ensures ID is populated + + scenario_id = new_scenario.id + session.commit() + + return scenario_id def create_recommendation(session: Session, recommendation): @@ -168,54 +268,277 @@ def upload_recommendations(session: Session, recommendations_to_upload, property return False -def clear_portfolio(session: Session, portfolio_id: int): - # Fetch all property IDs associated with the given portfolio - property_ids = session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == 
portfolio_id).all() - property_ids = [p.id for p in property_ids] +def bulk_upload_recommendations_and_materials( + session: Session, + recommendation_payload: list[dict], +): + if not recommendation_payload: + return - # Fetch all recommendation IDs associated with the properties - recommendation_ids = session.query(Recommendation.id).filter(Recommendation.property_id.in_(property_ids)).all() - recommendation_ids = [r.id for r in recommendation_ids] + # --------------------------------------------------------- + # 1. Prepare recommendation rows + # --------------------------------------------------------- + recommendation_rows = [] + parts_by_index = [] + plan_ids_by_index = [] - # Fetch all plan IDs associated with the portfolio - plan_ids = session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).all() - plan_ids = [p.id for p in plan_ids] + for rec in recommendation_payload: + recommendation_rows.append({ + "property_id": rec["property_id"], + "type": rec["type"], + "measure_type": rec["measure_type"], + "description": rec["description"], + "estimated_cost": rec["estimated_cost"], + "default": rec["default"], + "starting_u_value": rec["starting_u_value"], + "new_u_value": rec["new_u_value"], + "sap_points": rec["sap_points"], + "heat_demand": rec["heat_demand"], + "kwh_savings": rec["kwh_savings"], + "co2_equivalent_savings": rec["co2_equivalent_savings"], + "energy_savings": rec["energy_savings"], + "energy_cost_savings": rec["energy_cost_savings"], + "total_work_hours": rec["total_work_hours"], + "labour_days": rec["labour_days"], + "already_installed": rec["already_installed"], + }) - # Delete all entries from RecommendationMaterials for these recommendations - session.execute( - delete(RecommendationMaterials).where(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) + parts_by_index.append(rec["parts"]) + plan_ids_by_index.append(rec["plan_id"]) + + # --------------------------------------------------------- + # 2. 
Insert recommendations and get IDs + # --------------------------------------------------------- + result = session.execute( + insert(Recommendation) + .values(recommendation_rows) + .returning(Recommendation.id) ) - # Delete all entries from PlanRecommendations that reference plans in the portfolio - session.execute(delete(PlanRecommendations).where(PlanRecommendations.plan_id.in_( - session.query(Plan.id).filter(Plan.portfolio_id == portfolio_id).subquery().as_scalar() - ))) + recommendation_ids = [row[0] for row in result] + + # --------------------------------------------------------- + # 3. Insert recommendation materials + # --------------------------------------------------------- + materials_rows = [] + + for recommendation_id, parts in zip(recommendation_ids, parts_by_index): + for part in parts: + materials_rows.append({ + "recommendation_id": recommendation_id, + "material_id": part["material_id"], + "depth": part["depth"], + "quantity": part["quantity"], + "quantity_unit": part["quantity_unit"], + "estimated_cost": part["estimated_cost"], + }) + + if materials_rows: + session.execute( + insert(RecommendationMaterials).values(materials_rows) + ) + + # --------------------------------------------------------- + # 4. 
def chunked(iterable, size=100):
    """
    Yield successive chunks of at most ``size`` items from ``iterable``.

    Inputs that support ``len()`` are sliced, so each chunk has the same type as
    the input (list -> list, tuple -> tuple, str -> str). Inputs without a length
    (generators, iterators) are consumed lazily and yielded as lists.

    :param iterable: The items to split up
    :param size: Maximum number of items per chunk; must be at least 1
    :raises ValueError: If ``size`` is less than 1. Because this is a generator,
        the error surfaces when iteration starts.
    """
    from itertools import islice

    # A zero step makes range() fail with an unrelated message, and a negative
    # step silently yields nothing; reject both explicitly.
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")

    try:
        total = len(iterable)
    except TypeError:
        # No length: pull ``size`` items at a time until the iterator is exhausted
        iterator = iter(iterable)
        while chunk := list(islice(iterator, size)):
            yield chunk
        return

    for start in range(0, total, size):
        yield iterable[start:start + size]
# Commit the changes - session.commit() + +def delete_property_batch(session: Session, property_ids: list[int]): + if not property_ids: + return + + params = {"property_ids": property_ids} + + # -------------------------------------------------- + # recommendation_materials (via recommendation) + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING recommendation r + WHERE rm.recommendation_id = r.id + AND r.property_id = ANY(:property_ids) + """), + params, + ) + + # -------------------------------------------------- + # plan_recommendations (via plan) + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM plan_recommendations pr + USING plan p + WHERE pr.plan_id = p.id + AND p.property_id = ANY(:property_ids) + """), + params, + ) + + # -------------------------------------------------- + # funding_package_measures + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM funding_package_measures fpm + USING funding_package fp, plan p + WHERE fpm.funding_package_id = fp.id + AND fp.plan_id = p.id + AND p.property_id = ANY(:property_ids) + """), + params, + ) + + # -------------------------------------------------- + # inspections (direct) + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM inspections + WHERE property_id = ANY(:property_ids) + """), + params, + ) + + # -------------------------------------------------- + # funding_package + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM funding_package fp + USING plan p + WHERE fp.plan_id = p.id + AND p.property_id = ANY(:property_ids) + """), + params, + ) + + # -------------------------------------------------- + # recommendation (direct — CRITICAL FIX) + # -------------------------------------------------- + session.execute( + text(""" + DELETE FROM 
def clear_portfolio_in_batches(
    portfolio_id: int,
    property_batch_size: int = 25,
):
    """
    Delete every property of a portfolio, and the rows that depend on them, in batches.

    Each batch is deleted inside its own ``db_session()`` block. Once all batches
    are done, the portfolio's scenarios are removed via
    ``delete_portfolio_scenarios_if_empty``. Progress is reported with ``print``.

    :param portfolio_id: The portfolio to clear
    :param property_batch_size: Number of properties deleted per batch; must be at least 1
    :raises ValueError: If ``property_batch_size`` is less than 1
    """
    import time

    # Validate before touching the database: 0 would raise ZeroDivisionError below,
    # and a negative size would delete nothing while still reporting success.
    if property_batch_size < 1:
        raise ValueError(
            f"property_batch_size must be a positive integer, got {property_batch_size!r}"
        )

    property_ids = get_property_ids(portfolio_id)

    if not property_ids:
        print("No properties found.")
        delete_portfolio_scenarios_if_empty(portfolio_id)
        return

    # Ceiling division: number of batches needed to cover all properties
    total = (len(property_ids) + property_batch_size - 1) // property_batch_size

    for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1):
        print(f"Deleting batch {i}/{total} ({len(batch)} properties)")
        # perf_counter is monotonic, so the duration cannot be skewed by clock changes
        start_time = time.perf_counter()
        # NOTE(review): db_session() presumably commits on exit, one transaction per batch — confirm
        with db_session() as session:
            delete_property_batch(session, batch)
        elapsed = time.perf_counter() - start_time
        print(f"Batch {i} deleted in {elapsed:.2f} seconds")

    # scenario deletion happens AFTER all properties are gone
    delete_portfolio_scenarios_if_empty(portfolio_id)

    print("Portfolio cleared in batches.")
+ """ + + # -------------------------------------------------------- + # CREATE SUBTASK + # -------------------------------------------------------- + def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, status=None): + + now = datetime.now(timezone.utc) + with get_db_session() as session: + task = session.get(Task, task_id) + if not task: + raise ValueError(f"Task {task_id} not found") + + # We treat waiting as the default status + status = "waiting" if status is None else status + + subtask = SubTask( + task_id=task_id, + inputs=json.dumps(inputs) if inputs else None, + status=status, + job_started=now, + job_completed=None, + ) + + session.add(subtask) + session.commit() + session.refresh(subtask) + + # Recalculate parent task progress + self._update_task_progress(session, task_id) + return subtask.id + + # -------------------------------------------------------- + # UPDATE STATUS (in progress, complete, failed) + # -------------------------------------------------------- + def update_subtask_status( + self, subtask_id: UUID, status: str, outputs=None, cloud_logs_url=None + ): + """ + Update the status of a subtask, and recalculate the parent task progress. 
+ :param subtask_id: UUID of the subtask to update + :param status: New status (in progress, complete, failed) + :param outputs: Optional outputs to set + :param cloud_logs_url: Optional cloud logs URL to set + :return: + """ + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + normalized = status.lower() + + # When job really starts + if normalized == "in progress" and subtask.job_started is None: + subtask.job_started = now + + # Completed or failed + if normalized in ("complete", "failed"): + subtask.job_completed = now + + subtask.status = normalized + subtask.updated_at = now + if outputs is not None: + subtask.outputs = json.dumps(outputs) + + if cloud_logs_url is not None: + subtask.cloud_logs_url = cloud_logs_url + + session.add(subtask) + session.commit() + + # Recalculate task status + self._update_task_progress(session, subtask.task_id) + + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # UPDATE OUTPUTS + # -------------------------------------------------------- + @staticmethod + def update_subtask_output(subtask_id: UUID, outputs: Dict[str, Any]): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + subtask.outputs = json.dumps(outputs) + subtask.updated_at = now + + session.add(subtask) + session.commit() + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # UPDATE CLOUD LOGS URL + # -------------------------------------------------------- + @staticmethod + def update_subtask_logs(subtask_id: UUID, cloud_logs_url: str): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + 
raise ValueError(f"SubTask {subtask_id} not found") + + subtask.cloud_logs_url = cloud_logs_url + subtask.updated_at = now + + session.add(subtask) + session.commit() + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # SET BOTH OUTPUT + LOGS + # -------------------------------------------------------- + @staticmethod + def set_subtask_result( + subtask_id: UUID, + outputs: Optional[Dict[str, Any]] = None, + cloud_logs_url: Optional[str] = None, + ): + now = datetime.now(timezone.utc) + + with get_db_session() as session: + subtask = session.get(SubTask, subtask_id) + if not subtask: + raise ValueError(f"SubTask {subtask_id} not found") + + if outputs is not None: + subtask.outputs = json.dumps(outputs) + + if cloud_logs_url is not None: + subtask.cloud_logs_url = cloud_logs_url + + subtask.updated_at = now + session.add(subtask) + session.commit() + session.refresh(subtask) + return subtask + + # -------------------------------------------------------- + # TASK PROGRESS CALCULATION + # -------------------------------------------------------- + @staticmethod + def _update_task_progress(session: Session, task_id: UUID): + task = session.get(Task, task_id) + if not task: + return + + subtasks = session.exec( + select(SubTask).where(SubTask.task_id == task_id) + ).all() + + statuses = [s.status.lower() for s in subtasks] + now = datetime.now(timezone.utc) + + if "failed" in statuses: + task.status = "failed" + task.job_completed = now + + elif all(s == "complete" for s in statuses): + task.status = "complete" + task.job_completed = now + + elif "in progress" in statuses: + task.status = "in progress" + # if task.job_started is None: + # task.job_started = now + + else: + # All waiting + task.status = "waiting" + task.job_completed = None + + task.updated_at = now + session.add(task) + session.commit() + + def finalize_subtask( + self, + subtask_id: UUID, + status: str, + outputs: Optional[Dict[str, Any]], + 
class TasksInterface:
    """
    High-level operations for Task records.
    """

    @staticmethod
    def create_task(
        task_source: str,
        service: Optional[str] = None,
        inputs: Optional[Dict[str, Any]] = None,
        task_only: bool = False,
    ):
        """
        Create a new Task record, and an initial SubTask in waiting state. Can also be used to create just
        a task, without a subtask
        :param task_source: Text indicating source of task creation (e.g. file path + function name)
        :param service: Optional service name
        :param inputs: Inputs of the job being run; passed on to the initial SubTask
        :param task_only: If True, only create the Task record, without a SubTask
        :return: Tuple of (task id, subtask id); the subtask id is None when task_only is True
        """
        now = datetime.now(timezone.utc)
        with get_db_session() as session:
            # NOTE(review): job_started is stamped at creation although the status is
            # "waiting", so update_task_status never overwrites it later — confirm intended
            task = Task(
                task_source=task_source,
                service=service,
                status="waiting",
                job_started=now,
                job_completed=None,
            )

            session.add(task)
            # Commit before creating the subtask: create_subtask opens its own session
            # and looks the task up by id
            session.commit()
            session.refresh(task)

            if task_only:
                return task.id, None

            # Create first subtask in waiting state
            subtask_interface = SubTaskInterface()
            subtask_id = subtask_interface.create_subtask(
                task_id=task.id,
                inputs=inputs,
            )

            return task.id, subtask_id

    @staticmethod
    def update_task_status(task_id: UUID, status: str):
        """
        Set the status of a task and maintain its timestamps.

        :param task_id: UUID of the task to update
        :param status: New status; stored lower-cased (e.g. in progress, complete, failed)
        :return: The refreshed Task record
        :raises ValueError: If no task with this id exists
        """
        now = datetime.now(timezone.utc)

        with get_db_session() as session:
            task = session.get(Task, task_id)
            if not task:
                raise ValueError(f"Task {task_id} not found")

            normalized = status.lower()

            if normalized == "in progress" and task.job_started is None:
                task.job_started = now

            # "failed" ends a task just like "complete"; this matches how the SubTask
            # code and the task progress roll-up stamp job_completed for both
            if normalized in ("complete", "failed"):
                task.job_completed = now

            task.status = normalized
            task.updated_at = now

            session.add(task)
            session.commit()
            session.refresh(task)
            return task
def upload_whlg_from_dataframe(df):
    """
    Bulk insert WHLG postcodes from a DataFrame, skipping postcodes already stored.
    No unique constraint needed: duplicates are filtered here before inserting.

    Postcodes are normalised the same way as in ``upsert_whlg_postcode``
    (lower-cased, spaces removed). Missing and blank values are ignored.

    :param df: DataFrame with a ``Postcode`` column
    :return: Dict with ``inserted``, ``skipped_existing`` and ``total_provided`` counts,
        all measured on the de-duplicated, cleaned postcodes
    :raises ValueError: If the DataFrame has no ``Postcode`` column
    """

    if "Postcode" not in df.columns:
        raise ValueError("DataFrame must contain a 'Postcode' column")

    # 1. Clean incoming postcodes
    # Drop missing values BEFORE casting to str: astype(str) turns NaN into the
    # literal text "nan", which dropna() can no longer see and which would then
    # be inserted as a postcode.
    normalised = (
        df["Postcode"]
        .dropna()
        .astype(str)
        .str.lower()
        .str.replace(" ", "", regex=False)
    )
    cleaned_postcodes = normalised[normalised != ""].unique().tolist()
    total_provided = len(cleaned_postcodes)

    with get_db_session() as session:
        # 2. Fetch existing postcodes once
        rows = session.exec(select(Whlg.postcode)).all()

        # NOTE(review): depending on which `select` and session type are in use, the
        # result holds plain strings or one-column rows — accept both so that the
        # membership test below compares strings with strings
        existing_set = set()
        for row in rows:
            value = row if (row is None or isinstance(row, str)) else row[0]
            if value is not None:
                existing_set.add(value)

        # 3. Determine which are new
        new_postcodes = [pc for pc in cleaned_postcodes if pc not in existing_set]

        if not new_postcodes:
            return {
                "inserted": 0,
                "skipped_existing": total_provided,
                "total_provided": total_provided,
            }

        # 4. Bulk insert new postcodes in one shot
        session.bulk_save_objects([Whlg(postcode=pc) for pc in new_postcodes])
        session.commit()

    return {
        "inserted": len(new_postcodes),
        "skipped_existing": total_provided - len(new_postcodes),
        "total_provided": total_provided,
    }
+ """ + __tablename__ = "epc_store" + + id = Column(Integer, primary_key=True, autoincrement=True) + uprn = Column(Integer) + epc_api_created_at = Column(TIMESTAMP(timezone=False)) + epc_api = Column(JSON, nullable=False) + epc_page_created_at = Column(TIMESTAMP(timezone=False)) + epc_page = Column(String) + epc_page_rrn = Column(String) + + def __repr__(self): + return f"" diff --git a/backend/app/db/models/inspections.py b/backend/app/db/models/inspections.py new file mode 100644 index 00000000..473f8a02 --- /dev/null +++ b/backend/app/db/models/inspections.py @@ -0,0 +1,262 @@ +import enum +import pytz +import datetime +from sqlalchemy import ( + Column, + BigInteger, + Text, + DateTime, + Enum, + ForeignKey, +) +from sqlalchemy.ext.declarative import declarative_base +from backend.app.db.models.portfolio import PropertyModel + +Base = declarative_base() + + +# ------------------------------------------------------------------- +# ENUM DEFINITIONS (equivalent to drizzle pgEnum calls) +# ------------------------------------------------------------------- + +class InspectionArchetype(enum.Enum): + BUNGALOW = "Bungalow" + FLAT = "Flat" + MAISONETTE = "Maisonette" + HOUSE = "House" + NON_DOMESTIC = "non-domestic" + + +class InspectionArchetype2(enum.Enum): + DETACHED = "detached" + MID_TERRACE = "mid-terrace" + ENCLOSED_MID_TERRACE = "enclosed mid-terrace" + END_TERRACE = "end-terrace" + ENCLOSED_END_TERRACE = "enclosed end-terrace" + SEMI_DETACHED = "semi-detached" + + +class InspectionsWallConstruction(enum.Enum): + CAVITY = "cavity" + SOLID = "solid" + SYSTEM_BUILT = "system built" + TIMBER_FRAMED = "timber framed" + STEEL_FRAMED = "steel framed" + RE_WALLED_CAVITY = "re-walled cavity" + MANSARD_PRE_FAB = "mansard pre-fab" + MANSARD_EWI = "mansard ewi" + MANSARD_RE_WALLED = "mansard re-walled" + + +class InspectionsWallInsulation(enum.Enum): + EMPTY_CAVITY = "empty cavity" + FILLED_AT_BUILD = "filled at build" + PARTIAL = "partial" + RETRO_DRILLED = "retro 
drilled" + EWI = "ewi" + IWI = "iwi" + SOLID_NON_CAVITY = "solid non-cavity" + SYSTEM_BUILT = "system built" + TIMBER_FRAMED = "timber framed" + STEEL_FRAMED = "steel framed" + + +class InspectionsInsulationMaterial(enum.Enum): + EMPTY_50_90 = "empty 50-90" + EMPTY_100_PLUS = "empty 100+" + EMPTY_30_40 = "empty 30-40" + EMPTY_LESS_THAN_30 = "empty less than 30" + LOOSE_FIBRE_WOOL = "loose fibre/wool" + EPS_CELO_KING = "eps/celo/king" + FIBRE_BATTS_WITH_CAVITY = "fibre batts - with cavity" + FIBRE_BATTS_NO_CAVITY = "fibre batts - no cavity" + LOOSE_BEAD = "loose bead" + GLUED_BEAD = "glued bead" + FORMALDEHYDE = "formaldehyde" + BUBBLE_WRAP = "bubble wrap" + POLY_CHUNKS = "poly chunks" + + +class InspectionBorescoped(enum.Enum): + YES = "yes" + NO = "no" + REFUSED = "refused" + + +class InspectionsRoofOrientation(enum.Enum): + NORTH = "north" + EAST = "east" + SOUTH = "south" + WEST = "west" + NORTH_EAST = "north-east" + NORTH_WEST = "north-west" + SOUTH_EAST = "south-east" + SOUTH_WEST = "south-west" + N_S_SPLIT = "n/s split" + E_W_SPLIT = "e/w split" + NE_SW_SPLIT = "ne/sw split" + NW_SE_SPLIT = "nw/se split" + FLAT_ROOF = "flat roof" + NO_ROOF = "no roof" + ROOF_TOO_SMALL = "roof too small" + ALREADY_HAS_SOLAR_PV = "already has solar pv" + + +class InspectionsTileHung(enum.Enum): + YES = "yes" + NO = "no" + FIRST_FLOOR_FLATS_TILE_HUNG = "first floor flats are tile hung" + + +class InspectionsRendered(enum.Enum): + NO_RENDER = "no render" + INSUFFICIENT_DPC_SPACE = "rendered with “insufficient” space between dpc and render" + SUFFICIENT_DPC_SPACE = "rendered with “sufficient” space between dpc and render" + + +class InspectionsCladding(enum.Enum): + NONE = "none" + SUFFICIENT_SPACE = "cladded with “sufficient space to fill the wall”" + INSUFFICIENT_SPACE = "cladded with “insufficient space to fill the wall”" + + +class InspectionsAccessIssues(enum.Enum): + SEE_NOTES = "see notes" + DAMP_ISSUES = "damp issues" + FOLIAGE_ON_WALLS = "foliage on walls" + 
def _inspection_enum(enum_cls, name):
    """
    Build the Enum column type shared by all enum-backed inspection columns.

    :param enum_cls: Python enum class whose member values are stored
    :param name: Name of the enum type
    :return: The configured SQLAlchemy ``Enum`` type
    """
    return Enum(
        enum_cls,
        name=name,
        # Persist each member's ``value`` (e.g. "mid-terrace") rather than its Python name
        values_callable=lambda e: [m.value for m in e],
        # NOTE(review): presumably keeps SQLAlchemy from creating the enum type because
        # it is managed elsewhere — confirm
        create_type=False,
    )


class InspectionModel(Base):
    """
    One inspection recorded against a property.

    Every enum-backed column is optional; ``property_id`` and the two timestamps are required.
    """
    __tablename__ = "inspections"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)

    archetype = Column(
        _inspection_enum(InspectionArchetype, "inspection_archetype"), nullable=True
    )
    archetype_2 = Column(
        _inspection_enum(InspectionArchetype2, "inspection_archetype_2"), nullable=True
    )
    wall_construction = Column(
        _inspection_enum(InspectionsWallConstruction, "inspections_wall_construction"),
        nullable=True,
    )
    insulation = Column(
        _inspection_enum(InspectionsWallInsulation, "inspections_wall_insulation"),
        nullable=True,
    )
    insulation_material = Column(
        _inspection_enum(InspectionsInsulationMaterial, "inspections_insulation_material"),
        nullable=True,
    )
    borescoped = Column(
        _inspection_enum(InspectionBorescoped, "inspection_borescoped"), nullable=True
    )
    roof_orientation = Column(
        _inspection_enum(InspectionsRoofOrientation, "inspections_roof_orientation"),
        nullable=True,
    )
    tile_hung = Column(
        _inspection_enum(InspectionsTileHung, "inspections_tile_hung"), nullable=True
    )
    rendered = Column(
        _inspection_enum(InspectionsRendered, "inspections_rendered"), nullable=True
    )
    cladding = Column(
        _inspection_enum(InspectionsCladding, "inspections_cladding"), nullable=True
    )
    access_issues = Column(
        _inspection_enum(InspectionsAccessIssues, "inspections_access_issues"),
        nullable=True,
    )

    notes = Column(Text)
    surveyor_name = Column(Text)

    # The default must be a callable: a plain ``datetime.now(...)`` value is computed
    # once at import time, so every row would get the process start time.
    created_at = Column(
        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
    )
    uploaded_at = Column(
        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
    )
a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index 5f51cf46..d151bdc4 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -4,6 +4,7 @@ import datetime from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint from sqlalchemy.ext.declarative import declarative_base from backend.app.db.models.users import UserModel # noqa +from backend.app.db.models.materials import MaterialType Base = declarative_base() @@ -86,6 +87,7 @@ class PropertyModel(Base): portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) creation_status = Column(Enum(PropertyCreationStatus), nullable=False) uprn = Column(Integer) + landlord_property_id = Column(Text) building_reference_number = Column(Integer) status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False) address = Column(Text) @@ -104,6 +106,10 @@ class PropertyModel(Base): current_epc_rating = Column(Enum(Epc)) current_sap_points = Column(Float) current_valuation = Column(Float) + # Following fields are for recording already installed adjustments to a property's SAP + installed_measures_sap_point_adjustment = Column(Float) + is_sap_points_adjusted_for_installed_measures = Column(Boolean, default=False) + original_sap_points = Column(Float) class FeatureRating(enum.Enum): @@ -135,6 +141,8 @@ class PropertyDetailsEpcModel(Base): property_id = Column(Integer, ForeignKey('property.id'), nullable=False) portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) full_address = Column(Text) + lodgement_date = Column(DateTime) + is_expired = Column(Boolean) total_floor_area = Column(Float) walls = Column(Text) walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5')) @@ -173,6 +181,9 @@ class PropertyDetailsEpcModel(Base): current_energy_demand = Column(Float) current_energy_demand_heating_hotwater = Column(Float) estimated 
= Column(Boolean, default=False) + sap_05_overwritten = Column(Boolean, default=False) + sap_05_score = Column(Integer) + sap_05_epc_rating = Column(Enum(Epc)) # Include estimates for energy bills, across the different types of energy heating_cost_current = Column(Float) hot_water_cost_current = Column(Float) @@ -181,6 +192,18 @@ class PropertyDetailsEpcModel(Base): gas_standing_charge = Column(Float) electricity_standing_charge = Column(Float) + # Columns for re-baselining if we have an already installed measure + original_co2_emissions = Column(Float) + original_primary_energy_consumption = Column(Float) + original_current_energy_demand = Column(Float) + original_current_energy_demand_heating_hotwater = Column(Float) + # Adjustments + installed_measures_co2_adjustment = Column(Float) + installed_measures_energy_demand_adjustment = Column(Float) + installed_measures_total_energy_bill_adjustment = Column(Float) + installed_measures_heat_demand_adjustment = Column(Float) + is_epc_adjusted_for_installed_measures = Column(Boolean, default=False) + class PropertyDetailsSpatial(Base): __tablename__ = "property_details_spatial" @@ -224,3 +247,18 @@ class PortfolioUsers(Base): role = Column(Text, nullable=False) created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + + +class PropertyInstalledMeasures(Base): + """ + This model keeps a record of the installed measures for each property, at the UPRN level + """ + __tablename__ = 'property_installed_measures' + id = Column(Integer, primary_key=True, autoincrement=True) + uprn = Column(Integer, nullable=False) + measure_type = Column( + Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), + nullable=False + ) + created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + installed_at = Column(DateTime, nullable=False, 
default=datetime.datetime.now(pytz.utc)) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index bd5c4e20..ed1fcefa 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -3,7 +3,9 @@ from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func from backend.app.db.models.portfolio import Portfolio, PropertyModel from backend.app.db.models.materials import Material +from backend.app.db.models.portfolio import Epc from datatypes.enums import QuantityUnits +import enum Base = declarative_base() @@ -47,6 +49,14 @@ class RecommendationMaterials(Base): estimated_cost = Column(Float, nullable=False) +class PlanTypeEnum(enum.Enum): + SOLAR_ECO4 = "solar_eco4" + SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4" + EMPTY_CAVITY_ECO = "empty_cavity_eco" + PARTIAL_CAVITY_ECO = "partial_cavity_eco" + EXTRACTION_ECO = "extraction_eco" + + class Plan(Base): __tablename__ = 'plan' @@ -60,6 +70,28 @@ class Plan(Base): valuation_increase_lower_bound = Column(Float) valuation_increase_upper_bound = Column(Float) valuation_increase_average = Column(Float) + plan_type = Column( + Enum( + PlanTypeEnum, + name="plan_type", + values_callable=lambda e: [m.value for m in e], + create_type=False, + ), + nullable=True, + ) + post_sap_points = Column(Float) + post_epc_rating = Column(Enum(Epc)) + post_co2_emissions = Column(Float) + co2_savings = Column(Float) + post_energy_bill = Column(Float) + energy_bill_savings = Column(Float) + post_energy_consumption = Column(Float) # energy demand in kWh/year + energy_consumption_savings = Column(Float) + valuation_post_retrofit = Column(Float) + valuation_increase = Column(Float) + # Financial metrics, excluding funding + cost_of_works = Column(Float) + contingency_cost = Column(Float) class PlanRecommendations(Base): @@ -114,3 +146,58 @@ class Scenario(Base): valuation_return_on_investment = Column(String) property_valuation_increase = Column(Float) 
labour_days = Column(Float) + + +class MeasureType(enum.Enum): + air_source_heat_pump = "air_source_heat_pump" + boiler_upgrade = "boiler_upgrade" + high_heat_retention_storage_heaters = "high_heat_retention_storage_heaters" + secondary_heating = "secondary_heating" + + roomstat_programmer_trvs = "roomstat_programmer_trvs" + time_temperature_zone_control = "time_temperature_zone_control" + cylinder_thermostat = "cylinder_thermostat" + + cavity_wall_insulation = "cavity_wall_insulation" + extension_cavity_wall_insulation = "extension_cavity_wall_insulation" + external_wall_insulation = "external_wall_insulation" + internal_wall_insulation = "internal_wall_insulation" + loft_insulation = "loft_insulation" + flat_roof_insulation = "flat_roof_insulation" + room_roof_insulation = "room_roof_insulation" + solid_floor_insulation = "solid_floor_insulation" + suspended_floor_insulation = "suspended_floor_insulation" + + double_glazing = "double_glazing" + secondary_glazing = "secondary_glazing" + draught_proofing = "draught_proofing" + + mechanical_ventilation = "mechanical_ventilation" + low_energy_lighting = "low_energy_lighting" + solar_pv = "solar_pv" + hot_water_tank_insulation = "hot_water_tank_insulation" + sealing_open_fireplace = "sealing_open_fireplace" + + +class InstalledMeasure(Base): + __tablename__ = "installed_measure" + + id = Column(BigInteger, primary_key=True, autoincrement=True) + uprn = Column(BigInteger, nullable=False) + measure_type = Column( + Enum( + MeasureType, + name="measure_type", + values_callable=lambda e: [m.value for m in e], + create_type=False, # <-- critical + ), + nullable=False, + ) + installed_at = Column(TIMESTAMP) + sap_points = Column(Float) + carbon_savings = Column(Float) + kwh_savings = Column(Float) + bill_savings = Column(Float) + heat_demand_savings = Column(Float) + source = Column(String) + is_active = Column(Boolean, nullable=False, default=True) diff --git a/backend/app/db/models/tasks.py 
b/backend/app/db/models/tasks.py new file mode 100644 index 00000000..cfe18d83 --- /dev/null +++ b/backend/app/db/models/tasks.py @@ -0,0 +1,36 @@ +from typing import Optional +from datetime import datetime +from uuid import UUID, uuid4 + +from sqlmodel import SQLModel, Field, Relationship + + +class Task(SQLModel, table=True): + __tablename__ = "tasks" + + id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + task_source: str + job_started: Optional[datetime] = None + job_completed: Optional[datetime] = None + status: str = Field(default="In Progress") + service: Optional[str] = None + updated_at: datetime = Field(default_factory=datetime.utcnow) + + sub_tasks: list["SubTask"] = Relationship(back_populates="task") + + +class SubTask(SQLModel, table=True): + __tablename__ = "sub_task" + + id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + + task_id: UUID = Field(foreign_key="tasks.id") + job_started: Optional[datetime] = None + job_completed: Optional[datetime] = None + status: str = Field(default="In Progress") + inputs: Optional[str] = None + outputs: Optional[str] = None + cloud_logs_url: Optional[str] = None + updated_at: datetime = Field(default_factory=datetime.utcnow) + + task: Optional["Task"] = Relationship(back_populates="sub_tasks") diff --git a/backend/app/db/models/whlg.py b/backend/app/db/models/whlg.py new file mode 100644 index 00000000..29d907e4 --- /dev/null +++ b/backend/app/db/models/whlg.py @@ -0,0 +1,15 @@ +import uuid +from typing import Optional +from sqlmodel import SQLModel, Field + + +class Whlg(SQLModel, table=True): + __tablename__ = "whlg" + + id: Optional[int] = Field( + default=None, + primary_key=True, + index=True, + ) + + postcode: str = Field(nullable=False) \ No newline at end of file diff --git a/backend/app/local/router.py b/backend/app/local/router.py index 4ebb490c..0977be04 100644 --- a/backend/app/local/router.py +++ b/backend/app/local/router.py @@ -31,6 +31,11 @@ def 
create_dummy_token(secret: str) -> str:
     return token
 
 
+@router.get("/")
+async def root():
+    return {"hello": "world"}
+
+
 @router.get("/dummy-token")
 async def dummy_token():
     settings = get_settings()
diff --git a/backend/app/main.py b/backend/app/main.py
index de6f0795..f0ab4d86 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -6,14 +6,19 @@ from fastapi.encoders import jsonable_encoder
 from starlette.exceptions import HTTPException as StarletteHTTPException
 from mangum import Mangum
 from backend.app.portfolio import router as portfolio_router
+from backend.app.whlg import router as whlg_router
 from backend.app.plan import router as plan_router
+from backend.app.tasks import router as tasks_router
 from backend.app.dependencies import validate_api_key
 from backend.app.config import get_settings
 
 logger = logging.getLogger("uvicorn.error")
 logging.basicConfig(level=logging.INFO)
 
-app = FastAPI(dependencies=[Depends(validate_api_key)])
+if get_settings().ENVIRONMENT == "local":
+    app = FastAPI()
+else:
+    app = FastAPI(dependencies=[Depends(validate_api_key)])
 
 
 # Handle 422 errors (validation failures)
@@ -52,10 +57,11 @@ async def log_requests(request: Request, call_next):
 
 app.include_router(portfolio_router.router, prefix="/v1")
 app.include_router(plan_router.router, prefix="/v1")
+app.include_router(whlg_router.router, prefix="/v1")
+app.include_router(tasks_router.router, prefix="/v1")
 
 if get_settings().ENVIRONMENT == "local":
     from app.local import router as local_router
-
     app.include_router(local_router.router)
 
 handler = Mangum(app)
diff --git a/backend/app/plan/data_classes.py b/backend/app/plan/data_classes.py
new file mode 100644
index
00000000..99f6156b --- /dev/null +++ b/backend/app/plan/data_classes.py @@ -0,0 +1,9 @@ +from dataclasses import dataclass +from typing import Any, Optional + + +@dataclass +class PropertyRequestData: + patch: dict + non_invasive_recommendations: dict + valuation: Optional[float] diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index b897dbda..ea41162f 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -2,7 +2,8 @@ import boto3 import json import math import asyncio -import random +from contextlib import contextmanager +from sqlmodel import Session from datetime import datetime @@ -15,6 +16,7 @@ from utils.logger import setup_logger from backend.app.db.connection import db_engine from backend.app.db.functions.recommendations_functions import create_scenario +from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface logger = setup_logger() @@ -28,6 +30,19 @@ router = APIRouter( sqs_client = boto3.client("sqs") +@contextmanager +def db_session(): + session = Session(db_engine) + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() + + @router.post("/trigger", status_code=202) async def trigger_plan_entrypoint(body: PlanTriggerRequest): """ @@ -56,36 +71,57 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): scenario_id = data.get("scenario_id") if not scenario_id: created_at = datetime.now().isoformat() - session = sessionmaker(bind=db_engine)() - - # Create a new scenario - new_scenario = create_scenario( - session=session, - scenario={ - "name": body.scenario_name, - "created_at": created_at, - "budget": body.budget, - "portfolio_id": body.portfolio_id, - "housing_type": body.housing_type, - "goal": body.goal, - "goal_value": body.goal_value, - "trigger_file_path": body.trigger_file_path, - "already_installed_file_path": body.already_installed_file_path, - "patches_file_path": body.patches_file_path, - 
"non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, - "exclusions": body.exclusions, - "multi_plan": body.multi_plan - } - ) - scenario_id = new_scenario.id + with db_session() as session: + # Create a new scenario + scenario_id = create_scenario( + session=session, + scenario={ + "name": body.scenario_name, + "created_at": created_at, + "budget": body.budget, + "portfolio_id": body.portfolio_id, + "housing_type": body.housing_type, + "goal": body.goal, + "goal_value": body.goal_value, + "trigger_file_path": body.trigger_file_path, + "already_installed_file_path": body.already_installed_file_path, + "patches_file_path": body.patches_file_path, + "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, + "exclusions": body.exclusions, + "multi_plan": body.multi_plan + } + ) # Insert the scenario ID into the data payload data["scenario_id"] = scenario_id + # Create a main task + task_id, _ = TasksInterface.create_task( + task_source="backend/plan/router.py:trigger_plan_entrypoint", + service="plan_engine", + inputs=data, + task_only=True + ) + + subtask_interface = SubTaskInterface() for i in range(total_chunks): + # Create an entry in the request logs table index_start = i * chunk_size index_end = min((i + 1) * chunk_size, total_rows) - message_payload = {**data, "index_start": index_start, "index_end": index_end} + message_payload = { + **data, "index_start": index_start, "index_end": index_end, + } + + # Create a subtask for this chunk + subtask_id = subtask_interface.create_subtask( + task_id=task_id, + inputs=message_payload + ) + + # Add task and subtask to message + message_payload["task_id"] = str(task_id) + message_payload["subtask_id"] = str(subtask_id) + message_body = json.dumps(message_payload) response = sqs_client.send_message( @@ -96,7 +132,9 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): f"Chunk {i} sent to SQS. Rows {index_start}–{index_end}. 
Message ID: {response.get('MessageId')}" ) - await asyncio.sleep(random.uniform(0.1, 0.5)) # Delay to reduce spike pressure + await asyncio.sleep(0.05) # Small delay to avoid SQS throttling + + # await asyncio.sleep(random.uniform(0.1, 0.5)) # Delay to reduce spike pressure except Exception as e: logger.error("Error during Excel file handling: %s", e) @@ -105,6 +143,14 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): else: # Fallback: Just send a single message try: + task_id, subtask_id = TasksInterface.create_task( + task_source="backend/plan/router.py:trigger_plan_entrypoint", + service="plan_engine", + inputs=data, + task_only=False, + ) + data["task_id"] = str(task_id) + data["subtask_id"] = str(subtask_id) message_body = json.dumps(data) response = sqs_client.send_message( QueueUrl=settings.ENGINE_SQS_URL, diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index feff11fd..edac31dc 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -17,7 +17,7 @@ ECO4_ELIGIBILE_FABRIC_MEASURES = [ "suspended_floor_insulation", "solid_floor_insulation", "double_glazing", "secondary_glazing" ] ECO4_ELIGIBLE_HEATING_MEASURES = [ - "boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump", "solar_pv" + "boiler_upgrade", "high_heat_retention_storage_heaters", "air_source_heat_pump", "solar_pv" ] SPECIFIC_MEASURES = ( @@ -48,14 +48,14 @@ MEASURE_MAP = { ], "roof_insulation": ["loft_insulation", "flat_roof_insulation", "room_roof_insulation"], "floor_insulation": ["suspended_floor_insulation", "solid_floor_insulation"], - "heating": ["boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump"], + "heating": ["boiler_upgrade", "high_heat_retention_storage_heaters", "air_source_heat_pump"], "windows": ["double_glazing", "secondary_glazing"], "heating_controls": ["roomstat_programmer_trvs", "time_temperature_zone_control"] } VALID_GOALS = ["Increasing EPC", "Energy Savings", "Reducing 
CO2 emissions"] VALID_HOUSING_TYPES = ["Social", "Private"] -VALID_EVENT_TYPES = ["remote_assessment"] +VALID_EVENT_TYPES = ["remote_assessment", "eco_project"] # Define the validation function for inclusions/exclusions @@ -113,7 +113,7 @@ class PlanTriggerRequest(BaseModel): # When performing a remote assessment, if this has been set, it will allow the engine to # pull data from the find my epc website, to utilise as part of a remote assessment - event_type: Optional[Literal["remote_assessment"]] = None + event_type: Optional[Literal["remote_assessment", "eco_project"]] = None # If true, before optimising the engine will select a slightly larger package, to account for the SAP 10 causing # scores to drop by a few points @@ -129,6 +129,13 @@ class PlanTriggerRequest(BaseModel): index_start: Optional[int] = None index_end: Optional[int] = None + # Task and subtask IDs + task_id: Optional[str] = None + subtask_id: Optional[str] = None + + # Optional flag to trigger a fabric first task + enforce_fabric_first: Optional[bool] = False + @model_validator(mode="after") def check_indexes(self): if (self.index_start is None) != (self.index_end is None): diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 34fb02e7..33f391d4 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -1,7 +1,16 @@ -from utils.s3 import read_from_s3 - -from backend.app.config import get_settings +import ast +import os import msgpack +from uuid import UUID +from utils.s3 import read_from_s3 +from backend.addresses.Address import Address +from backend.app.config import get_settings +from backend.app.plan.data_classes import PropertyRequestData +from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from starlette.responses import Response +from utils.logger import setup_logger + +logger = setup_logger() def get_cleaned(): @@ -21,3 +30,217 @@ def get_cleaned(): cleaned = msgpack.unpackb(cleaned, raw=False) return cleaned + + +def patch_epc(patch, 
epc_records): + """ + This utility function is useful to patch the epc data if we have data from the customer + :return: + """ + + for patch_variable, patch_value in patch.items(): + + if patch_variable in ["address", "postcode"]: + continue + + if patch_value in ["", None]: + continue + if patch_variable in epc_records["original_epc"]: + epc_records["original_epc"][patch_variable] = patch_value + + return epc_records + + +def extract_property_request_data( + address: Address, patches, non_invasive_recommendations, valuation_data, uprn +): + patch_has_uprn = "uprn" in patches[0] if patches else True + if patch_has_uprn: + patch = next(( + x for x in patches if str(x["uprn"]) == str(address.uprn) + ), {}) + else: + patch = next(( + x for x in patches if (x["address"] == address.address) and (x["postcode"] == address.postcode) + ), {}) + + # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN + # we need to check existence of uprn + has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False + if has_uprn: + has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None] + + if has_uprn: + property_non_invasive_recommendations = next(( + x for x in non_invasive_recommendations if + (str(x["uprn"]) == str(uprn)) + ), {}) + + # We patch the non-invasive recs that are ['cavity_extract_and_refill'] + else: + property_non_invasive_recommendations = next(( + x for x in non_invasive_recommendations if + (x["address"] == address.address) and (x["postcode"] == address.postcode) + ), {}) + + if isinstance(property_non_invasive_recommendations.get("recommendations"), str): + property_non_invasive_recommendations["recommendations"] = ast.literal_eval( + property_non_invasive_recommendations["recommendations"] + ) + transformed = [] + for rec in property_non_invasive_recommendations["recommendations"]: + if isinstance(rec, str): + transformed.append({"type": rec, }) + else: + 
transformed.append(rec) + + property_non_invasive_recommendations["recommendations"] = transformed + + # Check if the valuation data has uprn + valuation_has_uprn = "uprn" in valuation_data[0] if valuation_data else False + if valuation_has_uprn: + valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None] + + if valuation_has_uprn: + property_valuation = next(( + float(x["valuation"]) for x in valuation_data if + (str(x["uprn"]) == str(uprn)) + ), None) + else: + property_valuation = next(( + float(x["valuation"]) for x in valuation_data if + (x["address"] == address.address) and (x["postcode"] == address.postcode) + ), None) + + # Return data class to give a structured format + return PropertyRequestData( + patch=patch, + non_invasive_recommendations=property_non_invasive_recommendations, + valuation=property_valuation + ) + + +def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str, list[str]] | tuple[ + None, None, None, list]: + solar_identification = addr.solar_reason + cavity_identification = addr.cavity_reason + if not solar_identification and not cavity_identification: + return None, None, None, [] + + landlord_heating_system = addr.landlord_heating_system + # This is the initial version of tackling "already installed" measures + already_installed = [] + if landlord_heating_system == "air source heat pump": + already_installed.append("air_source_heat_pump") + + # We map the categories to the desired measures and upgrade targets + # We note that the categories are placeholder until we move the standardised asset list + + identification_map = { + "Solar Eligible": { + "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"], + "target_sap": 86, # High B + "plan_type": "solar_eco4" + }, + "Solar Eligible, Solid Wall Uninsulated, EPC E or Below": { + "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"], + "target_sap": 86, # High B + "plan_type": "solar_eco4" + }, + "Solar Eligible, Needs Heating 
Upgrade": { + "measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heaters", + "mechanical_ventilation"], + "target_sap": 86, # High B + "plan_type": "solar_hhrsh_eco4" + }, + "Non-Intrusive Data Shows Empty Cavity": { + "measures": ["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "empty_cavity_eco" + }, + 'Non-Intrusive Data Shows Empty Cavity, built after 2002': { + "measures": ["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "empty_cavity_eco" + }, + "EPC Shows Empty Cavity, inspections show retro drilled": { + # EPC Indicates it's empty, so we simulate a fill + "measures": ["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "extraction_eco" + }, + "EPC Shows Empty Cavity, inspections show filled at build": { + # EPC Indicates it's empty, so we simulate a fill + "measures": ["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "extraction_eco" + }, + "EPC Shows Empty Cavity": { + # EPC Indicates it's empty, so we simulate a fill + "measures": ["cavity_wall_insulation", "mechanical_ventilation"], + "target_sap": 69, # Low C + "plan_type": "empty_cavity_eco" + } + } + + # Always prioritise solar + if solar_identification: + _key = solar_identification.split(":")[0] + else: + _key = cavity_identification.split(":")[0] + + mapped = identification_map[_key] + measures = mapped["measures"] + + # If we have already installed an ASHP, we adjust the measures + if "air_source_heat_pump" in already_installed: + if "high_heat_retention_storage_heaters" in measures: + # If we have a HHRSH already, we remove it + measures.remove("high_heat_retention_storage_heaters") + # Add in ASHP (replacing HHRSH if already had) + measures.append("air_source_heat_pump") + + current_sap = prepared_epc.current_energy_efficiency + # If we have a solar package, and the property is a D or 
above, we don't need to do lofts + if "solar_eco4" in mapped["plan_type"] and current_sap >= 55: + if "loft_insulation" in measures: + measures.remove("loft_insulation") + + return measures, mapped["target_sap"], mapped["plan_type"], already_installed + + +def build_cloudwatch_log_url(start_ms: int) -> str: + """ + Build a CloudWatch Logs URL for the current Lambda invocation, + including timestamp window from start_ms to end_ms (epoch ms). + """ + region = os.environ["AWS_REGION"] + log_group = os.environ["AWS_LAMBDA_LOG_GROUP_NAME"] + log_stream = os.environ["AWS_LAMBDA_LOG_STREAM_NAME"] + + # CloudWatch console requires / encoded as $252F + encoded_group = log_group.replace("/", "$252F") + encoded_stream = log_stream.replace("/", "$252F") + + # Return the full URL with time range + return ( + f"https://console.aws.amazon.com/cloudwatch/home?" + f"region={region}" + f"#logsV2:log-groups/log-group/{encoded_group}" + f"/log-events/{encoded_stream}" + f"$3Fstart={start_ms}" + ) + + +def handle_error(msg, e, subtask_id, status=500, start_ms=None): + # When the pipeline fails, handles error process + cloud_logs_url = build_cloudwatch_log_url(start_ms) + + SubTaskInterface().update_subtask_status( + subtask_id=UUID(subtask_id), + status="failed", + outputs=str(e), + cloud_logs_url=cloud_logs_url + ) + logger.error(msg, exc_info=True) + return Response(status_code=status, content=msg) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index a213214d..dff7a546 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -12,3 +12,5 @@ boto3==1.35.44 openpyxl==3.1.2 # Basic pytz +sqlmodel + diff --git a/backend/app/tasks/__init__.py b/backend/app/tasks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/tasks/router.py b/backend/app/tasks/router.py new file mode 100644 index 00000000..90b62dd1 --- /dev/null +++ b/backend/app/tasks/router.py @@ -0,0 
+1,189 @@
+from fastapi import APIRouter, Depends, HTTPException
+from uuid import UUID
+import json  # ← REQUIRED for json.loads
+
+from backend.app.dependencies import validate_token
+from backend.app.tasks.schema import (
+    CreateTaskRequest,
+    UpdateTaskStatusRequest,
+    CreateSubTaskRequest,
+    UpdateSubTaskStatusRequest,
+    FinalizeSubTaskRequest,
+    TaskSqsTriggerRequest
+)
+
+# Correct location of interfaces
+from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface
+
+from backend.app.db.connection import get_db_session
+from backend.app.db.models.tasks import Task, SubTask
+from sqlmodel import select
+
+
+router = APIRouter(
+    prefix="/tasks",
+    tags=["tasks"],
+    dependencies=[Depends(validate_token)],
+)
+
+
+# ============================================================
+# Create Task
+# ============================================================
+@router.post("/", summary="Create a new task and its first subtask")
+async def create_task(req: CreateTaskRequest):
+    tasks = TasksInterface()
+    task_id, subtask_id = tasks.create_task(
+        task_source=req.task_source,
+        service=req.service,
+        inputs=req.inputs,
+    )
+    return {"task_id": task_id, "subtask_id": subtask_id}
+
+
+# ============================================================
+# Get Task + Subtasks
+# ============================================================
+@router.get("/{task_id}", summary="Get a task and its subtasks")
+async def get_task(task_id: UUID):
+    with get_db_session() as session:
+        task = session.get(Task, task_id)
+        if not task:
+            raise HTTPException(status_code=404, detail="Task not found")
+
+        subtasks = session.exec(
+            select(SubTask).where(SubTask.task_id == task_id)
+        ).all()
+
+        formatted = []
+        for st in subtasks:
+            formatted.append({
+                **st.dict(),
+                "inputs": json.loads(st.inputs) if st.inputs else None,
+                "outputs": json.loads(st.outputs) if st.outputs else None,
+                "cloud_logs_url": st.cloud_logs_url,
+            })
+
+        return {
+            "task": task,
+            "subtasks":
formatted, + } + + +# ============================================================ +# Update Task Status +# ============================================================ +@router.put("/{task_id}/status", summary="Update a task's status") +async def update_task_status(task_id: UUID, req: UpdateTaskStatusRequest): + tasks = TasksInterface() + try: + updated = tasks.update_task_status(task_id, req.status) + return {"task_id": updated.id, "status": updated.status} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + + +# ============================================================ +# Create Additional Subtask +# ============================================================ +@router.post("/{task_id}/subtasks", summary="Create a new subtask under a task") +async def create_subtask(task_id: UUID, req: CreateSubTaskRequest): + subtasks = SubTaskInterface() + try: + st = subtasks.create_subtask(task_id, req.inputs) + return {"subtask_id": st.id, "task_id": task_id, "status": st.status} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + + +# ============================================================ +# Update Subtask Status +# ============================================================ +@router.put("/subtask/{subtask_id}/status", summary="Update a subtask's status") +async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusRequest): + subtasks = SubTaskInterface() + try: + st = subtasks.update_subtask_status(subtask_id, req.status) + return {"subtask_id": st.id, "status": st.status} + except ValueError as e: + raise HTTPException(status_code=404, detail=str(e)) + + +# === +# Sub task is complete +@router.post("/subtask/{subtask_id}/finalize", summary="Finalize a subtask with status, outputs, logs") +async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest): + subtasks = SubTaskInterface() + + try: + st = subtasks.finalize_subtask( + subtask_id=subtask_id, + status=req.status, + 
outputs=req.outputs, + cloud_logs_url=req.cloud_logs_url + ) + + return { + "subtask_id": st.id, + "status": st.status, + "outputs": req.outputs, + "cloud_logs_url": req.cloud_logs_url, + } + + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + + +# for testing: + +import boto3 +import json +from backend.app.tasks.schema import TaskSqsTriggerRequest +from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface +from backend.app.config import get_settings + +sqs = boto3.client("sqs") + +@router.post("/trigger", summary="Create task + subtask and publish to SQS", status_code=202) +async def trigger_task(req: TaskSqsTriggerRequest): + """ + Creates a Task + SubTask, then pushes the SubTask into SQS so a Lambda can process it. + If inputs are empty, automatically replaced with {}. + """ + + settings = get_settings() + + tasks = TasksInterface() + + # ---- Normalize empty inputs ---- + inputs = req.inputs or {} # ensures {} even if null + + # ---- 1. Create Task + SubTask ---- + task_id, subtask_id = tasks.create_task( + task_source=req.task_source, + service=req.service, + inputs=inputs, + ) + + # ---- 2. 
Prepare SQS payload ---- + sqs_payload = { + "subtask_id": str(subtask_id), + "params": inputs, + } + + try: + response = sqs.send_message( + QueueUrl=f"https://sqs.{settings.AWS_REGION}.amazonaws.com/" + f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue", + MessageBody=json.dumps(sqs_payload) + ) + except Exception as e: + raise HTTPException(status_code=500, detail=f"SQS error: {e}") + + return { + "message": "Task triggered", + "task_id": task_id, + "subtask_id": subtask_id, + "sqs_message_id": response.get("MessageId"), + "inputs_sent": inputs, + } \ No newline at end of file diff --git a/backend/app/tasks/schema.py b/backend/app/tasks/schema.py new file mode 100644 index 00000000..a5b4424b --- /dev/null +++ b/backend/app/tasks/schema.py @@ -0,0 +1,31 @@ +from typing import Optional, Any, Dict +from uuid import UUID +from pydantic import BaseModel + + +class CreateTaskRequest(BaseModel): + task_source: str + service: Optional[str] = None + inputs: Optional[Dict[str, Any]] = None # JSON object + + +class UpdateTaskStatusRequest(BaseModel): + status: str + + +class CreateSubTaskRequest(BaseModel): + inputs: Optional[Dict[str, Any]] = None # JSON object + + +class UpdateSubTaskStatusRequest(BaseModel): + status: str + +class FinalizeSubTaskRequest(BaseModel): + status: str # "complete" or "failed" + outputs: Optional[Dict[str, Any]] = None + cloud_logs_url: Optional[str] = None + +class TaskSqsTriggerRequest(BaseModel): + task_source: str + service: Optional[str] = None + inputs: Dict[str, Any] # forwarded into SubTask.inputs + SQS message \ No newline at end of file diff --git a/backend/app/whlg/__init__.py b/backend/app/whlg/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/whlg/router.py b/backend/app/whlg/router.py new file mode 100644 index 00000000..eaaffb41 --- /dev/null +++ b/backend/app/whlg/router.py @@ -0,0 +1,77 @@ +# import boto3 +# import json +# import math +# import asyncio +# import random +# +# from datetime import 
datetime + +from fastapi import APIRouter, Depends +from backend.app.dependencies import validate_token +# from backend.app.plan.schemas import PlanTriggerRequest +from backend.app.config import get_settings +# from sqlalchemy.orm import sessionmaker +from utils.logger import setup_logger +# from backend.app.db.connection import db_engine +# from backend.app.db.functions.recommendations_functions import create_scenario +# import pandas as pd +from backend.app.whlg.schema import WHLGElligibilityRequest + +# from utils.s3 import read_csv_from_s3 +# from sqlalchemy.dialects.postgresql import insert +# from backend.app.db.connection import get_db_session +# from backend.app.db.models.whlg import Whlg +# from backend.app.db.functions.whlg_functions import upsert_whlg_postcode + +logger = setup_logger() + +if get_settings().ENVIRONMENT == "local": + router = APIRouter( + prefix="/whlg", + tags=["whlg"], + ) + +else: + router = APIRouter( + prefix="/whlg", + tags=["whlg"], + dependencies=[Depends(validate_token)], + responses={404: {"description": "Not found"}} + ) + + +@router.get("/") +async def whlg_entrypoint(): + # body needs to include postcode, UPRN [task ID?] + # + # Refer to the plan trigger route for code + # 1) Create an event schema and store it in the schemas file + # 2) Build the tasks functions + # 3) Read in the funding csx. This can be found as such: + # whlg_eligible_postcodes = read_csv_from_s3( + # bucket_name=get_settings().DATA_BUCKET, + # filepath="funding/whlg eligible postcodes.csv", + # ) + # whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + # Check the postcode against this file + # We need to store this somewhere????!!!??!??!?!?!?!??!??!??!??!??!??!??!??!??!??! Create a new table! 
+ # Update subtask to be complete + # Once this is complete, build the logs stuff, add the cloudwatch logs ID to the database + return {"hello": "from whlg"} + + +@router.post("/eligible") +async def eligiable(body: WHLGElligibilityRequest): + # postcode = body.postcode or "" + # postcode = postcode.lower().replace(" ", "") + # + # whlg_eligible_postcodes = read_csv_from_s3( + # bucket_name=get_settings().DATA_BUCKET, + # filepath="funding/whlg eligible postcodes.csv", + # ) + # whlg_eligible_postcodes = pd.DataFrame(whlg_eligible_postcodes) + # whlg_eligible_postcodes['Postcode'] = whlg_eligible_postcodes['Postcode'].str.replace(' ', '', regex=False) + # + # is_eligible = postcode in whlg_eligible_postcodes['Postcode'].values + # return {"whlg_eligible": is_eligible} + return None diff --git a/backend/app/whlg/schema.py b/backend/app/whlg/schema.py new file mode 100644 index 00000000..648ecbf3 --- /dev/null +++ b/backend/app/whlg/schema.py @@ -0,0 +1,4 @@ +from pydantic import BaseModel, Field + +class WHLGElligibilityRequest(BaseModel): + postcode: str = Field(..., example="B93 8SY") \ No newline at end of file diff --git a/backend/condition/__init__.py b/backend/condition/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/condition/file_type.py b/backend/condition/file_type.py new file mode 100644 index 00000000..b9a4357f --- /dev/null +++ b/backend/condition/file_type.py @@ -0,0 +1,12 @@ +from enum import Enum + +class FileType(Enum): + LBWF = "lbwf" + +def detect_file_type(filepath: str) -> FileType: + path = filepath.lower() + + if "lbwf" in path: + return FileType.LBWF + + raise ValueError("Unrecognised file path") \ No newline at end of file diff --git a/backend/condition/handler.py b/backend/condition/handler.py new file mode 100644 index 00000000..5279b029 --- /dev/null +++ b/backend/condition/handler.py @@ -0,0 +1,16 @@ +from typing import Mapping, Any +from io import BytesIO + +from utils.logger import setup_logger +from 
backend.condition.processor import process_file + + +logger = setup_logger() + +def handler(event: Mapping[str, Any], context: Any) -> None: + # Temporary stub for PoC wiring + dummy_stream = BytesIO(b"") + + source_key = event.get("source_key", "unknown-source") + + process_file(dummy_stream, source_key) \ No newline at end of file diff --git a/backend/condition/local_runner.py b/backend/condition/local_runner.py new file mode 100644 index 00000000..28f9b06c --- /dev/null +++ b/backend/condition/local_runner.py @@ -0,0 +1,25 @@ +from pathlib import Path + +from backend.condition.processor import process_file + +def main() -> None: + try: + # Works in scripts / debugger / pytest + ROOT_DIR = Path(__file__).resolve().parents[1] + except NameError: + # __file__ is not defined in notebooks + ROOT_DIR = Path.cwd() + + path: Path = ROOT_DIR / "condition" / "sample_data" + + lbwf_path: Path = path / "lbwf" / "LBWF - Example Asset Data September 2025.xlsx" # TODO: get this from s3 as part of devcontainer init + + with lbwf_path.open("rb") as f: + process_file( + file_stream=f, + source_key=lbwf_path.as_posix(), + ) + +if __name__ == "__main__": + main() + diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py new file mode 100644 index 00000000..01dce75d --- /dev/null +++ b/backend/condition/parsing/factory.py @@ -0,0 +1,9 @@ +from backend.condition.file_type import FileType +from backend.condition.parsing.parser import Parser +from backend.condition.parsing.lbwf_parser import LbwfParser + +def select_parser(file_type: FileType) -> Parser: + if file_type is FileType.LBWF: + return LbwfParser() + + raise ValueError("Unrecognised file type, unable to instantiate Parser") diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py new file mode 100644 index 00000000..8d52f6d5 --- /dev/null +++ b/backend/condition/parsing/lbwf_parser.py @@ -0,0 +1,180 @@ +from typing import BinaryIO, Any, Dict, Iterator, 
List, Tuple +from openpyxl import Workbook, load_workbook +from collections import defaultdict + +from backend.condition.parsing.parser import Parser +from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse +from backend.condition.utils.date_utils import normalise_date +from utils.logger import setup_logger + +logger = setup_logger + +class LbwfParser(Parser): + + def parse(self, file_stream: BinaryIO) -> Any: + wb: Workbook = load_workbook(file_stream) + address_to_uprn_map: Dict[str, int] = self._generate_address_to_uprn_dict(wb) + + assets = self._parse_assets(wb) + houses = self._parse_houses(wb, address_to_uprn_map) + + self._merge_assets_into_houses(assets, houses) + + return houses + + @staticmethod + def _parse_assets(wb: Workbook) -> List[LbwfAssetCondition]: + assets_sheet = wb["Houses Asset Data"] + asset_rows = assets_sheet.iter_rows(values_only=True) + + asset_headers = next(asset_rows) + asset_header_indexes = LbwfParser._get_column_indexes_by_name(asset_headers) + + assets: List[LbwfAssetCondition] = [] + for row in asset_rows: + try: + assets.append( + LbwfParser._map_row_to_asset_record(row, asset_header_indexes) + ) + except Exception as e: + logger.error(f"Error mapping LBWF row to asset record: {e}") + continue + + return assets + + @staticmethod + def _parse_houses( + wb: Workbook, + address_to_uprn_map: Dict[str, int], + ) -> List[LbwfHouse]: + houses_sheet = wb["Houses"] + house_rows = houses_sheet.iter_rows(values_only=True) + + house_headers = next(house_rows) + house_header_indexes = LbwfParser._get_column_indexes_by_name(house_headers) + + houses: List[LbwfHouse] = [] + for row in house_rows: + try: + houses.append( + LbwfParser._map_row_to_house_record( + row, + house_header_indexes, + address_to_uprn_map, + ) + ) + except Exception as e: + logger.error(f"Error mapping LBWF row to house record: {e}") + continue + + return houses + 
+ @staticmethod + def _merge_assets_into_houses( + assets: List[LbwfAssetCondition], + houses: List[LbwfHouse], + ) -> None: + assets_by_ref: Dict[int, List[LbwfAssetCondition]] = defaultdict(list) + for asset in assets: + assets_by_ref[asset.prop_ref].append(asset) + + for house in houses: + house.assets = assets_by_ref.get(house.reference, []) + + + @staticmethod + def _map_row_to_house_record( + row: Any | Tuple[object | None, ...], + header_indexes: Dict[str, int], + address_to_uprn_map: Dict[str, int], + ) -> LbwfHouse: + address: str = row[header_indexes["Address"]] + + return LbwfHouse( + uprn=LbwfParser._get_uprn_from_address(address, address_to_uprn_map), + reference=row[header_indexes["Reference"]], + address=address, + epc=row[header_indexes["EPC "]], + shdf=row[header_indexes["SHDF"]], + house=row[header_indexes["HOSUE"]], + fail_decency=row[header_indexes["Fail Decency"]], + assets=[], + ) + + @staticmethod + def _map_row_to_asset_record( + row: Any | Tuple[object | None, ...], + header_indexes: Dict[str, int], + ) -> LbwfAssetCondition: + return LbwfAssetCondition( + prop_ref=row[header_indexes["PROP REF"]], + domna=row[header_indexes["Domna"]], + address=row[header_indexes["ADDRESS"]], + ownership=row[header_indexes["OWNERSHIP"]], + prop_status=row[header_indexes["PROP STATUS"]], + prop_type=row[header_indexes["PROP TYPE"]], + prop_sub_type=row[header_indexes["PROP SUB TYPE"]], + element_group=row[header_indexes["ELEMENT GROUP"]], + element_code=row[header_indexes["ELEMENT CODE"]], + element_code_description=row[header_indexes["ELEMENT CODE DESCRIPTION"]], + attribute_code=row[header_indexes["ATTRIBUTE CODE"]], + attribute_code_description=row[header_indexes["ATTRIBUTE CODE DESCRIPTION"]], + element_date_value=row[header_indexes["ELEMENT DATE VALUE"]], + element_numerical_value=row[header_indexes["ELEMENT NUMERIC VALUE"]], + element_text_value=row[header_indexes["ELEMENT TEXT VALUE"]], + quantity=row[header_indexes["QUANTITY"]], + 
install_date=normalise_date(row[header_indexes["INSTALL DATE"]]), + remaining_life=row[header_indexes["REMAINING LIFE"]], + element_comments=row[header_indexes["ELEMENT COMMENTS"]], + ) + + + @staticmethod + def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]: + sheet: Workbook = wb["All Energy Breakdown "] + + rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True) + + headers = next(rows) + header_indexes: Dict[str, int] = LbwfParser._get_column_indexes_by_name(headers) + + address_idx = header_indexes["Address"] + uprn_idx = header_indexes["UPRN"] + + mapping: Dict[str, int | None] = {} + + for row in rows: + address = row[address_idx] + uprn = row[uprn_idx] + + if not isinstance(address, str): + continue + + if uprn is not None and not isinstance(uprn, int): + raise ValueError(f"Unexpected UPRN value: {uprn!r}") + + mapping[address] = uprn + + return mapping + + + def _get_column_indexes_by_name( + headers: Tuple[object | None, ...] + ) -> Dict[str, int]: + index: Dict[str, int] = {} + + for i, header in enumerate(headers): + if isinstance(header, str): + index[header] = i + + return index + + def _get_uprn_from_address(address: str, address_to_uprn_map: Dict[str, int]) -> int | None: + pseudo_name = address.split(",")[0] + + if pseudo_name.lower() in (k.lower() for k in address_to_uprn_map.keys()): + return address_to_uprn_map[pseudo_name.upper()] + + return None + diff --git a/backend/condition/parsing/parser.py b/backend/condition/parsing/parser.py new file mode 100644 index 00000000..105fda36 --- /dev/null +++ b/backend/condition/parsing/parser.py @@ -0,0 +1,8 @@ +from abc import ABC, abstractmethod +from typing import BinaryIO, Any + +class Parser(ABC): + + @abstractmethod + def parse(self, file_stream: BinaryIO) -> Any: + pass \ No newline at end of file diff --git a/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py new file mode 
100644 index 00000000..dffd1e53 --- /dev/null +++ b/backend/condition/parsing/records/lbwf/lbwf_asset_condition.py @@ -0,0 +1,26 @@ +from dataclasses import dataclass +from datetime import date + + +@dataclass +class LbwfAssetCondition: + prop_ref: int + domna: int + address: str + ownership: str + prop_status: str + prop_type: str # TODO: make this enum? + prop_sub_type: str # TODO: make this enum? + element_group: str + element_code: str + element_code_description: str + attribute_code: str + attribute_code_description: str + element_date_value: str | None = None + element_numerical_value: int | None = None + element_text_value: str | None = None + quantity: int | None = None + install_date: date | None = None + remaining_life: int | None = None + element_comments: str | None = None + diff --git a/backend/condition/parsing/records/lbwf/lbwf_house.py b/backend/condition/parsing/records/lbwf/lbwf_house.py new file mode 100644 index 00000000..6db16862 --- /dev/null +++ b/backend/condition/parsing/records/lbwf/lbwf_house.py @@ -0,0 +1,15 @@ +from dataclasses import dataclass +from typing import List + +from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition + +@dataclass +class LbwfHouse: + uprn: int + reference: int + address: str + epc: str # TODO: make enum + shdf: bool + house: str + fail_decency: int + assets: List[LbwfAssetCondition] \ No newline at end of file diff --git a/backend/condition/processor.py b/backend/condition/processor.py new file mode 100644 index 00000000..fb06c888 --- /dev/null +++ b/backend/condition/processor.py @@ -0,0 +1,18 @@ +from typing import Any, BinaryIO, List + +from backend.condition.parsing.parser import Parser +from utils.logger import setup_logger +from backend.condition.file_type import FileType, detect_file_type +from backend.condition.parsing.factory import select_parser + +def process_file(file_stream: BinaryIO, source_key: str) -> None: + print(f"[processor] Received file: {source_key}") + 
+ # Instantiation + file_type: FileType = detect_file_type(source_key) + parser: Parser = select_parser(file_type) + + # Orchestration + records: List[Any] = parser.parse(file_stream) + + print(records) # temp \ No newline at end of file diff --git a/backend/condition/tests/parsing/test_lbwf_parser.py b/backend/condition/tests/parsing/test_lbwf_parser.py new file mode 100644 index 00000000..7556b845 --- /dev/null +++ b/backend/condition/tests/parsing/test_lbwf_parser.py @@ -0,0 +1,134 @@ +from typing import Any +import pytest +from io import BytesIO +from openpyxl import Workbook +from datetime import datetime + +from backend.condition.parsing.lbwf_parser import LbwfParser +from backend.condition.parsing.records.lbwf.lbwf_asset_condition import LbwfAssetCondition +from backend.condition.parsing.records.lbwf.lbwf_house import LbwfHouse + +@pytest.fixture +def lbwf_homes_xlsx_bytes() -> BytesIO: + wb = Workbook() + houses_asset_data = wb.active + houses_asset_data.title = "Houses Asset Data" + houses_asset_data.append([ + "PROP REF", + "Domna", + "ADDRESS", + "OWNERSHIP", + "PROP STATUS", + "PROP TYPE", + "PROP SUB TYPE", + "ELEMENT GROUP", + "ELEMENT CODE", + "ELEMENT CODE DESCRIPTION", + "ATTRIBUTE CODE", + "ATTRIBUTE CODE DESCRIPTION", + "ELEMENT DATE VALUE", + "ELEMENT NUMERIC VALUE", + "ELEMENT TEXT VALUE", + "QUANTITY", + "INSTALL DATE", + "REMAINING LIFE", + "ELEMENT COMMENTS" + ] + ) + houses_asset_data.append([ + 12345, + 12345, + "123 Fake Street, London, A10 1AB", + "LBWF_OWNED", + "OCCP", + "HOU", + "TERRACED", + "ASSETS", + "AHR_CAT", + "Accessible Housing Register Category", + "F", + "General Needs", + None, + None, + None, + 1, + None, + None, + None, + ]) + houses_asset_data.append([ + 54321, + 54321, + "100 Random Road, London, A10 1AB", + "LBWF_OWNED", + "OCCP", + "HOU", + "EOT", + "ASSETS", + "INTSMKDET", + "Smoke Detectors in Property", + "HARDWRDMNS", + "Hard Wired Mains Smoke Alarm in Property", + None, + None, + None, + 2, + datetime(2019,4,1), 
+ 4, + "Source of Data = Joe Bloggs", + ]) + + houses = wb.create_sheet("Houses") + houses.append(["Reference", "Address", "EPC ", "SHDF", "HOSUE", "Fail Decency"]) + houses.append([12345, "123 Fake Street, London, A10 1AB", "E", "NO", "HOUSE", 2025]) + houses.append([54321, "100 Random Road, London, A10 1AB", "F", "NO", "HOUSE", 2025]) + + all_energy_breakdown = wb.create_sheet("All Energy Breakdown ") # Trailing space is intentional; matches source + all_energy_breakdown.append([ + "UPRN", + "Organisation Reference", + "Alternate Organisation Reference", + "Address", + "Postcode" + ]) + all_energy_breakdown.append([ + 1, + 200, + None, + "123 FAKE STREET", + "A10 1AB" + ]) + all_energy_breakdown.append([ + 2, + 100, + 101, + "100 RANDOM ROAD", + "A10 1AB" + ]) + + stream = BytesIO() + wb.save(stream) + stream.seek(0) + + return stream + +def test_lbwf_parser_passes_houses(lbwf_homes_xlsx_bytes): + # arrange + parser = LbwfParser() + + # act + result: Any = parser.parse(lbwf_homes_xlsx_bytes) + + # assert + # TODO: Improve these asserts + assert len(result) == 2 + + assert isinstance(result[0], LbwfHouse) + assert result[0].uprn == 1 + assert len(result[0].assets) == 1 + assert isinstance(result[0].assets[0], LbwfAssetCondition) + + assert isinstance(result[1], LbwfHouse) + assert result[1].uprn == 2 + assert len(result[1].assets) == 1 + assert isinstance(result[1].assets[0], LbwfAssetCondition) \ No newline at end of file diff --git a/backend/condition/tests/parsing/test_parsing_factory.py b/backend/condition/tests/parsing/test_parsing_factory.py new file mode 100644 index 00000000..481418d7 --- /dev/null +++ b/backend/condition/tests/parsing/test_parsing_factory.py @@ -0,0 +1,15 @@ +import pytest + +from backend.condition.parsing.factory import select_parser +from backend.condition.file_type import FileType + +def test_selects_lbwf_parser(): + # arrange + file_type = FileType.LBWF + expected_class_name = "LbwfParser" + + # act + actual_class_name = 
select_parser(file_type).__class__.__name__ + + # assert + assert expected_class_name == actual_class_name \ No newline at end of file diff --git a/backend/condition/tests/test_detect_file_type.py b/backend/condition/tests/test_detect_file_type.py new file mode 100644 index 00000000..fecf22c1 --- /dev/null +++ b/backend/condition/tests/test_detect_file_type.py @@ -0,0 +1,22 @@ +import pytest + +from backend.condition.file_type import FileType, detect_file_type + +def test_detects_lbwf_file_type(): + # arrange + file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx" + expected_file_type = FileType.LBWF + + # act + actual_file_type: FileType = detect_file_type(file_path_str) + + # assert + assert expected_file_type == actual_file_type + +def test_unknown_filepath_raises_value_error(): + # arrange + file_path_str = "unknown/Example Asset Data.xlsx" + + # act + assert + with pytest.raises(ValueError): + detect_file_type(file_path_str) \ No newline at end of file diff --git a/backend/condition/utils/date_utils.py b/backend/condition/utils/date_utils.py new file mode 100644 index 00000000..713d151c --- /dev/null +++ b/backend/condition/utils/date_utils.py @@ -0,0 +1,18 @@ +from datetime import datetime, date +from typing import Any + + +def normalise_date(value: Any, allow_none: bool = True) -> date | None: + if value is None and allow_none: + return None + + if isinstance(value, datetime): + return value.date() + + if isinstance(value, str): + try: + return datetime.strptime(value.strip(), "%d/%m/%Y").date() + except ValueError as exc: + raise ValueError(f"Invalid date string: {value!r}") from exc + + raise ValueError(f"Unexpected date value: {value!r}") \ No newline at end of file diff --git a/backend/diagnostics/portfolio_diagnostics.py b/backend/diagnostics/portfolio_diagnostics.py new file mode 100644 index 00000000..bcdec24e --- /dev/null +++ b/backend/diagnostics/portfolio_diagnostics.py @@ -0,0 +1,3 @@ +""" +This script is set up to perform broad portfolio 
diagnostics to identify potential issues +""" diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 2e1ede79..50ed0772 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1,4 +1,4 @@ -import ast +import time import json from copy import deepcopy from datetime import datetime @@ -6,74 +6,50 @@ from datetime import datetime from tqdm import tqdm import pandas as pd import numpy as np -from etl.epc.Record import EPCRecord +from uuid import UUID + from backend.SearchEpc import SearchEpc + +from etl.epc.Record import EPCRecord from sqlalchemy.exc import IntegrityError, OperationalError -from sqlalchemy.orm import sessionmaker from starlette.responses import Response +from backend.app.BatterySapScorer import BatterySAPScorer from backend.app.config import get_settings, get_prediction_buckets -from backend.app.db.connection import db_engine -from backend.app.db.functions.materials_functions import get_materials -from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations -from backend.app.db.functions.property_functions import ( - create_property, create_property_details_epc, create_property_targets, update_property_data, - update_or_create_property_spatial_details +from backend.app.db.connection import db_session, db_read_session +import backend.app.db.functions as db_funcs +from backend.app.db.functions.tasks.Tasks import SubTaskInterface + +from backend.app.plan.schemas import PlanTriggerRequest +from backend.app.plan.utils import ( + get_cleaned, patch_epc, extract_property_request_data, parse_eco_packages, handle_error, build_cloudwatch_log_url ) -from backend.app.db.functions.recommendations_functions import ( - create_plan, upload_recommendations, create_scenario -) -from backend.app.db.functions.funding_functions import upload_funding -from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn -from backend.app.db.models.portfolio import rating_lookup -from 
backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES -from backend.app.plan.utils import get_cleaned from backend.app.utils import sap_to_epc import backend.app.assumptions as assumptions from backend.ml_models.api import ModelApi +from backend.ml_models.Valuation import PropertyValuation from backend.Property import Property from backend.apis.GoogleSolarApi import GoogleSolarApi +from backend.addresses.Addresses import Addresses from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser import recommendations.optimiser.optimiser_functions as optimiser_functions from recommendations.Recommendations import Recommendations -from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 -from backend.ml_models.Valuation import PropertyValuation +from recommendations.optimiser.funding_optimiser import optimise_with_scenarios from etl.bill_savings.KwhData import KwhData from etl.spatial.OpenUprnClient import OpenUprnClient from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc -from backend.Funding import Funding -from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths -from recommendations.recommendation_utils import convert_thickness_to_numeric, get_wall_u_value +from utils.logger import setup_logger +from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 logger = setup_logger() BATCH_SIZE = 5 -SCORING_BATCH_SIZE = 100 - - -def patch_epc(patch, epc_records): - """ - This utility function is useful to patch the epc data if we have data from the customer - :return: - """ - - for patch_variable, patch_value in patch.items(): - - if patch_variable in ["address", "postcode"]: - continue - - if patch_value == "": - continue - if patch_variable in epc_records["original_epc"]: - 
epc_records["original_epc"][patch_variable] = patch_value - - return epc_records +SCORING_BATCH_SIZE = 300 def extract_portfolio_aggregation_data( @@ -145,14 +121,17 @@ def extract_portfolio_aggregation_data( cost = sum([r["total"] for r in default_recommendations]) sap_point_improvement = sum([r["sap_points"] for r in default_recommendations]) - lower_bound_valuation_uplift = ( - property_value_increase_ranges[p.id]["lower_bound_increased_value"] - - property_value_increase_ranges[p.id]["current_value"] - ) - upper_bound_valuation_uplift = ( - property_value_increase_ranges[p.id]["upper_bound_increased_value"] - - property_value_increase_ranges[p.id]["current_value"] - ) + if not pd.isnull(property_value_increase_ranges[p.id]["current_value"]): + lower_bound_valuation_uplift = ( + property_value_increase_ranges[p.id]["lower_bound_increased_value"] - + property_value_increase_ranges[p.id]["current_value"] + ) + upper_bound_valuation_uplift = ( + property_value_increase_ranges[p.id]["upper_bound_increased_value"] - + property_value_increase_ranges[p.id]["current_value"] + ) + else: + lower_bound_valuation_uplift, upper_bound_valuation_uplift = 0, 0 agg_data.append({ "pre_retrofit_epc": p.data["current-energy-rating"], @@ -346,75 +325,6 @@ def get_request_property_data(body: PlanTriggerRequest): return patches, already_installed, non_invasive_recommendations, valuation_data -def extract_property_request_data( - config, patches, already_installed, non_invasive_recommendations, valuation_data, uprn -): - patch_has_uprn = "uprn" in patches[0] if patches else True - if patch_has_uprn: - patch = next(( - x for x in patches if str(x["uprn"]) == str(config["uprn"]) - ), {}) - else: - patch = next(( - x for x in patches if (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) - - property_already_installed = next(( - x for x in already_installed if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) - - # 
Because we have some non-invasive recommendations that match on address and postcode, but not UPRN - # we need to check existence of uprn - has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False - if has_uprn: - has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None] - - if has_uprn: - property_non_invasive_recommendations = next(( - x for x in non_invasive_recommendations if - (str(x["uprn"]) == str(uprn)) - ), {}) - - # We patch the non-invasive recs that are ['cavity_extract_and_refill'] - else: - property_non_invasive_recommendations = next(( - x for x in non_invasive_recommendations if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), {}) - - if isinstance(property_non_invasive_recommendations.get("recommendations"), str): - property_non_invasive_recommendations["recommendations"] = ast.literal_eval( - property_non_invasive_recommendations["recommendations"] - ) - transformed = [] - for rec in property_non_invasive_recommendations["recommendations"]: - if isinstance(rec, str): - transformed.append({"type": rec, }) - else: - transformed.append(rec) - - property_non_invasive_recommendations["recommendations"] = transformed - - # Check if the valuation data has uprn - valuation_has_uprn = "uprn" in valuation_data[0] if valuation_data else False - if valuation_has_uprn: - valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None] - - if valuation_has_uprn: - property_valution = next(( - float(x["valuation"]) for x in valuation_data if - (str(x["uprn"]) == str(uprn)) - ), None) - else: - property_valution = next(( - float(x["valuation"]) for x in valuation_data if - (x["address"] == config["address"]) and (x["postcode"] == config["postcode"]) - ), None) - - return patch, property_already_installed, property_non_invasive_recommendations, property_valution - - def get_funding_data(): """ This function retrieves the eco project scores matrix and the warm homes local grant 
funding data @@ -453,26 +363,202 @@ def get_funding_data(): return project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes +def check_duplicate_uprns(plan_input): + """ + Simple function to check if the input data contains duplicated UPRNS. + If there are duplicates, an exception will be rasied + :return: + """ + # Check for duplicate UPRNS + input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")] + + if input_uprns: + # Check for dupes + if len(input_uprns) != len(set(input_uprns)): + # Find the duplicate UPRNs + duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1]) + # de-dupe input_uprns + raise ValueError(f"Duplicate UPRNs in the input data: {duplicates}") + + return True + + +def check_duplicate_property_ids(input_properties): + """ + Simple function to check if the input data contains duplicated property IDs. This will happen in very rare + cases where we have properties across different servers, where the input UPRN is possibly incorrect and we + find the right property via an address search, instead of a UPRN search and so we end up with the same property + twice. 
+ :param input_properties: + :return: + """ + + input_property_ids = [x.id for x in input_properties] + + if input_property_ids: + # Check for dupes + if len(input_property_ids) != len(set(input_property_ids)): + # Find the duplicate property IDs + duplicates = set([x for x in input_property_ids if input_property_ids.count(x) > 1]) + # de-dupe input_uprns + raise ValueError(f"Duplicate property IDs in the input data: {duplicates}") + + # Check for dupe UPRNS + input_uprns = [x.uprn for x in input_properties if x.uprn is not None] + if input_uprns: + if len(input_uprns) != len(set(input_uprns)): + duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1]) + raise ValueError(f"Duplicate UPRNs in the input properties: {duplicates}") + + return True + + +def averages_cleaning(prepared_epc: EPCRecord, cleaning_data: pd.DataFrame): + """ + Placeholder cleaning function to handle edge cases where we have missing data for + number of habitable rooms, number of heated rooms and floor height. 
We take the median + This need was born out of the Peabody project + :param prepared_epc: + :param cleaning_data: + :return: + """ + + variables_to_clean = [ + "number_habitable_rooms", + "number_heated_rooms", + "floor_height", + ] + + if not any([pd.isnull(prepared_epc.prepared_epc[k]) for k in variables_to_clean]): + # Nothing to do + return prepared_epc + + # Clean with cleaning_data + clean_with = cleaning_data[ + (cleaning_data["property_type"] == prepared_epc.prepared_epc["property_type"]) & + (cleaning_data["property_type"] == prepared_epc.prepared_epc["property_type"]) + ] + if prepared_epc.prepared_epc["local_authority"] in clean_with["local_authority"].values: + clean_with = clean_with[ + clean_with["local_authority"] == prepared_epc.prepared_epc["local_authority"] + ] + + floor_area_clean_with = clean_with[ + (clean_with["total_floor_area"] <= prepared_epc.prepared_epc["total_floor_area"] * 1.1) & + (clean_with["total_floor_area"] >= prepared_epc.prepared_epc["total_floor_area"] * 0.9) + ] + + if not floor_area_clean_with.empty: + clean_with = floor_area_clean_with + + clean_n_habitable_rooms = int(round(clean_with["number_habitable_rooms"].median())) + clean_n_heated_rooms = int(round(clean_with["number_heated_rooms"].median())) + if clean_n_heated_rooms > clean_n_habitable_rooms: + clean_n_heated_rooms = clean_n_habitable_rooms + + clean_floor_height = clean_with["floor_height"].median() + + # We now fill + if not pd.isnull(clean_n_habitable_rooms) and pd.isnull( + prepared_epc.prepared_epc["number_habitable_rooms"]): + prepared_epc.prepared_epc["number_habitable_rooms"] = clean_n_habitable_rooms + prepared_epc.number_habitable_rooms = clean_n_habitable_rooms + + if not pd.isnull(clean_n_heated_rooms) and pd.isnull( + prepared_epc.prepared_epc["number_heated_rooms"]): + prepared_epc.prepared_epc["number_heated_rooms"] = clean_n_heated_rooms + prepared_epc.number_heated_rooms = clean_n_heated_rooms + + if not pd.isnull(clean_floor_height) and 
pd.isnull( + prepared_epc.prepared_epc["floor_height"]): + prepared_epc.prepared_epc["floor_height"] = clean_floor_height + prepared_epc.floor_height = clean_floor_height + + # if pd.isnull(prepared_epc.lighting_cost_current): + # # This is a basic assumption as an average + # prepared_epc.prepared_epc["lighting_cost_current"] = assumptions.AVERAGE_LIGHTING_COST + # prepared_epc.lighting_cost_current = assumptions.AVERAGE_LIGHTING_COST + + # if pd.isnull(prepared_epc.heating_cost_current): + # # This is a basic assumption as an average + # appliance_cost = AnnualBillSavings.estimate_appliances_energy_use( + # total_floor_area=prepared_epc.total_floor_area + # ) * AnnualBillSavings.ELECTRICITY_PRICE_CAP + # heating_cleaned_value = assumptions.AVERAGE_HEATING_AND_APPLIANCE_COST - appliance_cost + # prepared_epc.prepared_epc["heating_cost_current"] = heating_cleaned_value + # prepared_epc.heating_cost_current = heating_cleaned_value + # + # if pd.isnull(prepared_epc.hot_water_cost_current): + # # This is a basic assumption as an average + # prepared_epc.prepared_epc["hot_water_cost_current"] = assumptions.AVERAGE_HOT_WATER_COST + # prepared_epc.hot_water_cost_current = assumptions.AVERAGE_HOT_WATER_COST + # + # if pd.isnull(prepared_epc.energy_consumption_potential): + # # Set to current + # prepared_epc.prepared_epc["energy_consumption_potential"] = prepared_epc.energy_consumption_current + # prepared_epc.energy_consumption_potential = prepared_epc.energy_consumption_current + + return prepared_epc + + +def extract_address_data(config, body): + """ + Simple helper to grab address data from the config + :return: + """ + uprn = config.get("uprn", None) + if pd.isnull(uprn): + uprn = None + if uprn: + uprn = int(float(uprn)) + + address1 = config.get("address", None) + # Handle domna address list format + if pd.isnull(address1) and body.file_format == "domna_asset_list": + address1 = config.get("domna_address_1", None) + + address1 = str(int(address1)) if 
isinstance(address1, float) else str(address1) + full_address = config.get("domna_full_address", "") if body.file_format == "domna_asset_list" else None + if not isinstance(full_address, str): # Catch for when the full address is nan + full_address = None + + return uprn, address1, full_address + + +def keep_max_sap_per_measure_type(items): + # First pass: find max sap_points per measure_type + max_by_type = {} + for item in items: + t = item["measure_type"] + max_by_type[t] = max(max_by_type.get(t, float("-inf")), item["sap_points"]) + + # Second pass: keep only items matching the max for their type + output = [] + for measure_type, points in max_by_type.items(): + to_consider = [x for x in items if x["measure_type"] == measure_type and x["sap_points"] == points] + output.append(to_consider[0]) # pick the first one in case of ties + + return output + + async def model_engine(body: PlanTriggerRequest): logger.info("Model Engine triggered with body: %s", json.loads(body.model_dump_json())) - logger.info("Connecting to db") - session = sessionmaker(bind=db_engine)() created_at = datetime.now().isoformat() - - # TODO: if the measure is already installed, it should actually be the very first phase + start_ms = int(time.time() * 1000) try: - session.begin() logger.info("Getting the inputs") if body.file_type == "xlsx": + logger.info("Getting the plan input") plan_input = read_excel_from_s3( bucket_name=get_settings().PLAN_TRIGGER_BUCKET, file_key=body.trigger_file_path, sheet_name=body.sheet_name, header_row=0, ) + logger.info("Got the plan input from excel") # We now handle the case where the input data is a Domna standardised assset list if body.file_format == "domna_asset_list": @@ -480,16 +566,35 @@ async def model_engine(body: PlanTriggerRequest): plan_input = plan_input.rename( columns={"domna_address_1": "address", "domna_postcode": "postcode", "epc_os_uprn": "uprn"} ) - # Where the EPC has been estimated, that is because a UPRN wasn't avaialble and so we remote 
UPRN - plan_input["uprn"] = np.where(plan_input["estimated"].isin([1, True]), None, plan_input["uprn"]) + # Where the EPC has been estimated, that is because a UPRN wasn't avaialble and so we remove UPRN + # This will be reflexted + if "estimated" not in plan_input.columns: + plan_input["estimated"] = False + + plan_input["uprn"] = np.where( + plan_input["estimated"].isin([1, True]) & ( + (plan_input["uprn"] < 0) | pd.isnull(plan_input["uprn"]) + ), None, plan_input["uprn"] + ) # We handle the landlord property type and built form plan_input["property_type"] = plan_input["landlord_property_type"].copy() - plan_input["built_form"] = plan_input["landlord_built_form"].copy() + if "landlord_built_form" in plan_input.columns: + plan_input["built_form"] = plan_input["landlord_built_form"].copy() + else: + plan_input["built_form"] = None + + if "epc_property_type" not in plan_input.columns: + plan_input["epc_property_type"] = None + plan_input["property_type"] = np.where( plan_input["property_type"] == "unknown", plan_input["epc_property_type"], plan_input["property_type"] ) + + if "epc_archetype" not in plan_input.columns: + plan_input["epc_archetype"] = None + plan_input["built_form"] = np.where( plan_input["built_form"] == "unknown", plan_input["epc_archetype"], plan_input["built_form"] ) @@ -500,7 +605,7 @@ async def model_engine(body: PlanTriggerRequest): "bungalow": "Bungalow", "block house": "House", "coach house": "House", - "bedsit": "Flat" + "bedsit": "Flat", } built_form_map = { @@ -512,90 +617,165 @@ async def model_engine(body: PlanTriggerRequest): "enclosed mid-terrace": "Enclosed Mid-Terrace", } # We remap the values to match the EPC expected formats - plan_input["property_type"] = plan_input["property_type"].map(property_type_map) - plan_input["built_form"] = plan_input["built_form"].map(built_form_map) + + # This syntax will actually retain any original values, if they don't get mapped + plan_input["property_type"] = ( + plan_input["property_type"] + 
.map(property_type_map) + .fillna(plan_input["property_type"]) + ) + + plan_input["built_form"] = ( + plan_input["built_form"] + .map(built_form_map) + .fillna(plan_input["built_form"]) + ) plan_input = plan_input.to_dict("records") + else: raise ValueError("Other formats not yet supported") else: + logger.info("Getting the plan input from csv") plan_input = read_csv_from_s3( bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path ) + logger.info("Got the plan input from csv") # We then slide it on the indexes if they are provided if body.index_start is not None and body.index_end is not None: plan_input = plan_input[body.index_start:body.index_end] - # Check for duplicate UPRNS - input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")] - - if input_uprns: - # Check for dupes - if len(input_uprns) != len(set(input_uprns)): - raise ValueError("Duplicate UPRNs in the input data") + # Confirm no duplicate UPRNS + check_duplicate_uprns(plan_input) # If we have patches or overrides, we should read them in here patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body) + if body.file_type == "xlsx" and body.file_format == "domna_asset_list": + # We check if we have valution data + if not valuation_data and body.valuation_file_path in [None, ""]: + # We check plan_input + if "domna_valuation" in plan_input[0]: + valuation_data = [{"uprn": x["uprn"], "valuation": x["domna_valuation"]} for x in plan_input] + + logger.info("Getting cleaning_data") cleaning_data = read_dataframe_from_s3_parquet( bucket_name=get_settings().DATA_BUCKET, file_key="sap_change_model/cleaning_dataset.parquet", ) - input_properties = [] - for config in tqdm(plan_input): + # Prepare input data + addresses = Addresses.from_plan_input(plan_input, body) + logger.info("Checking database for existing properties") - # We validate each record in the file. 
If the record is NOT valid, we need to handle this accordingly - uprn = config.get("uprn", None) - if pd.isnull(uprn): - uprn = None - if uprn: - uprn = int(float(uprn)) + uprns = addresses.get_uprns() + landlord_ids = addresses.get_landlord_ids() + postcodes = addresses.get_postcodes_for_flats() + + # Check if we've seen these properties before + with db_read_session() as session: + existing_properties = db_funcs.property_functions.get_existing_properties( + session, body.portfolio_id, uprns, landlord_ids + ) + property_lookup = {} + for prop in existing_properties: + if prop.uprn: + property_lookup[("uprn", prop.uprn)] = prop.id + if prop.landlord_property_id: + property_lookup[("landlord_property_id", prop.landlord_property_id)] = prop.id + + # List of properties that need to be created in the db + to_create = [] + for addr in addresses: + key = ("uprn", addr.uprn) if addr.uprn else ("landlord_property_id", addr.landlord_property_id) + if key not in property_lookup: + to_create.append(addr) + + logger.info("Checking database for EPC cache") + # Pre-requests to the db + with db_read_session() as session: + epc_cache_by_uprn = db_funcs.epc_functions.EpcStoreService.get_epcs_for_uprns(session, uprns) + postcode_searches = db_funcs.address_functions.get_by_postcodes(session, list(postcodes)) + energy_assessments_by_uprn = db_funcs.energy_assessment_functions.get_latest_assessments_for_uprns( + session, uprns + ) + already_installed_by_uprn = db_funcs.already_installed_functions.get_installed_measure_types_by_uprns( + session, uprns + ) + + # If we have properties that need to be created, we cerate them in bulk + logger.info("Determine new properties to be created") + new_property_ids = set() + if to_create: + logger.info("Creating %d new properties", len(to_create)) + with db_session() as session: + inserted = db_funcs.property_functions.bulk_create_properties( + session, body, to_create, energy_assessments_by_uprn + ) + for prop_id, uprn, landlord_property_id in 
inserted: + new_property_ids.add(prop_id) + + # We append the newly created properties to property_lookup + for prop_id, uprn, landlord_property_id in inserted: + if uprn is not None: + property_lookup[("uprn", uprn)] = prop_id + if landlord_property_id: + property_lookup[("landlord_property_id", landlord_property_id)] = prop_id + + logger.info("Processing each property for model input preparation") + input_properties, inspections_map, eco_packages, epc_upserts = [], {}, {}, [] + for addr, config in tqdm( + zip(addresses, plan_input), + total=len(addresses), + desc="Processing properties", + ): + # ---------- 1) filter fetched data ---------- + epc_cache = epc_cache_by_uprn[addr.uprn] + epc_api_data, epc_page, rrn, = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] + # Extract from EPC cache + if epc_cache.get("status") == db_funcs.epc_functions.EpcStoreService.FRESH: + epc_api_data, epc_page, rrn = epc_cache["epc_api"], epc_cache["epc_page"], epc_cache["epc_page_rrn"] + + # Extract associated UPRNs from the database response + associated_uprns = db_funcs.address_functions.get_associated_uprns( + postcode_searches.get(addr.postcode.upper()), uprn=addr.uprn + ) + + energy_assessment = energy_assessments_by_uprn.get(addr.uprn) + + property_already_installed = list(already_installed_by_uprn[addr.uprn]) epc_searcher = SearchEpc( - address1=str(config["address"]), - postcode=config["postcode"], - uprn=uprn, + address1=addr.address1, + postcode=addr.postcode, + uprn=addr.uprn, auth_token=get_settings().EPC_AUTH_TOKEN, os_api_key="", + full_address=addr.full_address, + heating_system=addr.heating_system, + associated_uprns=associated_uprns ) - epc_searcher.ordnance_survey_client.built_form = config.get("built_form", None) - epc_searcher.ordnance_survey_client.property_type = config.get("property_type", None) + epc_searcher.ordnance_survey_client.built_form = addr.built_form + epc_searcher.ordnance_survey_client.property_type = addr.property_type # 
For the moment, our OS API access is unavailable, so we skip and interpolate - epc_searcher.find_property(skip_os=True) - if epc_searcher.newest_epc.get("estimated") and body.file_format == "domna_asset_list": - epc_searcher.newest_epc["uprn-source"] = epc_searcher.UPRN_SOURCE_SIMULATED - # We check for an energy assessment we have performed on this property: - energy_assessment = get_latest_assessment_by_uprn(session, uprn if uprn is not None else epc_searcher.uprn) + epc_searcher.find_property(skip_os=True, api_data=epc_api_data, overwrite_sap05=True) + epc_searcher.set_uprn_source(file_format=body.file_format) - # Create a record in db - property_id, is_new = create_property( - session=session, - portfolio_id=body.portfolio_id, - address=epc_searcher.address_clean, - postcode=epc_searcher.postcode_clean, - uprn=epc_searcher.uprn, - energy_assessment=energy_assessment + lookup_key = ( + ("uprn", addr.uprn) if addr.uprn is not None else ("landlord_property_id", addr.landlord_property_id) ) - if not is_new and not body.multi_plan: + property_id = property_lookup[lookup_key] + + if not property_id: + logger.error("Could not find property ID for address: %s", addr.request_data) + # Should not happen unless input data is inconsistent continue - if epc_searcher.newest_epc is None: - raise ValueError( - "No EPCs found for this property and did not estimate - likely need to provide a" - "property type and built form" - ) - - if is_new: - create_property_targets( - session, - property_id=property_id, - portfolio_id=body.portfolio_id, - epc_target=body.goal_value, - heat_demand_target=None - ) + is_new = property_id in new_property_ids + if not is_new and not body.multi_plan: + continue # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that. 
# Otherwise, we use the newest EPC @@ -605,60 +785,94 @@ async def model_engine(body: PlanTriggerRequest): epc_searcher, energy_assessment ) - patch, property_already_installed, property_non_invasive_recommendations, property_valuation = ( - extract_property_request_data( - config=config, - patches=patches, - already_installed=already_installed, - non_invasive_recommendations=non_invasive_recommendations, - valuation_data=valuation_data, - uprn=epc_searcher.uprn, - ) + req_data = extract_property_request_data( + address=addr, + patches=patches, + non_invasive_recommendations=non_invasive_recommendations, + valuation_data=valuation_data, + uprn=addr.uprn, ) + # Pull this out as it may get overwritten + property_non_invasive_recommendations, patch = req_data.non_invasive_recommendations, req_data.patch # if we have a remote assment data type, we pull the additional data and include it + epc_page_source = {} if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc.get("estimated")): - logger.info("Retrieving find my epc data") - try: - property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( - epc_searcher.newest_epc + property_non_invasive_recommendations, patch, epc_page_source = ( + RetrieveFindMyEpc.get_from_epc_with_fallback( + epc=epc_searcher.newest_epc, + epc_page=epc_page, + rrn=rrn, + cleaned_address=epc_searcher.address_clean, + config_address=addr.address, + address_postal_town=epc_searcher.address_postal_town ) - except Exception as e: - logger.error(f"Failed to retrieve without cleaning address {e}") - for k in ["address", "address1"]: - epc_searcher.newest_epc[k] = epc_searcher.address_clean - property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc( - epc_searcher.newest_epc - ) - - # If we have a property type, this means when we pull the epc data, we might need to make a patch + ) epc_records = patch_epc(patch, epc_records) - prepared_epc = EPCRecord( - epc_records=epc_records, - 
run_mode="newdata", - cleaning_data=cleaning_data, - ) + prepared_epc = EPCRecord(epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data) + + # TODO: This is a temp function to handle a specific edge case with Peabody. We should + # factor this into EPCRecord as part of the cleaning however we need some more testing + prepared_epc = averages_cleaning(prepared_epc, cleaning_data) + + # If we have an ECO project, we parse the cavity/solar reasons + eco_packages[property_id] = parse_eco_packages(addr, prepared_epc) + + # Final step - extract inspections data, if we have it - we inject into property for usage + property_inspections = db_funcs.inspections_functions.extract_inspection_data(config) + if property_inspections: + inspections_map[property_id] = property_inspections input_properties.append( Property( id=property_id, + uprn=addr.uprn, is_new=is_new, address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, - already_installed=property_already_installed, - property_valuation=property_valuation, + already_installed=property_already_installed + eco_packages.get(property_id)[3], + property_valuation=req_data.valuation, non_invasive_recommendations=property_non_invasive_recommendations, energy_assessment=energy_assessment, + inspections=inspections_map.get(property_id), **Property.extract_kwargs(config), # TODO: Depraecate this ) ) + # If we have: + # 1) No EPC API data + # 2) A real EPC + # 3) A UPRN (meaning that a UPRN could be fetched against that property) + # We store this data + uprn_to_check_against = addr.uprn if addr.uprn is not None else epc_searcher.uprn # Until we enforce uprn + if db_funcs.epc_functions.EpcStoreService.check_insert_needed( + epc_cache, epc_searcher.newest_epc.get("estimated"), uprn_to_check_against, + ): + epc_upserts.append({ + "uprn": uprn_to_check_against, + "epc_api": epc_searcher.data, + "epc_page": epc_page_source.get("page_source"), + "epc_page_rrn": 
epc_page_source.get("rrn"), + }) + if not input_properties: return Response(status_code=204) + check_duplicate_property_ids(input_properties) + + logger.info("Inserting property data") + # We now bulk upload all of the EPC data + with db_session() as session: + db_funcs.epc_functions.EpcStoreService.bulk_upsert_epc_data(session, epc_upserts) + + # We check if we have inspections data and store it in the database if so. We'll update or create + # aginst each property if + with db_session() as session: + db_funcs.inspections_functions.bulk_upsert_inspections_pg(session, inspections_map) + # Set up model api and warm up the lambdas model_api = ModelApi( portfolio_id=body.portfolio_id, @@ -674,9 +888,10 @@ async def model_engine(body: PlanTriggerRequest): # consistent requests to the backend for # the same data logger.info("Reading in materials and cleaned datasets") - materials = get_materials(session) + with db_read_session() as session: + materials = db_funcs.materials_functions.get_materials(session) cleaned = get_cleaned() - project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() + # project_scores_matrix, partial_project_scores_matrix, whlg_eligible_postcodes = get_funding_data() kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True) @@ -695,11 +910,6 @@ async def model_engine(body: PlanTriggerRequest): input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET) [p.set_features(cleaned=cleaned, kwh_client=kwh_client, kwh_predictions=kwh_preds) for p in input_properties] - - # TODO: If a property is semi-detached, we might get roof surfaces for the main building + the neighbour - # TODO: If we can't get high image quality, should we use the solar API? 
Maybe just for semi-detached units with - # extensions, since it doesn't seem to do a great job - logger.info("Performing solar analysis") ofgem_consumption_averages = read_dataframe_from_s3_parquet( @@ -712,34 +922,45 @@ async def model_engine(body: PlanTriggerRequest): ofgem_consumption_averages=ofgem_consumption_averages, body=body ) + with db_session() as session: + input_properties = GoogleSolarApi.building_solar_analysis( + building_solar_config=building_solar_config, + input_properties=input_properties, + session=session, + google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, + solar_materials=[m for m in materials if m["type"] == "solar_pv"], + ) + with db_session() as session: + input_properties = GoogleSolarApi.unit_solar_analysis( + unit_solar_config=unit_solar_config, + input_properties=input_properties, + session=session, + body=body, + solar_materials=[m for m in materials if m["type"] == "solar_pv"], + google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, + inspections_map=inspections_map + ) - input_properties = GoogleSolarApi.building_solar_analysis( - building_solar_config=building_solar_config, - input_properties=input_properties, - session=session, - google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, - solar_materials=[m for m in materials if m["type"] == "solar_pv"] - ) - - input_properties = GoogleSolarApi.unit_solar_analysis( - unit_solar_config=unit_solar_config, - input_properties=input_properties, - session=session, - body=body, - solar_materials=[m for m in materials if m["type"] == "solar_pv"], - google_solar_api_key=get_settings().GOOGLE_SOLAR_API_KEY, - ) + # We also make a tweak - if the property has been flagged for solar but doesn't contain + # any panel performance, we ensure that we have a 3kWp and 4kWp option for the property logger.info("Identifying property recommendations") - recommendations = {} - recommendations_scoring_data = [] - representative_recommendations = {} + recommendations, 
recommendations_scoring_data, representative_recommendations = {}, [], {} for p in tqdm(input_properties): + # We set the ECO package data, if we have it + property_eco_package = eco_packages.get(p.id, (None, None, None)) + if property_eco_package[0] is not None: + inclusions = property_eco_package[0] + exclusions = [] + else: + inclusions = body.inclusions + exclusions = body.exclusions + recommender = Recommendations( property_instance=p, materials=materials, - exclusions=body.exclusions, - inclusions=body.inclusions, + exclusions=exclusions, + inclusions=inclusions, default_u_values=body.default_u_values ) property_recommendations, property_representative_recommendations = recommender.recommend() @@ -757,16 +978,15 @@ async def model_engine(body: PlanTriggerRequest): recommendations_scoring_data.extend(p.recommendations_scoring_data) - # TODO: Make sure that number_habitable_rooms has been dropped logger.info("Preparing data for scoring in sap change api") - recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) - - recommendations_scoring_data = recommendations_scoring_data.drop( + recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data).drop( columns=[ "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", "carbon_ending" ] ) + # Temp putting this here + recommendations_scoring_data["is_post_sap10_ending"] = True all_predictions = await model_api.async_paginated_predictions( data=recommendations_scoring_data, @@ -807,21 +1027,22 @@ async def model_engine(body: PlanTriggerRequest): # We now insert kwh estimates and costs into the recommendations logger.info("Calculating tenant savings - kwh and bills") - for property_id in tqdm([p.id for p in input_properties]): + for p in tqdm(input_properties): + property_id = p.id property_recommendations = recommendations.get(property_id, []) - property_instance = [p for p in input_properties if p.id == property_id][0] property_current_energy_bill = ( 
Recommendations.calculate_recommendation_tenant_savings( - property_instance=property_instance, + property_instance=p, kwh_simulation_predictions=kwh_simulation_predictions, property_recommendations=property_recommendations, ashp_cop=body.ashp_cop ) ) - property_instance.current_energy_bill = property_current_energy_bill + p.current_energy_bill = property_current_energy_bill # Insert the predictions into the recommendations and run the optimiser + logger.info("Optimising measures") for p in input_properties: if not recommendations.get(p.id): continue @@ -844,124 +1065,87 @@ async def model_engine(body: PlanTriggerRequest): ) continue + already_installed_measures = [] + for measures in measures_to_optimise: + for m in measures: + # A) We're going to make the already installed measures default + # B) We need to SAP points for all already installed measures to avoid double counting + if m["already_installed"]: + already_installed_measures.append( + { + "id": m["recommendation_id"], + "measure_type": m["measure_type"], + "sap_points": m["sap_points"], + } + ) + + # We get the ones with the highest SAP + default_already_installed = keep_max_sap_per_measure_type(already_installed_measures) + already_installed_sap = float(sum(d["sap_points"] for d in default_already_installed)) + fixed_gain = optimiser_functions.calculate_fixed_gain( property_required_measures, recommendations, p, needs_ventilation ) - gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) - - funding = Funding( - tenure=body.housing_type, - project_scores_matrix=project_scores_matrix, - partial_project_scores_matrix=partial_project_scores_matrix, - whlg_eligible_postcodes=whlg_eligible_postcodes, - eco4_social_cavity_abs_rate=12.5, - eco4_social_solid_abs_rate=17, - eco4_private_cavity_abs_rate=12.5, - eco4_private_solid_abs_rate=17, - gbis_social_cavity_abs_rate=21, - gbis_social_solid_abs_rate=25, - gbis_private_cavity_abs_rate=21, - gbis_private_solid_abs_rate=28, + gain = 
optimiser_functions.calculate_gain( + body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages, + already_installed_gain=already_installed_sap ) - li_thickness = convert_thickness_to_numeric( - p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] - ) - current_wall_u_value = p.walls["thermal_transmittance"] - if current_wall_u_value is None: - current_wall_u_value = get_wall_u_value( - clean_description=p.walls["clean_description"], - age_band=p.age_band, - is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], - is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], - ) - # We insert the innovation uplift measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) - # TODO: Turn this into a function and store the innovaiton uplift for group in measures_to_optimise_with_uplift: for r in group: + (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"]) = (0, 0, 0, 0) - if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", - "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: - ( - r["partial_project_score"], - r["partial_project_funding"], - r["innovation_uplift"], - r["uplift_project_score"], - ) = ( - 0, 0, 0, 0 - ) - continue - - ( - r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], - r["uplift_project_score"] - ) = funding.get_innovation_uplift( - measure=r, - starting_sap=p.data["current-energy-efficiency"], - floor_area=p.floor_area, - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - ) + # Remove them from the optimisation pool + finalised_measures_to_optimise = [] + for m in measures_to_optimise_with_uplift: + filtered = [x 
for x in m if not x["already_installed"]] + if filtered: + finalised_measures_to_optimise.append(filtered) input_measures = optimiser_functions.prepare_input_measures( - measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True + finalised_measures_to_optimise, body.goal, needs_ventilation, funding=True, + property_eco_packages=eco_packages.get(p.id) ) # When the goal is Increasing EPC, we can run the funding optimiser if body.goal == "Increasing EPC": - solutions = optimise_with_funding_paths( + solutions = optimise_with_scenarios( p=p, input_measures=input_measures, - housing_type=body.housing_type, budget=body.budget, target_gain=gain, - funding=funding + enforce_heat_pump_insulation=True, + enforce_fabric_first=body.enforce_fabric_first, + already_installed_sap=already_installed_sap, # To be passed to output ) - # Given the solutions we select the optimal one - solutions["cost_less_full_project_funding"] = np.where( - solutions["scheme"] == "eco4", - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], - solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] - ) - - solutions["cost_less_full_project_funding"] = ( - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] - ) - solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) - - if solutions["meets_upgrade_target"].any(): - # If we have a solution that meets the upgrade target, we select that one - optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] + # if handle the empty case + if solutions.empty: + solution, battery_sap_score = [], 0 else: - # Pick the cheapest - optimal_solution = solutions.iloc[0] - # This is the list of measures that we will recommend - scheme = optimal_solution["scheme"] - funded_measures = optimal_solution["items"] if scheme != "none" else [] - solution = optimal_solution["items"] + 
optimal_solution["unfunded_items"] - # This is the total amount of funding that the project will produce (including uplifts) (£) - project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ - optimal_solution["partial_project_funding"] - # This is the total amount of funding associated to the uplift (£) - total_uplift = optimal_solution["total_uplift"] - # This is the funding scheme selected - # This is the full project ABS - full_project_score = optimal_solution["project_score"] - # This is the partial project ABS - partial_project_score = optimal_solution["partial_project_score"] - # This is the uplift score ABS - uplift_project_score = optimal_solution["total_uplift_score"] + if solutions["meets_upgrade_target"].any(): + # If we have a solution that meets the upgrade target, we select that one + optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] + else: + # We re-organise, taking the solution with the most gain and then the cheapest + solutions = solutions.sort_values( + by=["total_gain", "total_cost"], ascending=[False, True] + ) + optimal_solution = solutions.iloc[0] + + # We create this full list of selected measures, which is used in the next section for setting + # default measures + solution = deepcopy(optimal_solution["items"]) + pv_size = float(optimal_solution["array_size"]) + battery_sap_score = BatterySAPScorer.score( + starting_sap=optimal_solution["ending_sap_without_battery"], pv_size=pv_size + ) else: # We optimise and then we determine eligibility for funding, based on the measures selected optimiser = ( @@ -972,55 +1156,16 @@ async def model_engine(body: PlanTriggerRequest): optimiser.setup() optimiser.solve() solution = optimiser.solution + gain = optimiser.solution_gain + post_sap = int(p.data["current-energy-efficiency"]) + gain - recommendation_types = [] - for measures in input_measures: - for measure in measures: - recommendation_types.append(measure["type"]) - recommendation_types = 
set(recommendation_types) - - has_wall_insulation_recommendation = any( - (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in - WALL_INSULATION_MEASURES - ) - has_roof_insulation_recommendation = any( - (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in - ROOF_INSULATION_MEASURES + pv_size = next( + (m["array_size"] for m in solution if m["type"] == "solar_pv"), 0 ) + battery_sap_score = BatterySAPScorer.score(starting_sap=post_sap, pv_size=pv_size) - funding.check_funding( - measures=solution, - starting_sap=p.data["current-energy-efficiency"], - ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), - floor_area=p.floor_area, - mainheat_description=p.main_heating["clean_description"], - heating_control_description=p.main_heating_controls["clean_description"], - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - has_wall_insulation_recommendation=has_wall_insulation_recommendation, - has_roof_insulation_recommendation=has_roof_insulation_recommendation, - ) - - # Determine the scheme - scheme = "none" - if funding.eco4_eligible: - scheme = "eco4" - if scheme == "none" and funding.gbis_eligible: - scheme = "gbis" - - funded_measures = solution if scheme in ["gbis", "eco4"] else [] - project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs - total_uplift = funding.eco4_uplift - full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs - partial_project_score = funding.partial_project_abs - uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift - - selected = {r["id"] for r in solution} + # We add the defauly already installed measures to the 
solution + selected = {r["id"] for r in solution + default_already_installed} if property_required_measures: solution = optimiser_functions.add_required_measures( @@ -1030,29 +1175,16 @@ async def model_engine(body: PlanTriggerRequest): # Add best practice measures (ventilation/trickle vents) selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) - # Final flattening + # Final flattening - we pass what the battery SAP score would be, regardless if the battery was selected recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( - p.id, recommendations, selected - ) - - # TODO: functionise - for measure in funded_measures: - if "+mechanical_ventilation" in measure["type"]: - measure["type"] = measure["type"].split("+mechanical_ventilation")[0] - - p.insert_funding( - scheme=scheme, - funded_measures=funded_measures, - project_funding=project_funding, - total_uplift=total_uplift, - full_project_score=full_project_score, - partial_project_score=partial_project_score, - uplift_project_score=uplift_project_score + p.id, recommendations, selected, battery_sap_score ) # when we have buildings, we tweak our solar PV recommendations as if one unit needs it, we apply it to all # of them # TODO: We can probably do better and optimise at the building level - this is temp + # Idea: - optimise all measures except solar at the unit level. 
Then, test with and without solar for + # all units at the same time logger.info("Adjusting solar PV recommendations for buildings") building_ids = set([p.building_id for p in input_properties if p.building_id is not None]) @@ -1082,156 +1214,167 @@ async def model_engine(body: PlanTriggerRequest): # We don't need to create a new scenario, we just use the existing one scenario_id = body.scenario_id else: - engine_scenario = create_scenario( - session=session, - scenario={ - "name": body.scenario_name, - "created_at": created_at, - "budget": body.budget, - "portfolio_id": body.portfolio_id, - "housing_type": body.housing_type, - "goal": body.goal, - "goal_value": body.goal_value, - "trigger_file_path": body.trigger_file_path, - "already_installed_file_path": body.already_installed_file_path, - "patches_file_path": body.patches_file_path, - "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, - "exclusions": body.exclusions, - "multi_plan": body.multi_plan - } - ) - scenario_id = engine_scenario.id - - property_valuation_increases = [] - session.commit() - new_epc_bands = {} - property_value_increase_ranges = {} - for i in range(0, len(input_properties), BATCH_SIZE): - try: - # Take a slice of the input_properties list to make a batch - batch_properties = input_properties[i:i + BATCH_SIZE] - - for p in batch_properties: - recommendations_to_upload = recommendations.get(p.id, []) - default_recommendations = [r for r in recommendations_to_upload if r["default"]] - total_sap_points = sum([r["sap_points"] for r in default_recommendations]) - new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points - new_epc = sap_to_epc(new_sap_points) - new_epc_bands[p.id] = new_epc - - total_cost = sum([r["total"] for r in default_recommendations]) - - valuations = PropertyValuation.estimate( - property_instance=p, target_epc=new_epc, total_cost=total_cost - ) - property_value_increase_ranges[p.id] = valuations - - if p.is_new: - 
property_details_epc = p.get_property_details_epc( - portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, - ) - create_property_details_epc(session, property_details_epc) - - update_or_create_property_spatial_details(session, p.uprn, p.spatial) - - property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) - - update_property_data( - session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data - ) - - if not recommendations_to_upload: - continue - - new_plan_id = create_plan(session, { - "portfolio_id": body.portfolio_id, - "property_id": p.id, - "scenario_id": scenario_id, - "is_default": True if p.is_new else False, + with db_session() as session: + scenario_id = db_funcs.recommendations_functions.create_scenario( + session=session, + scenario={ "name": body.scenario_name, - "valuation_increase_lower_bound": ( - valuations["lower_bound_increased_value"] - valuations["current_value"] - ), - "valuation_increase_upper_bound": ( - valuations["upper_bound_increased_value"] - valuations["current_value"] - ), - "valuation_increase_average": ( - valuations["average_increased_value"] - valuations["current_value"] - ), - }) + "created_at": created_at, + "budget": body.budget, + "portfolio_id": body.portfolio_id, + "housing_type": body.housing_type, + "goal": body.goal, + "goal_value": body.goal_value, + "trigger_file_path": body.trigger_file_path, + "already_installed_file_path": body.already_installed_file_path, + "patches_file_path": body.patches_file_path, + "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, + "exclusions": body.exclusions, + "multi_plan": body.multi_plan + } + ) - upload_recommendations( - session, recommendations_to_upload, p.id, new_plan_id - ) + property_updates, property_epc_details, property_spatial_updates = [], [], [] + plans_to_create, recommendations_to_create = [], [] + # Prepare the data that will need to be uploaded in bulk + for p in 
input_properties: + recommendations_for_property = recommendations.get(p.id, []) + default_recommendations = [r for r in recommendations_for_property if r["default"]] - upload_funding(session, p, new_plan_id, recommendations_to_upload) + # We need to: + # Get already installed measures + already_installed_default = [r for r in default_recommendations if r["already_installed"]] + # Property should have increased SAP + needs_rebaselining = bool(len(already_installed_default)) + rebaselining_sap = float(sum([r["sap_points"] for r in already_installed_default])) + rebaselining_carbon = float(sum([r["co2_equivalent_savings"] for r in already_installed_default])) + rebaselining_heat_demand = float(sum([r["heat_demand"] for r in already_installed_default])) + rebaselining_kwh = float(sum([r["kwh_savings"] for r in already_installed_default])) + rebaselining_bills = float(sum([r["energy_cost_savings"] for r in already_installed_default])) - property_valuation_increases.append( - valuations["average_increased_value"] - valuations["current_value"] - ) + # This will include everything, including already installed + total_sap_points = sum([r["sap_points"] for r in default_recommendations]) + new_sap_points = float(p.data["current-energy-efficiency"]) + total_sap_points + new_epc = sap_to_epc(new_sap_points) + # Already installed measures do not have a cost but we remove anyway + total_cost = sum([r["total"] for r in default_recommendations if not r["already_installed"]]) + valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc, total_cost=total_cost) - # Commit the session after each batch - session.commit() + # --- property-level updates (always) --- + property_updates.append({ + "property_id": p.id, + "portfolio_id": body.portfolio_id, + "data": p.get_full_property_data( + current_valuation=valuations["current_value"], + needs_rebaselining=needs_rebaselining, + rebaselining_sap=rebaselining_sap, + ) + }) - except Exception as e: - # Rollback the 
session if an error occurs - session.rollback() - print("Failed i = %s" % str(i)) - logger.error(f"An error occurred during batch starting at index {i}: {e}") - logger.error(f"property is uprn {p.uprn} id {p.id} address {p.address}") + property_epc_details.append( + p.get_property_details_epc( + portfolio_id=body.portfolio_id, + needs_rebaselining=needs_rebaselining, + rebaselining_carbon=rebaselining_carbon, + rebaselining_heat_demand=rebaselining_heat_demand, + rebaselining_kwh=rebaselining_kwh, + rebaselining_bills=rebaselining_bills, + ) + ) - logger.info("Creating portfolio aggregations") - # We implement this in the simplest way possible which will be just to query the database for all - # recommendations associated to the portfolio and then aggregate them. This is not the most efficient - # way to do this, but it's the simplest and will be a process that we can re-use since when we change a - # recommendation from being default to not default, we'll need to re-run this process to re-calculate the - # the portfolion level impact + property_spatial_updates.append({"uprn": p.uprn, "data": p.spatial}) - total_valuation_increase = sum(property_valuation_increases) - labour_days = round(max( - [sum(r["labour_days"] for r in rec_group if r["default"]) for p_id, rec_group in recommendations.items()] - )) + # --- skip plan creation if no recommendations --- + if not recommendations_for_property: + continue - # TODO - This code only pulls in the properties that have been updated in this run, but we need to - # aggregate all properties in the portfolio. 
We likely need to trigger a re-aggregation - aggregated_data = extract_portfolio_aggregation_data( - input_properties=input_properties, - total_valuation_increase=total_valuation_increase, - recommendations=recommendations, - new_epc_bands=new_epc_bands, - property_value_increase_ranges=property_value_increase_ranges - ) + plan_data = db_funcs.recommendations_functions.prepare_plan_data( + p=p, + body=body, + scenario_id=scenario_id, + eco_packages=eco_packages, + valuations=valuations, + new_sap_points=new_sap_points, + new_epc=new_epc, + default_recommendations=default_recommendations, + rebaselining_carbon=rebaselining_carbon, + rebaselining_heat_demand=rebaselining_heat_demand, + rebaselining_kwh=rebaselining_kwh, + rebaselining_bills=rebaselining_bills, + ) + plans_to_create.append({"property_id": p.id, "plan_data": plan_data}) - aggregate_portfolio_recommendations( - session, - portfolio_id=body.portfolio_id, - scenario_id=scenario_id, - total_valuation_increase=total_valuation_increase, - labour_days=labour_days, - aggregated_data=aggregated_data - ) + # store recommendations keyed by property + for r in recommendations_for_property: + recommendations_to_create.append({ + "property_id": p.id, + # ---- Recommendation core ---- + "type": r["type"], + "measure_type": r["measure_type"], + "description": r["description"], + "estimated_cost": float(r["total"]), + "default": r["default"], + "starting_u_value": float(r["starting_u_value"]) if r.get("starting_u_value") else None, + "new_u_value": float(r["new_u_value"]) if r.get("new_u_value") else None, + "sap_points": float(r["sap_points"]), + "energy_savings": float(r["heat_demand"]), + "kwh_savings": float(r["kwh_savings"]), + "co2_equivalent_savings": float(r["co2_equivalent_savings"]), + "total_work_hours": float(r["labour_hours"]), + "energy_cost_savings": float(r["energy_cost_savings"]), + "labour_days": float(r["labour_days"]), + "already_installed": r["already_installed"], + "heat_demand": 
float(r["heat_demand"]), - # Commit final changes - session.commit() + # ---- parts ---- + "parts": [ + { + "material_id": part["id"], + "depth": int(part["depth"]) if part.get("depth") else None, + "quantity": float(part["quantity"]) if part.get("quantity") else None, + "quantity_unit": part.get("quantity_unit"), + "estimated_cost": float(part.get("total", part.get("total_cost"))), + } + for part in r.get("parts", []) + ], + }) - except IntegrityError: - logger.error("Database integrity error occurred", exc_info=True) - session.rollback() - return Response(status_code=500, content="Database integrity error.") - except OperationalError: - logger.error("Database operational error occurred", exc_info=True) - session.rollback() - return Response(status_code=500, content="Database operational error.") - except ValueError: - logger.error("Value error - possibly due to malformed data", exc_info=True) - session.rollback() - return Response(status_code=400, content="Bad request: malformed data.") + # Bulk upload property data + logger.info("Uploading property data in bulk") + with db_session() as session: + db_funcs.property_functions.bulk_update_properties(session, property_updates) + db_funcs.property_functions.bulk_upsert_property_details_epc(session, property_epc_details) + db_funcs.property_functions.bulk_upsert_property_spatial(session, property_spatial_updates) + # # Bulk create plans + plan_id_by_property = db_funcs.recommendations_functions.bulk_create_plans(session, plans_to_create) + recommendation_payload = [ + { + "plan_id": plan_id_by_property[r["property_id"]], + **{k: v for k, v in r.items() if k not in ["parts"]}, + "parts": r["parts"], + } for r in recommendations_to_create if r["property_id"] in plan_id_by_property + ] + + db_funcs.recommendations_functions.bulk_upload_recommendations_and_materials( + session, recommendation_payload + ) + + logger.info("Work completed, updating log status") + + except IntegrityError as e: + return handle_error("Database 
integrity error.", e, body.subtask_id, 500, start_ms) + except OperationalError as e: + return handle_error("Database operational error.", e, body.subtask_id, 500, start_ms) + except ValueError as e: + return handle_error("Bad request: malformed data.", e, body.subtask_id, 400, start_ms) except Exception as e: # General exception handling - logger.error(f"An error occurred: {e}") - session.rollback() - return Response(status_code=500, content="An unexpected error occurred.") - finally: - session.close() + return handle_error("An unexpected error occurred.", e, body.subtask_id, 500, start_ms) + + cloud_logs_url = build_cloudwatch_log_url(start_ms) + # Mark the subtask as successful + SubTaskInterface().update_subtask_status( + subtask_id=UUID(body.subtask_id), status="complete", cloud_logs_url=cloud_logs_url + ) logger.info("Model Engine completed successfully") diff --git a/backend/ml_models/AnnualBillSavings.py b/backend/ml_models/AnnualBillSavings.py index 4291b1d1..f04ee2f1 100644 --- a/backend/ml_models/AnnualBillSavings.py +++ b/backend/ml_models/AnnualBillSavings.py @@ -26,21 +26,21 @@ class AnnualBillSavings: AVERAGE_ELECTRICITY_CONSUMPTION = 2700 AVERAGE_GAS_CONSUMPTION = 11500 - # Latest price cap figures from Ofgem are for April 2024 + # Latest price cap figures from Ofgem are for Jan 2026 to March 2026 # https://www.ofgem.gov.uk/energy-price-cap - ELECTRICITY_PRICE_CAP = 0.2573 - GAS_PRICE_CAP = 0.0633 - # This is the most recent export payment figure, at 9.28p/kWh + ELECTRICITY_PRICE_CAP = 0.2769 + GAS_PRICE_CAP = 0.0593 + # This is the most recent export payment figure, at 13p/kWh - Updated Nov 2025 # Smart export guarantee rates can be found here: # https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates - ELECTRICITY_EXPORT_PAYMENT = 0.0928 + ELECTRICITY_EXPORT_PAYMENT = 0.13 # This is a weighted mean of the price caps, using the consumption figures above as weights PRICE_FACTOR = 0.09549999999999999 # Daily standard 
charge, based on average across England, Scotland and Wales, and includes VAT - DAILY_STANDARD_CHARGE_GAS = 0.2982 - DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5137 + DAILY_STANDARD_CHARGE_GAS = 0.3509 + DAILY_STANDARD_CHARGE_ELECTRICITY = 0.5475 # Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison # For July 2024. These quotes are based on the east midlands region, so we @@ -263,7 +263,8 @@ class AnnualBillSavings: if fuel == "Electricity": return (kwh / cop) * cls.ELECTRICITY_PRICE_CAP - if fuel in ["Natural Gas", "Natural Gas (Community Scheme)"]: + # We handle "Unmapped" in a similar fashion to gas + if fuel in ["Natural Gas", "Natural Gas (Community Scheme)", "Unmapped"]: return (kwh / cop) * cls.GAS_PRICE_CAP if fuel == "LPG": @@ -285,10 +286,18 @@ class AnnualBillSavings: # The solar thermal covers a % of the heating kwh, so we need to adjust the cost return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.GAS_PRICE_CAP - if fuel == "Electricity + Solar Thermal": + if fuel in ["Electricity + Solar Thermal", 'Unmapped + Solar Thermal']: # The solar thermal covers a % of the heating kwh, so we need to adjust the cost return (kwh / cop) * assumptions.SOLAR_CONSUMPTION_PROPORTION * cls.ELECTRICITY_PRICE_CAP + if fuel in ['Oil + Solar Thermal']: + # The solar thermal covers a % of the heating kwh, so we need to adjust the cost + price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "Kerosene"].squeeze() + cost_per_kwh = cls.cost_per_kwh( + price_data["Price (p)"], price_data["Energy Content, Net Calorific value (kWh/unit)"] + ) + return (kwh / cop) * cost_per_kwh * assumptions.SOLAR_CONSUMPTION_PROPORTION + if fuel == "LPG + Solar Thermal": # The solar thermal covers a % of the heating kwh, so we need to adjust the cost price_data = cls.FUEL_DATA[cls.FUEL_DATA["Fuel"] == "LPG"].squeeze() diff --git a/backend/ml_models/Valuation.py b/backend/ml_models/Valuation.py index 8c57900f..17db0dae 100644 --- 
a/backend/ml_models/Valuation.py +++ b/backend/ml_models/Valuation.py @@ -219,12 +219,19 @@ class PropertyValuation: current_epc = property_instance.data["current-energy-rating"] if not current_value: + # In this case, we return a % improvement rather than an absolute + relative_improvement = cls.estimate_valuation_improvement( + current_value=1, + current_epc=current_epc, + target_epc=target_epc, + total_cost=1 + ) return { "current_value": 0, - "lower_bound_increased_value": 0, - "upper_bound_increased_value": 0, - "average_increased_value": 0, - "average_increase": 0 + "lower_bound_increased_value": relative_improvement["lower_bound_increased_value"] - 1, + "upper_bound_increased_value": relative_improvement["upper_bound_increased_value"] - 1, + "average_increased_value": relative_improvement["average_increased_value"] - 1, + "average_increase": relative_improvement["average_increase"] } return cls.estimate_valuation_improvement(current_value, current_epc, target_epc, total_cost) diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index 8d1dbeea..daf4b715 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -44,7 +44,7 @@ class ModelApi: self.timestamp = timestamp self.prediction_buckets = prediction_buckets self.max_retries = max_retries - self.semaphore = asyncio.Semaphore(2) + self.semaphore = asyncio.Semaphore(3) @staticmethod def get_aiohttp_session(): @@ -117,7 +117,7 @@ class ModelApi: } async with self.semaphore: - await asyncio.sleep(random.uniform(0.3, 1.2)) + # await asyncio.sleep(random.uniform(0.3, 1.2)) try: async with session.post(url, json=payload, headers=headers, timeout=120) as response: if response.status != 200: @@ -142,7 +142,8 @@ class ModelApi: @staticmethod def extract_phase(recommendation_id): if 'phase=' in recommendation_id: - return int(recommendation_id.split('phase=')[1][0]) + extracted = recommendation_id.split('phase=')[1] + return int(extracted.strip()) else: return None @@ -211,13 +212,14 @@ 
class ModelApi: response = await self.predict_async(f"s3://{bucket}/" + file_location, model_prefix, session=session) return model_prefix, response - results = [] - for coro in asyncio.as_completed([run_model(mp) for mp in model_prefixes]): - result = await coro - results.append(result) + # Run all model calls concurrently + results = await asyncio.gather( + *(run_model(mp) for mp in model_prefixes), + return_exceptions=True + ) for model_prefix, response in results: - if response: + if response and not isinstance(response, Exception): predictions_bucket = self.prediction_buckets[model_prefix] predictions_df = pd.DataFrame( read_dataframe_from_s3_parquet( @@ -257,8 +259,10 @@ class ModelApi: model_prefixes = self.MODEL_PREFIXES if model_prefies is None else model_prefies session = self.get_aiohttp_session() tasks = [ - self._send_warm_up_request(session, f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict", - model_prefix) + self._send_warm_up_request( + session, f"{self.base_url}/{self.MODEL_URLS[model_prefix]}/predict", + model_prefix + ) for model_prefix in model_prefixes ] await asyncio.gather(*tasks, return_exceptions=True) @@ -271,7 +275,10 @@ class ModelApi: "file_location": "s3://warm-up-placeholder", "portfolio_id": 0, "property_id": "", - "created_at": "2020-01-01T00:00:00" + "created_at": "2020-01-01T00:00:00", + "warm": True + # The presence of this key will send the api down a specific warm up route, to call + # prediction and load the font manager, because that is a key bottleneck for cold starts } async with session.post(url, json=json_payload, timeout=10) as response: text = await response.text() diff --git a/backend/onboarders/mappings/age_band.py b/backend/onboarders/mappings/age_band.py new file mode 100644 index 00000000..2487c921 --- /dev/null +++ b/backend/onboarders/mappings/age_band.py @@ -0,0 +1,14 @@ +party_map = { + "Before 1900": 'England and Wales: before 1900', + "1900-1929": 'England and Wales: 1900-1929', + "1930-1949": 
'England and Wales: 1930-1949', + "1950-1966": 'England and Wales: 1950-1966', + "1967-1975": 'England and Wales: 1967-1975', + "1976-1982": 'England and Wales: 1976-1982', + "1983-1990": 'England and Wales: 1983-1990', + "1991-1995": 'England and Wales: 1991-1995', + "1996-2002": 'England and Wales: 1996-2002', + "2003-2006": 'England and Wales: 2003-2006', + "2007-2011": 'England and Wales: 2007-2011', + "2012 onwards": 'England and Wales: 2012-2021', +} diff --git a/backend/onboarders/mappings/built_form.py b/backend/onboarders/mappings/built_form.py new file mode 100644 index 00000000..23901fc6 --- /dev/null +++ b/backend/onboarders/mappings/built_form.py @@ -0,0 +1,15 @@ +parity_map = { + "MidTerrace": "Mid-Terrace", + "EndTerrace": "End-Terrace", + "Detached": "Detached", + "SemiDetached": "Semi-Detached", + "EnclosedMidTerrace": "Enclosed Mid-Terrace", + "EnclosedEndTerrace": "Enclosed End-Terrace", +} + +# MidTerrace 41462 +# EndTerrace 20910 +# Detached 16875 +# SemiDetached 14725 +# EnclosedMidTerrace 3176 +# EnclosedEndTerrace 2393 diff --git a/backend/onboarders/mappings/property_type.py b/backend/onboarders/mappings/property_type.py new file mode 100644 index 00000000..75deef04 --- /dev/null +++ b/backend/onboarders/mappings/property_type.py @@ -0,0 +1,6 @@ +parity_map = { + "Flat": "Flat", + "Maisonette": "Maisonette", + "Bungalow": "Bungalow", + "House": "House", +} diff --git a/backend/onboarders/mappings/walls.py b/backend/onboarders/mappings/walls.py new file mode 100644 index 00000000..9b70b49c --- /dev/null +++ b/backend/onboarders/mappings/walls.py @@ -0,0 +1,3 @@ +parity_map = { + +} diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py new file mode 100644 index 00000000..f41ebeaf --- /dev/null +++ b/backend/onboarders/parity.py @@ -0,0 +1,95 @@ +import pandas as pd +from etl.epc.DataProcessor import construction_age_bounds_map +from backend.onboarders.mappings.property_type import parity_map as property_map +from 
backend.onboarders.mappings.age_band import party_map as age_band_map +from backend.onboarders.mappings.built_form import parity_map as built_form_map + + +def check_nulls(data, original_column, mapped_column): + # We only allow nulls if the original value was null + null_vals = data[pd.isnull(data[mapped_column])] + if null_vals.empty: + return True + # We make sure all original values were null + assert pd.isnull(null_vals[original_column]).all(), ( + f"Some values in {mapped_column} were not mapped, but original values were not null" + ) + + +# Sample input data + +data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +# We want to map the parity fields to standard EPC references. This will allow us to +# 1) Estimate EPCs, more accurately +# 2) Patch incorrect EPCs with ease +# 3) Indicate already installed measures + +# ------------ construction_age_band ------------ +# Map to EPC age bands +# def construction_date_to_band(year): +# if pd.isnull(year): +# return None +# # Get the year from the date which is numpy datetime format +# for label, ranges in construction_age_bounds_map.items(): +# if ranges["l"] <= year <= ranges["u"]: +# return label +# raise NotImplementedError("year out of bounds") +# +# +# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band) + +data["construction_age_band"] = data["Construction Years"].map(age_band_map) + +check_nulls(data, "Construction Years", "construction_age_band") + +# ------------ property_type ------------ +data["property_type"] = data["Type"].map(property_map) + +assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped" + +# ------------ built_form ------------ +data["built_form"] = data["Attachment"].map(built_form_map) + +assert pd.isnull(data["built_form"]).sum() == 0, 
"Some built forms were not mapped" + +# ------------ Wall Construction ------------ + +data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation") + +data["Wall Insulation"].value_counts() +data["Wall Construction"].value_counts() + +as_built_map = { + "Cavity": {"insulated_age_bands":[], "partial_insulated_age_bands": []}, + "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, + "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, +} + +def map_wall_construction(wall_constuction, wall_insulation, construction_age_band): + if wall_insulation == "AsBuilt": + # Deduce based on wall construction and age band + bands = as_built_map.get(wall_constuction, None) + if bands is None: + raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map") + + # We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated + + + + +# Variables we want to map +'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', + 'Attachment', 'Construction Years', 'Wall Construction', + 'Wall Insulation', 'Roof Construction', 'Roof Insulation', + 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', + 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', + 'Total Floor Area (m2)' \ No newline at end of file diff --git a/backend/run_curl.sh b/backend/run_curl.sh new file mode 100644 index 00000000..22433e39 --- /dev/null +++ b/backend/run_curl.sh @@ -0,0 +1,11 @@ +curl -X POST "http://localhost:8000/v1/whlg/eligible" \ + -H "Content-Type: application/json" \ + -d '{"postcode": "B93 8SY"}' + +curl -X 
POST "http://localhost:8000/v1/whlg/eligible" \ + -H "Content-Type: application/json" \ + -d '{"postcode": "BN15 0FD"}' + +curl -X POST "http://localhost:8000/v1/whlg/eligible" \ + -H "Content-Type: application/json" \ + -d '{"postcode": "DY6 0LB"}' diff --git a/backend/run_local.sh b/backend/run_local.sh new file mode 100644 index 00000000..be45a54a --- /dev/null +++ b/backend/run_local.sh @@ -0,0 +1,6 @@ +set -a +source ./.env +set +a + +uvicorn app.main:app --reload + diff --git a/backend/tests/test_data/innovation_measure_fixtures.py b/backend/tests/test_data/innovation_measure_fixtures.py index 886421c4..51f8e3ee 100644 --- a/backend/tests/test_data/innovation_measure_fixtures.py +++ b/backend/tests/test_data/innovation_measure_fixtures.py @@ -4,7 +4,7 @@ innovation_scenarios = [ # 1) Innovation PV, non-eligible heating system in place, EPC D - not eligible { "description": "Innovation PV, non-eligible heating system in place, EPC D", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Electric storage heaters", "heating_control_description": "Manual charge control", @@ -16,7 +16,7 @@ innovation_scenarios = [ # 2) Innovation PV, eligible heating system in place, EPC D - eligible { "description": "Innovation PV, eligible heating system in place, EPC D", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -29,8 +29,8 @@ innovation_scenarios = [ { "description": "Innovation PV + HHRSH upgrade, EPC E", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": True, 
"uplift": 0.1} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 50, "mainheat_description": "Electric storage heaters", @@ -44,8 +44,8 @@ innovation_scenarios = [ { "description": "Innovation PV + HHRSH upgrade, EPC E", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": True, "uplift": 0.1} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 50, "mainheat_description": "Electric storage heaters", @@ -58,7 +58,7 @@ innovation_scenarios = [ # 5) Innovation PV, needs wall insulation, no wall insulation measure - not eligible { "description": "Innovation PV, wall insulation recommended, but not installed", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -71,8 +71,8 @@ innovation_scenarios = [ { "description": "Innovation PV, wall insulation recommended and installed", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -85,7 +85,7 @@ innovation_scenarios = [ # 7) Innovation PV, needs roof insulation, no roof insulation measure - not eligible { "description": "Innovation PV, roof insulation 
recommended, not installed", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -98,8 +98,8 @@ innovation_scenarios = [ { "description": "Innovation PV, roof insulation recommended and installed", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -112,7 +112,7 @@ innovation_scenarios = [ # 9) Innovation PV, needs both roof + wall insulation, no insulation - not eligible { "description": "Innovation PV, both insulations recommended, none installed", - "measures": [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}], + "measures": [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", "heating_control_description": "Programmer, room thermostat and TRVs", @@ -125,8 +125,8 @@ innovation_scenarios = [ { "description": "Innovation PV, both insulations recommended, only wall done", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -140,8 +140,8 @@ innovation_scenarios = [ { "description": "Innovation PV, both insulations 
recommended, only roof done", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", @@ -155,9 +155,9 @@ innovation_scenarios = [ { "description": "Innovation PV, both insulations recommended and installed", "measures": [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0.25}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ], "starting_sap": 60, "mainheat_description": "Air source heat pump, radiators", diff --git a/backend/tests/test_funding.py b/backend/tests/test_funding.py index 59d65a28..8646ab27 100644 --- a/backend/tests/test_funding.py +++ b/backend/tests/test_funding.py @@ -120,7 +120,7 @@ def test_eco4_prs_eligible_with_swi( # 3) is getting a solid was measure # so it's eligible for ECO4 - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=50, # EPC E @@ -162,7 +162,7 @@ def test_eco4_prs_not_eligible_high_epc( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=72, # EPC C (too high) @@ -203,7 
+203,7 @@ def test_gbis_prs_general_eligibility( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=65, # EPC D @@ -244,7 +244,7 @@ def test_gbis_prs_low_income_caveat( tenure="Private", ) - measures = [{"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=60, # EPC D @@ -290,7 +290,7 @@ def test_eco4_sh_epc_e_eligible( tenure="Social", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=50, # EPC E @@ -330,7 +330,7 @@ def test_eco4_sh_epc_d_requires_innovation( tenure="Social", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] funding.check_funding( measures=measures, starting_sap=60, # EPC D @@ -365,7 +365,7 @@ def test_eco4_sh_epc_d_requires_innovation( gbis_private_solid_abs_rate=28, tenure="Social", ) - measures2 = [{"type": "internal_wall_insulation", "is_innovation": True, "uplift": 0.25}] + measures2 = [{"type": "internal_wall_insulation", "is_innovation": True, "innovation_uplift": 0.25}] funding2.check_funding( measures=measures2, starting_sap=60, # EPC D @@ -403,7 +403,7 @@ def test_eco4_sh_epc_d_requires_innovation( gbis_private_solid_abs_rate=28, tenure="Social", ) - measures3 = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}] + measures3 = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}] 
funding3.check_funding( measures=measures3, starting_sap=60, # EPC D @@ -439,7 +439,7 @@ def test_eco4_sh_epc_d_requires_innovation( tenure="Social", ) - measures4 = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, ] + measures4 = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, ] funding4.check_funding( measures=measures4, starting_sap=60, # EPC D @@ -476,8 +476,8 @@ def test_eco4_sh_epc_d_requires_innovation( ) measures5 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "high_heat_retention_storage_heater", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "high_heat_retention_storage_heaters", "is_innovation": False, "innovation_uplift": 0} ] funding5.check_funding( measures=measures5, @@ -516,7 +516,7 @@ def test_eco4_sh_epc_d_requires_innovation( ) measures6 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, ] funding6.check_funding( measures=measures6, @@ -556,9 +556,9 @@ def test_eco4_sh_epc_d_requires_innovation( tenure="Social", ) measures7 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0.25}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ] funding7.check_funding( measures=measures7, @@ -599,7 +599,7 @@ def test_eco4_sh_solar_pv_requires_heating( tenure="Social", ) - measures = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}] + measures = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}] funding.check_funding( measures=measures, starting_sap=60, # EPC 
D @@ -641,8 +641,8 @@ def test_eco4_sh_solar_pv_with_heating_is_ok( ) measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( measures=measures, @@ -684,7 +684,7 @@ def test_eco4_upgrade_requirement_e_to_c_pass( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # E (SAP 50) → C (SAP 70) meets upgrade rule funding.check_funding( @@ -727,7 +727,7 @@ def test_eco4_upgrade_requirement_e_to_d_fail( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # E (SAP 50) → D (SAP 65) does NOT meet ECO4 upgrade rule funding.check_funding( @@ -770,7 +770,7 @@ def test_eco4_upgrade_requirement_f_to_d_pass( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # F (SAP 35) → D (SAP 60) is OK for ECO4 funding.check_funding( @@ -813,7 +813,7 @@ def test_eco4_upgrade_requirement_f_to_e_fail( tenure="Private", ) - measures = [{"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}] + measures = [{"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}] # F (SAP 35) → E (SAP 50) does NOT meet ECO4 rule funding.check_funding( @@ -859,7 +859,7 @@ def test_epc_d_social_no_innovation_no_heating( ) measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45} + {"type": "solar_pv", "is_innovation": True, 
"innovation_uplift": 0.45} ] funding.check_funding( @@ -905,10 +905,10 @@ def test_epc_d_social_with_heating_and_insulation( # Should NOT be eligible as the ASHP is not an innovation measure measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( @@ -954,9 +954,9 @@ def test_epc_d_social_solar_with_only_minimum_insulation_should_fail( # Solar PV innovation with insulation, but no heating system upgrade => not eligible measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( @@ -1002,8 +1002,8 @@ def test_epc_d_social_solar_with_ashp_and_no_insulation_should_fail( # Solar PV innovation with heating, but no insulation when insulation is recommended => not eligible measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] 
funding.check_funding( @@ -1050,10 +1050,10 @@ def test_epc_d_social_solar_with_heating_and_minimum_insulation_should_pass( # Innovation solar + insulation measures + eligible heating upgrade = not valid because the heat pump isn;t # an innovation measure measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0} ] funding.check_funding( @@ -1095,10 +1095,10 @@ def test_epc_d_social_solar_with_heating_and_minimum_insulation_should_pass( # Innovation solar + insulation measures + eligible heating upgrade = should be valid because the # heat pump is an innovation measure measures2 = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": True, "uplift": 0.25} + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": True, "innovation_uplift": 0.25} ] funding2.check_funding( @@ -1203,11 +1203,11 @@ def test_uplift( # # TODO: Add a scenario with multiple measures, where some are innovation, some are not and we have # TODO: Make sure private works too measures = [ - {"type": "solar_pv", 
"is_innovation": True, "uplift": 0.45}, - {"type": "internal_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0.25}, + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "internal_wall_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}, + {"type": "cavity_wall_insulation", "is_innovation": False, "innovation_uplift": 0.25}, ] funding.check_funding( @@ -1229,7 +1229,7 @@ def test_uplift( ) assert funding.eco4_funding == 5302.3949999999995 - assert funding.full_project_abs == 392.77 # is 280 + the 112.77 innovation uplift + assert funding.full_project_abs == 280 # Doesn't include the eco4 uplift assert funding.eco4_uplift == 112.77 @@ -1311,7 +1311,7 @@ def test_private_epc_e_solar_needs_heating( tenure="Private", ) - measures = [{"type": "solar_pv", "is_innovation": True, "uplift": 0.45}] + measures = [{"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}] funding.check_funding( measures=measures, starting_sap=54, # EPC E - eligible for private on EPC @@ -1360,10 +1360,10 @@ def test_private_epc_e_solar_with_heating_and_minimum_insulation_produces_uplift ) measures = [ - {"type": "solar_pv", "is_innovation": True, "uplift": 0.45}, - {"type": "air_source_heat_pump", "is_innovation": False, "uplift": 0}, - {"type": "cavity_wall_insulation", "is_innovation": False, "uplift": 0}, - {"type": "loft_insulation", "is_innovation": False, "uplift": 0}, + {"type": "solar_pv", "is_innovation": True, "innovation_uplift": 0.45}, + {"type": "air_source_heat_pump", "is_innovation": False, "innovation_uplift": 0}, + {"type": "cavity_wall_insulation", 
"is_innovation": False, "innovation_uplift": 0}, + {"type": "loft_insulation", "is_innovation": False, "innovation_uplift": 0}, ] funding.check_funding( @@ -1393,3 +1393,85 @@ def test_private_epc_e_solar_with_heating_and_minimum_insulation_produces_uplift assert funding.eco4_uplift and funding.eco4_uplift > 0 # And total funding should include that uplift assert funding.eco4_funding and funding.eco4_funding > 0 + + +def test_existing_gshp_to_ashp(): + r = {'phase': 3, 'parts': [], 'type': 'heating', 'measure_type': 'air_source_heat_pump', + 'description': 'Install a 5KW air source heat pump, and upgrade heating controls to Smart Thermostats, ' + 'room sensors and smart radiator valves (time & temperature zone control). Ensure you have a ' + 'single tariff', + 'starting_u_value': None, 'new_u_value': None, 'sap_points': 7.7, 'already_installed': False, + 'simulation_config': {'mainheat_energy_eff_ending': 'Good', 'hot_water_energy_eff_ending': 'Average', + 'has_air_source_heat_pump_ending': True, 'has_ground_source_heat_pump_ending': False, + 'extra_features_ending': None, + 'thermostatic_control_ending': 'time and temperature zone control', + 'switch_system_ending': None, 'multiple_room_thermostats_ending': False, + 'mainheatc_energy_eff_ending': 'Very Good'}, + 'description_simulation': {'mainheat-description': 'Air source heat pump, radiators, electric', + 'mainheat-energy-eff': 'Good', 'hot-water-energy-eff': 'Average', + 'hotwater-description': 'From main system', + 'mainheatcont-description': 'Time and temperature zone control', + 'mainheatc-energy-eff': 'Very Good'}, 'total': 13188.996000000001, + 'contingency': 3145.8150000000005, 'contingency_rate': 0.35, 'vat': 2080.666, 'labour_hours': 44.7, + 'labour_days': 6.0, 'innovation_rate': 0, 'recommendation_id': '6_phase=3', + 'efficiency': 13188.996000000001, 'co2_equivalent_savings': 0.4999999999999998, + 'heat_demand': 53.20000000000002, 'kwh_savings': 801.5000000000005, + 'energy_cost_savings': 
327.31316785714296 + } + + funding = Funding( + project_scores_matrix=mock_project_scores_matrix, + partial_project_scores_matrix=mock_partial_scores_matrix, + whlg_eligible_postcodes=mock_whlg_postcodes, + eco4_social_cavity_abs_rate=13.5, + eco4_social_solid_abs_rate=17, + eco4_private_cavity_abs_rate=13.5, + eco4_private_solid_abs_rate=17, + gbis_social_cavity_abs_rate=21, + gbis_social_solid_abs_rate=25, + gbis_private_cavity_abs_rate=22, + gbis_private_solid_abs_rate=28, + tenure="Private", + ) + + ( + pps, ppf, iu, ups + ) = funding.get_innovation_uplift( + measure=r, + starting_sap=62, + floor_area=69, + is_cavity=True, + current_wall_uvalue=0.7, + is_partial=False, + existing_li_thickness=200, + mainheating={ + 'original_description': 'Ground source heat pump, radiators, electric', + 'clean_description': 'Ground source heat pump, radiators, electric', 'has_radiators': True, + 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, + 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False, + 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False, + 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, + 'has_community_scheme': False, 'has_ground_source_heat_pump': True, 'has_no_system_present': False, + 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, + 'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, + 'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_hot-water-only': False, + 'has_electric': True, 'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, + 'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, + 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_mineral_and_wood': False, + 
'has_dual_fuel_appliance': False, 'has_assumed': False, 'has_electricaire': False, + 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False + }, + main_fuel={ + 'original_description': 'electricity (not community)', + 'clean_description': 'Electricity not community', 'fuel_type': 'electricity', 'tariff_type': None, + 'is_community': False, 'no_individual_heating_or_community_network': False, + 'complex_fuel_type': None + }, + mainheat_energy_eff="Poor", + ) + + # All should be zero + assert pps == 0 + assert ppf == 0 + assert iu == 0 + assert ups == 0 diff --git a/backend/tests/test_integration.py b/backend/tests/test_integration.py index e6bcfce8..cdc27abd 100644 --- a/backend/tests/test_integration.py +++ b/backend/tests/test_integration.py @@ -1,36 +1,36 @@ -import ast -import json +# import ast +# import json from copy import deepcopy -from dataclasses import replace -from datetime import datetime +# from dataclasses import replace +# from datetime import datetime import random from tqdm import tqdm -import pandas as pd +# import pandas as pd import numpy as np from etl.epc.Record import EPCRecord -from backend.SearchEpc import SearchEpc -from sqlalchemy.exc import IntegrityError, OperationalError -from sqlalchemy.orm import sessionmaker -from starlette.responses import Response +# from backend.SearchEpc import SearchEpc +# from sqlalchemy.exc import IntegrityError, OperationalError +# from sqlalchemy.orm import sessionmaker +# from starlette.responses import Response -from backend.app.config import get_settings, get_prediction_buckets -from backend.app.db.connection import db_engine -from backend.app.db.functions.materials_functions import get_materials -from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations -from backend.app.db.functions.property_functions import ( - create_property, create_property_details_epc, create_property_targets, update_property_data, - update_or_create_property_spatial_details -) 
-from backend.app.db.functions.recommendations_functions import ( - create_plan, upload_recommendations, create_scenario -) -from backend.app.db.functions.funding_functions import upload_funding -from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn -from backend.app.db.models.portfolio import rating_lookup +# from backend.app.config import get_settings, get_prediction_buckets +# from backend.app.db.connection import db_engine +# from backend.app.db.functions.materials_functions import get_materials +# from backend.app.db.functions.portfolio_functions import aggregate_portfolio_recommendations +# from backend.app.db.functions.property_functions import ( +# create_property, create_property_details_epc, create_property_targets, update_property_data, +# update_or_create_property_spatial_details +# ) +# from backend.app.db.functions.recommendations_functions import ( +# create_plan, upload_recommendations, create_scenario +# ) +# from backend.app.db.functions.funding_functions import upload_funding +# from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn +# from backend.app.db.models.portfolio import rating_lookup from backend.app.plan.schemas import PlanTriggerRequest, WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES -from backend.app.plan.utils import get_cleaned -from backend.app.utils import sap_to_epc +# from backend.app.plan.utils import get_cleaned +# from backend.app.utils import sap_to_epc import backend.app.assumptions as assumptions from backend.ml_models.api import ModelApi @@ -41,13 +41,13 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser import recommendations.optimiser.optimiser_functions as optimiser_functions from recommendations.Recommendations import Recommendations -from utils.logger import setup_logger -from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, 
read_excel_from_s3 -from backend.ml_models.Valuation import PropertyValuation - -from etl.bill_savings.KwhData import KwhData -from etl.spatial.OpenUprnClient import OpenUprnClient -from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc +# from utils.logger import setup_logger +# from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3, read_excel_from_s3 +# from backend.ml_models.Valuation import PropertyValuation +# +# from etl.bill_savings.KwhData import KwhData +# from etl.spatial.OpenUprnClient import OpenUprnClient +# from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc from backend.Funding import Funding from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths @@ -72,7 +72,7 @@ with open("kwh_client_for_deletion.pkl", "rb") as f: kwh_client = pickle.load(f) epc_data = pd.read_csv( - "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E06000002-Middlesbrough/certificates.csv", + "/Users/khalimconn-kowlessar/Downloads/domestic-E06000002-Middlesbrough/certificates.csv", low_memory=False ) @@ -82,6 +82,12 @@ costs_by_floor_area = epc_data[ ][["TOTAL_FLOOR_AREA", "CURRENT_ENERGY_EFFICIENCY", "LIGHTING_COST_CURRENT", "HEATING_COST_CURRENT", "HOT_WATER_COST_CURRENT"]].copy() +epc_data = epc_data[ + (epc_data["MAINHEAT_DESCRIPTION"].str.contains("SAP05:") == False) & + (~epc_data["LIGHTING_COST_CURRENT"].isin([None, ""])) & + (~pd.isnull(epc_data["LIGHTING_COST_CURRENT"])) + ] + costs_by_floor_area.columns = [c.lower().replace("_", "-") for c in costs_by_floor_area.columns] for c in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: costs_by_floor_area[c + "_scaled"] = costs_by_floor_area[c] / costs_by_floor_area["total-floor-area"] @@ -90,8 +96,10 @@ costs_by_floor_area = costs_by_floor_area.groupby("current-energy-efficiency")[ ["lighting-cost-current_scaled", "heating-cost-current_scaled", "hot-water-cost-current_scaled"] ].mean().reset_index() 
-sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2015-01-01"].drop_duplicates("UPRN").sample( - 1000).reset_index(drop=True) +epc_data = epc_data[~pd.isnull(epc_data["UPRN"])] + +sample_epc_data = epc_data[pd.to_datetime(epc_data["LODGEMENT_DATE"]) >= "2008-01-01"].drop_duplicates("UPRN").sample( + 50000).reset_index(drop=True) # TODO: In Property find_energy_sources, sort out biomass community heating - what fuel type # TODO: We might be able to remove find_energy_sources entirely and remove estimate_electrical_consumption. It's used @@ -161,6 +169,8 @@ mocked_kwh_predictions["heating_kwh_predictions"] = pd.DataFrame(mocked_kwh_pred mocked_kwh_predictions["hotwater_kwh_predictions"] = pd.DataFrame(mocked_kwh_predictions["hotwater_kwh_predictions"]) # TODO: We might want to implement this generally, via an ETL process +for x in cleaned["mainheat-description"]: + x["has_wood_chips"] = False for p in input_properties: for col in ["lighting-cost-current", "heating-cost-current", "hot-water-cost-current"]: if pd.isnull(p.data[col]): @@ -302,10 +312,19 @@ body = PlanTriggerRequest( 'sheet_name': None, 'sheet_count': None, 'index_start': None, 'index_end': None} ) +eco_packages = {} +# For testing +for p in input_properties: + eco_packages[p.id] = (None, None, None) + for p in tqdm(input_properties): if not recommendations.get(p.id): continue + # Temp allow to skip + if not isinstance(recommendations.get(p.id)[0], list): + continue + # we need to double unlist because we have a list of lists property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] @@ -327,34 +346,34 @@ for p in tqdm(input_properties): fixed_gain = optimiser_functions.calculate_fixed_gain( property_required_measures, recommendations, p, needs_ventilation ) - gain = optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain) + gain 
= optimiser_functions.calculate_gain(body=body, p=p, fixed_gain=fixed_gain, eco_packages=eco_packages) - funding = Funding( - tenure="Social", - project_scores_matrix=project_scores_matrix, - partial_project_scores_matrix=partial_project_scores_matrix, - whlg_eligible_postcodes=whlg_eligible_postcodes, - eco4_social_cavity_abs_rate=12.5, - eco4_social_solid_abs_rate=17, - eco4_private_cavity_abs_rate=12.5, - eco4_private_solid_abs_rate=17, - gbis_social_cavity_abs_rate=21, - gbis_social_solid_abs_rate=25, - gbis_private_cavity_abs_rate=21, - gbis_private_solid_abs_rate=28, - ) - - li_thickness = convert_thickness_to_numeric( - p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] - ) - current_wall_u_value = p.walls["thermal_transmittance"] - if current_wall_u_value is None: - current_wall_u_value = get_wall_u_value( - clean_description=p.walls["clean_description"], - age_band=p.age_band, - is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], - is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], - ) + # funding = Funding( + # tenure=body.housing_type, + # project_scores_matrix=project_scores_matrix, + # partial_project_scores_matrix=partial_project_scores_matrix, + # whlg_eligible_postcodes=whlg_eligible_postcodes, + # eco4_social_cavity_abs_rate=13, + # eco4_social_solid_abs_rate=17, + # eco4_private_cavity_abs_rate=13, + # eco4_private_solid_abs_rate=17, + # gbis_social_cavity_abs_rate=21, + # gbis_social_solid_abs_rate=25, + # gbis_private_cavity_abs_rate=21, + # gbis_private_solid_abs_rate=28, + # ) + # + # li_thickness = convert_thickness_to_numeric( + # p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] + # ) + # current_wall_u_value = p.walls["thermal_transmittance"] + # if current_wall_u_value is None: + # current_wall_u_value = get_wall_u_value( + # clean_description=p.walls["clean_description"], + # age_band=p.age_band, + # is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], + # 
is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], + # ) # We insert the innovation uplift measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) @@ -362,41 +381,53 @@ for p in tqdm(input_properties): # TODO: Turn this into a function and store the innovaiton uplift for group in measures_to_optimise_with_uplift: for r in group: - - if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", - "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: - ( - r["partial_project_score"], - r["partial_project_funding"], - r["innovation_uplift"], - r["uplift_project_score"], - ) = ( - 0, 0, 0, 0 - ) - continue - - ( - r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], - r["uplift_project_score"] - ) = funding.get_innovation_uplift( - measure=r, - starting_sap=p.data["current-energy-efficiency"], - floor_area=p.floor_area, - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], + (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"]) = ( + 0, 0, 0, 0 ) + # if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", + # "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: + # ( + # r["partial_project_score"], + # r["partial_project_funding"], + # r["innovation_uplift"], + # r["uplift_project_score"], + # ) = ( + # 0, 0, 0, 0 + # ) + # continue + # + # ( + # r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + # r["uplift_project_score"] + # ) = funding.get_innovation_uplift( + # measure=r, + # starting_sap=int(p.data["current-energy-efficiency"]), + # floor_area=p.floor_area, + # 
is_cavity=p.walls["is_cavity_wall"], + # current_wall_uvalue=current_wall_u_value, + # is_partial="partial" in p.walls["clean_description"].lower(), + # existing_li_thickness=li_thickness, + # mainheating=p.main_heating, + # main_fuel=p.main_fuel, + # mainheat_energy_eff=p.data["mainheat-energy-eff"], + # ) + + if r["already_installed"]: + # if already installed, we zero out the uplift and funding + (r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"]) = ( + 0, 0, 0, 0 + ) + input_measures = optimiser_functions.prepare_input_measures( - measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True + measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True, + property_eco_packages=eco_packages.get(p.id) ) # When the goal is Increasing EPC, we can run the funding optimiser - if body.goal == "Increasing EPC": + if body.goal == "Switch off": solutions = optimise_with_funding_paths( p=p, @@ -404,20 +435,14 @@ for p in tqdm(input_properties): housing_type=body.housing_type, budget=body.budget, target_gain=gain, - funding=funding + funding=funding, + work_package=eco_packages[p.id][2] ) - # Given the solutions we select the optimal one - solutions["cost_less_full_project_funding"] = np.where( - solutions["scheme"] == "eco4", - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], - solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] - ) - - solutions["cost_less_full_project_funding"] = ( - solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] - ) - solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) + # If the solution isn't eligible, we can't really consider it + solutions = solutions[ + (solutions["is_eligible"] & (solutions["scheme"] != "none")) | (solutions["scheme"] == "none") + ] if solutions["meets_upgrade_target"].any(): # If we have a 
solution that meets the upgrade target, we select that one @@ -428,9 +453,13 @@ for p in tqdm(input_properties): # This is the list of measures that we will recommend scheme = optimal_solution["scheme"] - funded_measures = optimal_solution["items"] if scheme != "none" else [] - solution = optimal_solution["items"] + optimal_solution["unfunded_items"] - # This is the total amount of funding that the project will produce (including uplifts) (£) + + # We create this full list of selected measures, which is used in the next section for setting + # default measures + solution = deepcopy(optimal_solution["items"]) + deepcopy(optimal_solution["unfunded_items"]) + funded_measures = deepcopy(optimal_solution["items"]) if scheme != "none" else [] + + # This is the total amount of funding that the project will produce (EXCLUDING uplifts) (£) project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ optimal_solution["partial_project_funding"] # This is the total amount of funding associated to the uplift (£) @@ -468,37 +497,43 @@ for p in tqdm(input_properties): ROOF_INSULATION_MEASURES ) - funding.check_funding( - measures=solution, - starting_sap=p.data["current-energy-efficiency"], - ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), - floor_area=p.floor_area, - mainheat_description=p.main_heating["clean_description"], - heating_control_description=p.main_heating_controls["clean_description"], - is_cavity=p.walls["is_cavity_wall"], - current_wall_uvalue=current_wall_u_value, - is_partial="partial" in p.walls["clean_description"].lower(), - existing_li_thickness=li_thickness, - mainheating=p.main_heating, - main_fuel=p.main_fuel, - mainheat_energy_eff=p.data["mainheat-energy-eff"], - has_wall_insulation_recommendation=has_wall_insulation_recommendation, - has_roof_insulation_recommendation=has_roof_insulation_recommendation, - ) + # funding.check_funding( + # measures=solution, + # 
starting_sap=int(p.data["current-energy-efficiency"]), + # ending_sap=int(p.data["current-energy-efficiency"]) + sum([x["gain"] for x in solution]), + # floor_area=p.floor_area, + # mainheat_description=p.main_heating["clean_description"], + # heating_control_description=p.main_heating_controls["clean_description"], + # is_cavity=p.walls["is_cavity_wall"], + # current_wall_uvalue=current_wall_u_value, + # is_partial="partial" in p.walls["clean_description"].lower(), + # existing_li_thickness=li_thickness, + # mainheating=p.main_heating, + # main_fuel=p.main_fuel, + # mainheat_energy_eff=p.data["mainheat-energy-eff"], + # has_wall_insulation_recommendation=has_wall_insulation_recommendation, + # has_roof_insulation_recommendation=has_roof_insulation_recommendation, + # ) # Determine the scheme scheme = "none" - if funding.eco4_eligible: - scheme = "eco4" - if scheme == "none" and funding.gbis_eligible: - scheme = "gbis" + # if funding.eco4_eligible: + # scheme = "eco4" + # if scheme == "none" and funding.gbis_eligible: + # scheme = "gbis" - funded_measures = solution if scheme in ["gbis", "eco4"] else [] - project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs - total_uplift = funding.eco4_uplift - full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs - partial_project_score = funding.partial_project_abs - uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift + funded_measures = [] + # funded_measures = solution if scheme in ["gbis", "eco4"] else [] + # project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs + project_funding = 0 + # total_uplift = funding.eco4_uplift + total_uplift = 0 + # full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs + full_project_score = 0 + # partial_project_score = funding.partial_project_abs + partial_project_score = 0 + # uplift_project_score = 
funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift + uplift_project_score = 0 selected = {r["id"] for r in solution} @@ -510,10 +545,10 @@ for p in tqdm(input_properties): # Add best practice measures (ventilation/trickle vents) selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) - # Final flattening - Don't do this! - # recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( - # p.id, recommendations, selected - # ) + # Final flattening + recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( + p.id, recommendations, selected + ) # TODO: functionise for measure in funded_measures: @@ -529,3 +564,231 @@ for p in tqdm(input_properties): partial_project_score=partial_project_score, uplift_project_score=uplift_project_score ) + +# for p in tqdm(input_properties): +# if not recommendations.get(p.id): +# continue +# +# # we need to double unlist because we have a list of lists +# property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs} +# property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] +# measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] +# +# # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore +# # its inclusion +# needs_ventilation = any( +# x in property_measure_types for x in assumptions.measures_needing_ventilation +# ) and not p.has_ventilation +# +# if not measures_to_optimise: +# # Nothing to do, we just reshape the recommendations +# recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( +# p.id, recommendations, set() +# ) +# continue +# +# fixed_gain = optimiser_functions.calculate_fixed_gain( +# property_required_measures, recommendations, p, needs_ventilation +# ) +# gain = optimiser_functions.calculate_gain(body=body, 
p=p, fixed_gain=fixed_gain) +# +# funding = Funding( +# tenure="Social", +# project_scores_matrix=project_scores_matrix, +# partial_project_scores_matrix=partial_project_scores_matrix, +# whlg_eligible_postcodes=whlg_eligible_postcodes, +# eco4_social_cavity_abs_rate=12.5, +# eco4_social_solid_abs_rate=17, +# eco4_private_cavity_abs_rate=12.5, +# eco4_private_solid_abs_rate=17, +# gbis_social_cavity_abs_rate=21, +# gbis_social_solid_abs_rate=25, +# gbis_private_cavity_abs_rate=21, +# gbis_private_solid_abs_rate=28, +# ) +# +# li_thickness = convert_thickness_to_numeric( +# p.roof["insulation_thickness"], p.roof["is_pitched"], p.roof["is_flat"] +# ) +# current_wall_u_value = p.walls["thermal_transmittance"] +# if current_wall_u_value is None: +# current_wall_u_value = get_wall_u_value( +# clean_description=p.walls["clean_description"], +# age_band=p.age_band, +# is_granite_or_whinstone=p.walls["is_granite_or_whinstone"], +# is_sandstone_or_limestone=p.walls["is_sandstone_or_limestone"], +# ) +# +# # We insert the innovation uplift +# measures_to_optimise_with_uplift = deepcopy(measures_to_optimise) +# +# # TODO: Turn this into a function and store the innovaiton uplift +# for group in measures_to_optimise_with_uplift: +# for r in group: +# +# if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating", +# "extension_cavity_wall_insulation", "draught_proofing", "sealing_open_fireplace"]: +# ( +# r["partial_project_score"], +# r["partial_project_funding"], +# r["innovation_uplift"], +# r["uplift_project_score"], +# ) = ( +# 0, 0, 0, 0 +# ) +# continue +# +# ( +# r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], +# r["uplift_project_score"] +# ) = funding.get_innovation_uplift( +# measure=r, +# starting_sap=p.data["current-energy-efficiency"], +# floor_area=p.floor_area, +# is_cavity=p.walls["is_cavity_wall"], +# current_wall_uvalue=current_wall_u_value, +# is_partial="partial" in 
p.walls["clean_description"].lower(), +# existing_li_thickness=li_thickness, +# mainheating=p.main_heating, +# main_fuel=p.main_fuel, +# mainheat_energy_eff=p.data["mainheat-energy-eff"], +# ) +# +# input_measures = optimiser_functions.prepare_input_measures( +# measures_to_optimise_with_uplift, body.goal, needs_ventilation, funding=True +# ) +# +# # When the goal is Increasing EPC, we can run the funding optimiser +# if body.goal == "Increasing EPC": +# +# solutions = optimise_with_funding_paths( +# p=p, +# input_measures=input_measures, +# housing_type=body.housing_type, +# budget=body.budget, +# target_gain=gain, +# funding=funding +# ) +# +# # Given the solutions we select the optimal one +# solutions["cost_less_full_project_funding"] = np.where( +# solutions["scheme"] == "eco4", +# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], +# solutions["total_cost"] - solutions["partial_project_funding"] - solutions["total_uplift"] +# ) +# +# solutions["cost_less_full_project_funding"] = ( +# solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"] +# ) +# solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) +# +# if solutions["meets_upgrade_target"].any(): +# # If we have a solution that meets the upgrade target, we select that one +# optimal_solution = solutions[solutions["meets_upgrade_target"]].iloc[0] +# else: +# # Pick the cheapest +# optimal_solution = solutions.iloc[0] +# +# # This is the list of measures that we will recommend +# scheme = optimal_solution["scheme"] +# funded_measures = optimal_solution["items"] if scheme != "none" else [] +# solution = optimal_solution["items"] + optimal_solution["unfunded_items"] +# # This is the total amount of funding that the project will produce (including uplifts) (£) +# project_funding = optimal_solution["full_project_funding"] if scheme == "eco4" else \ +# optimal_solution["partial_project_funding"] +# # This is the 
total amount of funding associated to the uplift (£) +# total_uplift = optimal_solution["total_uplift"] +# # This is the funding scheme selected +# # This is the full project ABS +# full_project_score = optimal_solution["project_score"] +# # This is the partial project ABS +# partial_project_score = optimal_solution["partial_project_score"] +# # This is the uplift score ABS +# uplift_project_score = optimal_solution["total_uplift_score"] +# else: +# # We optimise and then we determine eligibility for funding, based on the measures selected +# optimiser = ( +# GainOptimiser( +# input_measures, max_cost=body.budget, max_gain=gain, allow_slack=False +# ) if body.budget else CostOptimiser(input_measures, min_gain=gain) +# ) +# optimiser.setup() +# optimiser.solve() +# solution = optimiser.solution +# +# recommendation_types = [] +# for measures in input_measures: +# for measure in measures: +# recommendation_types.append(measure["type"]) +# recommendation_types = set(recommendation_types) +# +# has_wall_insulation_recommendation = any( +# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in +# WALL_INSULATION_MEASURES +# ) +# has_roof_insulation_recommendation = any( +# (m in recommendation_types or "+".join([m, "mechanical_ventilation"])) for m in +# ROOF_INSULATION_MEASURES +# ) +# +# funding.check_funding( +# measures=solution, +# starting_sap=p.data["current-energy-efficiency"], +# ending_sap=p.data["current-energy-efficiency"] + sum([x["gain"] for x in solution]), +# floor_area=p.floor_area, +# mainheat_description=p.main_heating["clean_description"], +# heating_control_description=p.main_heating_controls["clean_description"], +# is_cavity=p.walls["is_cavity_wall"], +# current_wall_uvalue=current_wall_u_value, +# is_partial="partial" in p.walls["clean_description"].lower(), +# existing_li_thickness=li_thickness, +# mainheating=p.main_heating, +# main_fuel=p.main_fuel, +# mainheat_energy_eff=p.data["mainheat-energy-eff"], +# 
has_wall_insulation_recommendation=has_wall_insulation_recommendation, +# has_roof_insulation_recommendation=has_roof_insulation_recommendation, +# ) +# +# # Determine the scheme +# scheme = "none" +# if funding.eco4_eligible: +# scheme = "eco4" +# if scheme == "none" and funding.gbis_eligible: +# scheme = "gbis" +# +# funded_measures = solution if scheme in ["gbis", "eco4"] else [] +# project_funding = 0 if funding.full_project_abs is not None else funding.full_project_abs +# total_uplift = funding.eco4_uplift +# full_project_score = 0 if funding.full_project_abs is not None else funding.full_project_abs +# partial_project_score = funding.partial_project_abs +# uplift_project_score = funding.eco4_uplift if scheme == "eco4" else funding.gbis_uplift +# +# selected = {r["id"] for r in solution} +# +# if property_required_measures: +# solution = optimiser_functions.add_required_measures( +# property_id=p.id, property_required_measures=property_required_measures, +# recommendations=recommendations, selected=selected, +# ) +# +# # Add best practice measures (ventilation/trickle vents) +# selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) +# # Final flattening - Don't do this! 
class SAPUpliftTrainer:
    """
    Offline trainer that fits a ridge regression and exposes its coefficients.

    The model regresses the "SAP points" column on the "starting SAP" and
    "PV Array size" columns of the training frame. The fitted numbers are
    meant to be copied into the runtime scoring class.
    """

    def __init__(self, alpha=1.0):
        # alpha is handed to the Ridge estimator unchanged.
        self.alpha = alpha
        self.model = Ridge(alpha=self.alpha)
        self.feature_names = ["starting SAP", "PV Array size"]

    def prepare_data(self, df):
        """Return ``(X, y)``: the feature columns and the "SAP points" target."""
        frame = df.copy()
        return frame[self.feature_names], frame["SAP points"]

    def fit(self, df):
        """Fit the ridge model on the features and target taken from ``df``."""
        features, target = self.prepare_data(df)
        self.model.fit(features, target)

    def coefficients(self):
        """Return the intercept and one coefficient per feature as plain floats."""
        fitted = {"intercept": float(self.model.intercept_)}
        for feature, weight in zip(self.feature_names, self.model.coef_):
            fitted[feature] = float(weight)
        return fitted

    def export_runtime_config(self):
        """
        Returns a dict suitable for copy-pasting into the runtime scoring class.
        """
        fitted = self.coefficients()
        return {
            "intercept": fitted["intercept"],
            "coef_starting_sap": fitted["starting SAP"],
            "coef_pv_size": fitted["PV Array size"],
        }
index 00000000..d25449c5 --- /dev/null +++ b/etl/customers/lincs_rural/get_missed.py @@ -0,0 +1,47 @@ +# After going back to Lincs rural, they gave us some additional data that we can use to try to fetch missed UPRNs again +import pandas as pd + +# missed = pd.read_excel( +# "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_nov_2025.xlsx", +# sheet_name="Missed Properties" +# ) +# missed = missed[~pd.isnull(missed["rrn"])] + +prepared = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/lincs_rural_standardised_ara_nov_2025.xlsx", + sheet_name="Standardised Asset List" +) + +updated_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes - Copy.xlsx", + sheet_name="PROPERTY EPC RATINGS" +) +updated_data = updated_data[~pd.isnull(updated_data["Property Ref."])] + +missed = updated_data[~updated_data["Property Ref."].isin(prepared["landlord_property_id"].values.tolist())].copy() +# missed.to_csv("/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_uprn.csv") +# We'll grab the UPRNs manually and then pull them in, and prepare for ARA + +missing_uprns = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_uprn.csv") + +missing_uprns["landlord_property_id"] = missing_uprns["Property Ref."].copy() +missing_uprns["domna_property_id"] = missing_uprns["Property Ref."].copy() +missing_uprns["domna_address_1"] = missing_uprns['Unnamed: 1'].str.split(",").str[0].str.strip() +missing_uprns["postcode"] = missing_uprns['Unnamed: 1'].str.split(",").str[-1].str.strip() +missing_uprns["landlord_property_type"] = "unknown" +missing_uprns["landlord_built_form"] = "unknown" +missing_uprns["domna_full_address"] = missing_uprns['Unnamed: 1'].copy() + +missed_standardised_for_ara = missing_uprns[ + ['landlord_property_id', 'domna_address_1', 'landlord_property_type', 'landlord_built_form', 'postcode', + 'domna_property_id', 'UPRN'] +].rename( + columns={"UPRN": "epc_os_uprn"} +) + +# Store 
+missed_standardised_for_ara.to_excel( + "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_standardised_ara_nov_2025.xlsx", + index=False, + sheet_name="Standardised Asset List" +) diff --git a/etl/customers/lincs_rural/prepare_data.py b/etl/customers/lincs_rural/prepare_data.py new file mode 100644 index 00000000..0a3be7fe --- /dev/null +++ b/etl/customers/lincs_rural/prepare_data.py @@ -0,0 +1,91 @@ +""" +Rough script to prepare the data for Lincs Rural project +""" +from tqdm import tqdm +import pandas as pd +import os +from dotenv import load_dotenv +from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc +from backend.SearchEpc import SearchEpc + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +data = pd.read_excel( + "/Users/khalimconn-kowlessar/Downloads/MASTER LIST EPCS UPDATED November 2025 Domna Homes.xlsx", + sheet_name="PROPERTY EPC RATINGS" +) + +# We have property RRNs - we need UPRN + +standardised_ara_list = [] +missed = [] +for _, x in tqdm(data.iterrows(), total=len(data)): + try: + rrn = x["EPC Ref."] + + # Fetch from find my epc + retriever = RetrieveFindMyEpc( + address="", + postcode="", + rrn=rrn, + address_postal_town="", + ) + + find_epc_data = retriever.retrieve_newest_find_my_epc_data(rrn=rrn) + + # Find the UPRN + epc_searcher = SearchEpc( + address1=str(find_epc_data["address1"]), + postcode=str(find_epc_data["postcode"]), + auth_token=EPC_AUTH_TOKEN, + os_api_key="", + property_type=None, + fast=False, + full_address=",".join([find_epc_data["address1"], find_epc_data["address2"]]), + max_retries=5, + ) + epc_searcher.find_property(skip_os=True) + + # Append in format we need + # Stuff we need: + standardised_ara_list.append( + { + "landlord_property_id": x["Property Ref."], + "domna_address_1": find_epc_data["address1"], + "postcode": find_epc_data["postcode"], + "landlord_property_type": epc_searcher.newest_epc.get("property-type"), + "landlord_built_form": 
epc_searcher.newest_epc.get("built-form"), + "landlord_heating_system": epc_searcher.newest_epc.get("mainheat-description", ""), + "epc_os_uprn": epc_searcher.newest_epc.get("uprn"), + "domna_property_id": x["Property Ref."], + "domna_full_address": epc_searcher.newest_epc.get( + "address", ", ".join([ + find_epc_data["address1"], + find_epc_data["address2"], + ]) + ), + } + ) + except Exception as e: + missed.append({ + "property_ref": x["Property Ref."], + "rrn": x["EPC Ref."], + "error": str(e) + }) + +missed_df = pd.DataFrame(missed) + +# Store +standardised_ara_df = pd.DataFrame(standardised_ara_list) +standardised_ara_df.to_excel( + "/Users/khalimconn-kowlessar/Downloads/lincs_rural_standardised_ara_nov_2025.xlsx", + index=False, + sheet_name="Standardised Asset List" +) +# Store missed +missed_df.to_excel( + "/Users/khalimconn-kowlessar/Downloads/lincs_rural_missed_nov_2025.xlsx", + index=False, + sheet_name="Missed Properties" +) diff --git a/etl/customers/orbit/funding_example_portfolio.py b/etl/customers/orbit/funding_example_portfolio.py index cf0e151f..c1ade44d 100644 --- a/etl/customers/orbit/funding_example_portfolio.py +++ b/etl/customers/orbit/funding_example_portfolio.py @@ -114,7 +114,7 @@ def app(): "lighting", "secondary_heating", "boiler_upgrade", - "high_heat_retention_storage_heater", + "high_heat_retention_storage_heaters", ], "budget": None, } diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/a_data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/a_data_prep.py new file mode 100644 index 00000000..4dd07ee5 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/a_data_prep.py @@ -0,0 +1,369 @@ +""" +This scipt prepares the raw data that was sent over by Peabody for production of +a standardised asset list + +They have sent over just short of 100,000 properties and so, to make this easier, we will do the following +1) Break the data up into subsets of 25,000 +2) Combine the data provided into a single 
list +""" +import json +import time +import os +import pandas as pd +import numpy as np +from tqdm import tqdm +from dotenv import load_dotenv +from asset_list.utils import get_data_for_property +from utils.logger import setup_logger + +logger = setup_logger() + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +property_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Properties" +) +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +# Basic overview: +# 1) We have 10,634 postcodes. If we needed to make requests to the ordnance survey API for +# all of these postcodes, it would cost at least £106, not accounting for double requests for postcodes +# where we have more than 100 properties (WE DONT!) +# 2) This is on average 9.36 properties per postcode +# 3) The UPRN in the property_list matches to the Org Ref in the sustainability data. These +# is an additional UPRN column in sustainability data which appears to be the ordnance survey UPRN +# 4) There appears to be some anomalous records, e.g. a flat with 543 m2 floor area and another flat +# with 6m2 floor area +# 5) Based on the residential indicator, all properties appear to be resi +# 6) We should do some quick calcs on how much it might cost to fetch all of the solar API data +# 7) We have 8785 missing UPRNS, which we should potentially try and fill +# 8) In the backend, we should probably start storing the raw EPC input data to allow for much quicker +# re-runs. All we really need to do is store the find my EPC data, perhaps against UPRN and RRN, as well +# as the raw EPC data, against uprn. 
def classify_floor_area(x):
    """
    Return the floor-area band label for a floor area value.

    The caller applies this to the "Total Floor Area (m2)" column.

    :param x: floor area as a number; may be None/NaN when the value is missing
    :return: one of "0-72", "73-97", "98-199", "200+", or "Unknown" when the
        value is missing
    """
    # A missing value fails every `<=` comparison below and would otherwise fall
    # through to "200+" (NaN) or raise a TypeError (None), so band it explicitly.
    if pd.isnull(x):
        return "Unknown"
    if x <= 72:
        return "0-72"
    if x <= 97:
        return "73-97"
    if x <= 199:
        return "98-199"
    return "200+"
double glazed, secondary glazed, triple glazed) +sustainability_data["Glazing Type"] = sustainability_data["Glazing"].copy() +# sustainability_data["Glazing Type"] = np.where( +# sustainability_data["Glazing Type"].isin( +# ['Double 2002 or later', 'Double before 2002', 'Double but age unknown', 'DoubleKnownData'] +# ), +# "Double Glazed", +# sustainability_data["Glazing Type"], +# ) +# sustainability_data["Glazing Type"] = np.where( +# sustainability_data["Glazing Type"].isin(['Triple', 'TripleKnownData']), +# "Triple Glazed", +# sustainability_data["Glazing Type"], +# ) + +# 3) Group up boiler efficiency A, B-D, E - G? or someting like this +sustainability_data["Boiler Efficiency Group"] = sustainability_data["Boiler Efficiency"].copy() +# sustainability_data["Boiler Efficiency Group"] = np.where( +# sustainability_data["Boiler Efficiency Group"].isin(['B', 'C', 'D']), +# "B-D", +# sustainability_data["Boiler Efficiency Group"], +# ) +# sustainability_data["Boiler Efficiency Group"] = np.where( +# sustainability_data["Boiler Efficiency Group"].isin(['E', 'F', 'G']), +# "E-G", +# sustainability_data["Boiler Efficiency Group"], +# ) + +# 4) Group up main fuel into gas, electric, oil, other? 
+sustainability_data["Main Fuel Group"] = sustainability_data["Main Fuel"].copy() +# sustainability_data["Main Fuel Group"] = np.where( +# sustainability_data["Main Fuel Group"].isin( +# ["SmokelessCoal", "BiomassCommunity", "B30DCommunity"] +# ), +# "Other Fuel", +# sustainability_data["Main Fuel Group"], +# ) + +# 5) Wall Construction - group up Sandstone and Granite into one category +# sustainability_data["Wall Construction"] = np.where( +# sustainability_data["Wall Construction"].isin(["Sandstone", "Granite"]), +# "Sandstone/Granite", +# sustainability_data["Wall Construction"] +# ) + +# sustainability_data["Wall Construction"] = np.where( +# sustainability_data["Wall Construction"].isin(["Timber Frame", "System"]), +# "Timber/System", +# sustainability_data["Wall Construction"] +# ) + +# 6) Reduce or remove floor construction +# sustainability_data["Floor Construction"] = np.where( +# sustainability_data["Floor Construction"].isin(["SuspendedTimber", "SuspendedNotTimber"]), +# "Suspended Floor", +# sustainability_data["Floor Construction"] +# ) + +# 7) Reduce wall insulation +# sustainability_data["Wall Insulation"] = np.where( +# sustainability_data["Wall Insulation"].isin( +# ["FilledCavityPlusInternal", "FilledCavityPlusExternal", "FilledCavity", "External", "Internal"] +# ), +# "Insulated", +# sustainability_data["Wall Insulation"] +# ) + +# 8) Fill floor insulation +sustainability_data["Floor Insulation"] = sustainability_data["Floor Insulation"].fillna("Unknown") + +# 9) Reduce Age bands +# sustainability_data["Construction Years"] = np.where( +# sustainability_data["Construction Years"].isin(["2003-2006", "2007-2011", "2012 onwards"]), +# "2003 onwards", +# sustainability_data["Construction Years"], +# ) + +# sustainability_data["Construction Years"] = np.where( +# sustainability_data["Construction Years"].isin(["Before 1900", "1900-1929"]), +# "Before 1929", +# sustainability_data["Construction Years"], +# ) + +# sustainability_data["Construction 
Years"] = np.where( +# sustainability_data["Construction Years"].isin(["1983-1990", "1991-1995"]), +# "1983-1995", +# sustainability_data["Construction Years"], +# ) + +# sustainability_data["Construction Years"] = np.where( +# sustainability_data["Construction Years"].isin(["1950-1966", "1967-1975", "1976-1982"]), +# "1950-1982", +# sustainability_data["Construction Years"], +# ) + +# Roof +# sustainability_data["Roof Construction"] = np.where( +# sustainability_data["Roof Construction"].isin( +# ["PitchedNormalLoftAccess", "PitchedThatched", "PitchedNormalNoLoftAccess", "PitchedWithSlopingCeiling"] +# ), +# "Pitched Roof", +# sustainability_data["Roof Construction"] +# ) + +archetype_variables = [ + "Type", "Attachment", "Construction Years", "Wall Construction", "Wall Insulation", + "Roof Construction", "Roof Insulation Category", "Floor Construction", "Floor Insulation", + "Glazing Type", "Heating", "Boiler Efficiency Group", "Main Fuel Group", "Controls Adequacy", + "Floor Area Band" +] + +archetypes = sustainability_data[archetype_variables + ["UPRN"]].dropna().groupby(archetype_variables)[ + "UPRN"].nunique().reset_index().rename(columns={"UPRN": "Count"}).sort_values(by="Count", + ascending=False).reset_index( + drop=True) + +# We take a sample that represents 95% of the properties +archetypes["Cumulative Count"] = archetypes["Count"].cumsum() +archetypes["Cumulative Proportion"] = archetypes["Cumulative Count"] / archetypes["Count"].sum() + +archetypes_95 = archetypes.copy() +archetypes_95["Archetypes_95_reference"] = archetypes_95.index + 1 +archetypes_95["Archetypes_95_reference"] = "Archetype_Sample_" + archetypes_95["Archetypes_95_reference"].astype(str) + +# For the sample, look for invalid looking UPRNs and remove them. 
# Checking distributions
def compare_distributions(full_df, sample_df, column):
    """
    Compare the relative frequency of each value of ``column`` in two frames.

    :param full_df: frame holding the full population
    :param sample_df: frame holding the sample
    :param column: name of the column to compare
    :return: DataFrame indexed by value with "Full" and "Sample" columns holding
        the normalised value counts; a value absent from one side gets 0
    """
    full_dist = full_df[column].value_counts(normalize=True)
    sample_dist = sample_df[column].value_counts(normalize=True)
    comparison = pd.concat([full_dist, sample_dist], axis=1, keys=['Full', 'Sample'])
    return comparison.fillna(0)


# Placeholder copies
def make_full_address(x):
    """
    Build a single comma-separated address from a row's address parts.

    Reads 'domna_address_1', 'Address 2' and 'Address 3' from ``x`` (anything
    indexable by those keys, e.g. a DataFrame row). Missing (None/NaN), empty
    and whitespace-only parts are skipped.

    :param x: row-like mapping with the three address keys
    :return: the remaining parts joined with ", "
    """
    parts = []
    for key in ('domna_address_1', 'Address 2', 'Address 3'):
        value = x[key]
        if pd.isnull(value):
            continue
        # Parts may not be strings (e.g. a numeric house number), and
        # ", ".join raises TypeError on non-strings, so convert first.
        text = str(value).strip()
        if text:
            parts.append(text)
    return ", ".join(parts)
+ +batch_size = 500 +batch_indexes = list(range(0, len(sustainability_data), batch_size)) + +# TODO: SET +working_directory = "" +download_contents = os.listdir(working_directory) + +for i in range(0, len(sustainability_data.standardised_asset_list), batch_size): + + batch_name = f"batch_{i}_to_{i + batch_size}" + # TODO: Check this + if batch_name in download_contents: + # Means we already have the data downloaded + continue + + batch_data = {} + for _, property_data in tqdm(sustainability_data.iterrows(), total=len(sustainability_data)): + os_uprn = property_data["UPRN"] + address1 = property_data["Address 1"] + postcode = property_data["Postcode"] + full_address_components = [ + x for x in [property_data["Address 1"], property_data["Address 2"], property_data["Address 3"]] + if not pd.isnull(x) + ] + full_address = ", ".join(full_address_components) + + fetched_data = get_data_for_property( + address1=address1, + postcode=postcode, + full_address=full_address, + property_type=property_type_map[property_data["Type"]], + built_form=built_form_map[property_data["Attachment"]], + uprn=property_data["UPRN"], + epc_auth_token=EPC_AUTH_TOKEN, + find_my_epc_return_page=True + ) + + batch_data[property_data["Org Ref"]] = fetched_data + + # TODO: We likely want to do something like this: to slow down + # TODO: We also perhaps store the data in batches + if len(batch_data) % 50 == 0 and len(batch_data) > 0: + logger.info("Sleeping for 10 seconds to avoid hitting API rate limit") + time.sleep(10) + + # Store the batch data in the wd + with open(os.path.join(working_directory, batch_name), "wb") as f: + json.dump(batch_data, f) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/b_data_cleanse.py b/etl/customers/peabody/Nov 2025 Consulting Project/b_data_cleanse.py new file mode 100644 index 00000000..13faa371 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/b_data_cleanse.py @@ -0,0 +1,147 @@ +""" +We have found, within the Peabody data, a 
large volume of properties with missing and incorrects +UPRNS and incorrect address data. We want to flag these records and also find missings where we can + +We also have duplicate UPRNS that should be flagged +""" +import json +import time +import os +import pandas as pd +import numpy as np +from tqdm import tqdm +from dotenv import load_dotenv +from asset_list.utils import get_data_for_property +from utils.logger import setup_logger +from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet + +logger = setup_logger() + +load_dotenv(dotenv_path="backend/.env") +EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") + +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) +property_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Properties" +) + +missing_uprns = sustainability_data[pd.isnull(sustainability_data['UPRN'])].copy() + +# Any non-numeric UPRNS or leading with 0s are invalid +non_numeric_uprns = sustainability_data[ + ~sustainability_data['UPRN'].astype(str).str.match(r'^[1-9][0-9]*$') & ~pd.isnull(sustainability_data['UPRN']) + ].copy() +# 70 properties +leading_zero_uprns = sustainability_data[ + sustainability_data['UPRN'].astype(str).str.startswith('0') +].copy() + +# Flag duplicates +duplicate_uprns = sustainability_data[ + sustainability_data.duplicated(subset=['UPRN'], keep=False) & ~pd.isnull(sustainability_data['UPRN']) + ].copy() + +# Store this data +# missing_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting +# Project/data_validation/missing_uprns.csv", index=False) +# non_numeric_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 
2025 Consulting +# Project/data_validation/non_numeric_uprns.csv", index=False) +# leading_zero_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting +# Project/data_validation/leading_zero_uprns.csv", index=False) +# duplicate_uprns.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting +# Project/data_validation/duplicate_uprns.csv", index=False) + +# Take everything remaining +data_needing_validation = sustainability_data[ + ~sustainability_data["Org Ref"].isin( + missing_uprns["Org Ref"].values.tolist() + non_numeric_uprns["Org Ref"].values.tolist() + + leading_zero_uprns["Org Ref"].values.tolist() + duplicate_uprns["Org Ref"].values.tolist() + ) +].copy() + +# TODO: We should build a SAL for UPRNS that are missing, invalid or duplicated + +# We check UPRN validity against our OS data +uprn_filenames = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet" +) + +# We're going to: +# 1) Grab a filename +# 2) Read it in +# 3) Check which UPRNS from our data are in that file +# 4) Keep a record of which UPRNS were found where + +for uprn_file in tqdm(uprn_filenames['filenames'].values, total=len(uprn_filenames)): + spatial_data = read_dataframe_from_s3_parquet( + bucket_name="retrofit-data-dev", file_key=f"spatial/{uprn_file}" + ) + + uprns_in_file = data_needing_validation[ + data_needing_validation['UPRN'].astype('Int64').isin(spatial_data['UPRN'].astype('Int64').values) + ].copy() + + print("Found {} UPRNS in file {}".format(len(uprns_in_file), uprn_file)) + if len(uprns_in_file) > 0: + # Store the found UPRNS in the validation cache + data_to_store = uprns_in_file[["Org Ref", "UPRN"]].copy() + data_to_store["Source File"] = uprn_file + # Store + data_to_store.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + 
f"Project/data_validation/validation_cache/{uprn_file.split('.parquet')[0]}_found_uprns.csv", + index=False + ) + +# Get all of the files: +storage_locations = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation/validation_cache") +# List contents +folder_contents = os.listdir(storage_locations) +# Grab files and concatenate +all_found_uprns = [] +for file in folder_contents: + if file.endswith("_found_uprns.csv"): + df = pd.read_csv(os.path.join(storage_locations, file)) + all_found_uprns.append(df) + +all_found_uprns = pd.concat(all_found_uprns) + +# We now flag any UPRNS that were not found in any of the OS datasets +os_missed_uprns = data_needing_validation[ + ~data_needing_validation['Org Ref'].isin(all_found_uprns['Org Ref'].values.tolist()) +].copy() + +# store +os_missed_uprns.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation/os_missed_uprns.csv", + index=False +) + +# Now build a larger table for standardisation +to_standardised = pd.concat( + [missing_uprns, non_numeric_uprns, leading_zero_uprns, duplicate_uprns, os_missed_uprns] +) + +to_standardised.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation/to_standardise_uprns.xlsx", + index=False) + +# We prepare a finalised dataset to work with, that excludes all problematic properties and leaves us with +# properties for which we have the data we need + +finalised_data = sustainability_data[ + ~sustainability_data["Org Ref"].isin( + to_standardised["Org Ref"].values.tolist() + ) +].copy() + +# Prepare with the column formats we need, as analogous to a_data_prep where we defined an initial working sample diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py new file mode 100644 index 
00000000..3f56d82d --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/c_finalised_modelling_data.py @@ -0,0 +1,95 @@ +import pandas as pd + +### Prepare +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +# Data we want to remove: +missing_uprns = sustainability_data[pd.isnull(sustainability_data['UPRN'])].copy() + +# Any non-numeric UPRNS or leading with 0s are invalid +non_numeric_uprns = sustainability_data[ + ~sustainability_data['UPRN'].astype(str).str.match(r'^[1-9][0-9]*$') & ~pd.isnull(sustainability_data['UPRN']) + ].copy() +# 70 properties +leading_zero_uprns = sustainability_data[ + sustainability_data['UPRN'].astype(str).str.startswith('0') +].copy() + +# Flag duplicates +duplicate_uprns = sustainability_data[ + sustainability_data.duplicated(subset=['UPRN'], keep=False) & ~pd.isnull(sustainability_data['UPRN']) + ].copy() + +# Read in the UPRNs that were not valid based on the OS data +os_missed_uprns = pd.read_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation/os_missed_uprns.csv", +) + +modelling_data = sustainability_data[ + ~sustainability_data["Org Ref"].isin( + missing_uprns["Org Ref"].unique().tolist() + non_numeric_uprns["Org Ref"].unique().tolist() + + leading_zero_uprns["Org Ref"].unique().tolist() + duplicate_uprns["Org Ref"].unique().tolist() + + os_missed_uprns["Org Ref"].unique().tolist() + ) +].copy() + +# Need to prepare for upload +# Variables: + + +modelling_data["landlord_property_id"] = sustainability_data["Org Ref"].copy() +modelling_data["domna_property_id"] = sustainability_data["Org Ref"].copy() + +modelling_data = modelling_data.rename( + columns={ + "Address 1": "domna_address_1", + "Postcode": "postcode", + "Type": "landlord_property_type", + "Attachment": 
"landlord_built_form", + "Heating": "landlord_heating_system", + "UPRN": "epc_os_uprn" + } +) + + +def make_full_address(x): + to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']] + to_join = [x for x in to_join if not pd.isnull(x) and x != ''] + return ", ".join(to_join) + + +modelling_data["domna_full_address"] = modelling_data.apply(lambda x: make_full_address(x), axis=1) + +modelling_data = modelling_data[ + [ + "domna_address_1", "Address 2", "Address 3", "postcode", "landlord_property_type", + "landlord_built_form", "landlord_heating_system", "epc_os_uprn", "Total Floor Area (m2)", + "domna_property_id", "domna_full_address" + ] +] + +modelling_data["landlord_built_form"] = modelling_data["landlord_built_form"].map( + { + "MidTerrace": "Mid-Terrace", + "EndTerrace": "End-Terrace", + "SemiDetached": "Semi-Detached", + "Detached": "Detached", + "EnclosedEndTerrace": "Enclosed End-Terrace", + "EnclosedMidTerrace": "Enclosed Mid-Terrace", + } +) + +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx") +with pd.ExcelWriter(filename) as writer: + modelling_data.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Store the three sections + modelling_data[0:30000].to_excel(writer, sheet_name="Part 1", index=False) + modelling_data[30000:60000].to_excel(writer, sheet_name="Part 2", index=False) + modelling_data[60000:].to_excel(writer, sheet_name="Part 3", index=False) + modelling_data.sample(60).to_excel(writer, sheet_name="Random testing sample", index=False) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py new file mode 100644 index 00000000..68978b08 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -0,0 +1,162 @@ +""" +For the Peabody project, there were a number of subtasks 
that failed due to issues, with the most +prominent being errors with the property address and ID data. + +This script will fetch those failed subtasks, get the associated properties and delete the properties +from the database so that the subtasks can be restarted cleanly. + +Additionally, we wil find the problematic records and remove them + +Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan +or recommendations in case something went wrong +""" +import pandas as pd +from sqlalchemy.orm import Session +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session + + +def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]: + return [ + uprn + for (uprn,) in + session.query(PropertyModel.uprn) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + if uprn is not None + ] + + +with db_session() as session: + completed_uprns = get_uprns_for_portfolio(session, 419) + +# We now find the portfolio of the SAL, which we did not set off +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx", + sheet_name="Standardised Asset List" +) + +missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)] + +# Store +missed_properties.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "d_failed_properties_to_restart_20260102.xlsx", + sheet_name="Standardised Asset List", + index=False +) + +# Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios: +scenario_id = None + +from sqlalchemy import select, func +from sqlalchemy.orm import Session +from backend.app.db.models.recommendations import Plan + + +def count_plans_for_scenario(session: Session, scenario_id: int) -> int: + return session.execute( + select(func.count()) + 
.select_from(Plan) + .where(Plan.scenario_id == scenario_id) + ).scalar_one() + + +with db_session() as session: + n_plans = count_plans_for_scenario(session, scenario_id) + + +def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]: + result = session.execute( + select(Plan.id) + .where(Plan.scenario_id == scenario_id) + ) + return [row.id for row in result] + + +with db_session() as session: + plan_ids = get_plan_ids_for_scenario(session, scenario_id) + +from sqlalchemy import text +from sqlalchemy.orm import Session + + +def chunked(iterable, size): + for i in range(0, len(iterable), size): + yield iterable[i:i + size] + + +from sqlalchemy import text +from sqlalchemy.orm import Session + + +def delete_plan_batch(session: Session, plan_ids: list[int]): + if not plan_ids: + return + + session.execute(text("SET LOCAL lock_timeout = '5s'")) + + params = {"plan_ids": plan_ids} + + # ---------------------------- + # recommendation_materials + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING plan_recommendations pr + WHERE rm.recommendation_id = pr.recommendation_id + AND pr.plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # plan_recommendations + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # recommendations (only those used by these plans) + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation r + WHERE r.id IN ( + SELECT DISTINCT recommendation_id + FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + ) + """), + params, + ) + + # ---------------------------- + # plans LAST + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan + WHERE id = ANY(:plan_ids) + """), + params, + ) + + +batch_size = 25 +total = (len(plan_ids) + batch_size - 
1) // batch_size + +for i, batch in enumerate(chunked(plan_ids, batch_size), start=1): + print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)") + + with db_session() as session: + delete_plan_batch(session, batch) + + print(f"Batch {i} committed") diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/e_additional_uprns.py b/etl/customers/peabody/Nov 2025 Consulting Project/e_additional_uprns.py new file mode 100644 index 00000000..7b7ab5ac --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/e_additional_uprns.py @@ -0,0 +1,145 @@ +# We look to match the missed properties to the UPRNS that were sent over by Peabody +from tqdm import tqdm +import pandas as pd +import os +from utils.s3 import read_dataframe_from_s3_parquet + +cleaned_uprns = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/PeabodyPropertymatched_Dec25_propref_UPRN.xlsx" +) + +# Grab the problematic records +problematic_records = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation/to_standardise_uprns.xlsx" +) +# Remove dupe on Org Ref +problematic_records = problematic_records.drop_duplicates("Org Ref") + +df = problematic_records.merge( + cleaned_uprns, + left_on="Org Ref", + right_on="reference" +) + +# df_had_uprn = df[~pd.isnull(df["UPRN"])] + +# We prepare the data for analysis +df["landlord_property_id"] = df["Org Ref"].copy() +df["domna_property_id"] = df["Org Ref"].copy() + +df = df.rename( + columns={ + "Address 1": "domna_address_1", + "Postcode": "postcode", + "Type": "landlord_property_type", + "Attachment": "landlord_built_form", + "Heating": "landlord_heating_system", + "out_uprn": "epc_os_uprn" + } +) + + +def make_full_address(x): + to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']] + to_join = [x for x in to_join if not pd.isnull(x) and x != ''] + return ", ".join(to_join) + + 
+df["domna_full_address"] = df.apply(lambda x: make_full_address(x), axis=1) + +df = df[ + [ + "domna_address_1", "Address 2", "Address 3", "postcode", "landlord_property_type", + "landlord_built_form", "landlord_heating_system", "epc_os_uprn", "Total Floor Area (m2)", + "domna_property_id", "domna_full_address" + ] +] + +df["landlord_built_form"] = df["landlord_built_form"].map( + { + "MidTerrace": "Mid-Terrace", + "EndTerrace": "End-Terrace", + "SemiDetached": "Semi-Detached", + "Detached": "Detached", + "EnclosedEndTerrace": "Enclosed End-Terrace", + "EnclosedMidTerrace": "Enclosed Mid-Terrace", + } +) + +# We have a lot of dupes - remove them +df["epc_os_uprn"].duplicated().sum() + +dupe_uprns = df[df["epc_os_uprn"].duplicated()]["epc_os_uprn"].values +dupe_df = df[df["epc_os_uprn"].isin(dupe_uprns)] +dupe_df = dupe_df.sort_values("epc_os_uprn", ascending=True) +# Remove clear duplicate UPRNs because of unreliability +df = df[~df["epc_os_uprn"].isin(dupe_uprns)] + +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " + "UPRNS.xlsx" +) +with pd.ExcelWriter(filename) as writer: + df.to_excel(writer, sheet_name="Standardised Asset List", index=False) + +# Check these are valid +# We check UPRN validity against our OS data +# uprn_filenames = read_dataframe_from_s3_parquet( +# bucket_name="retrofit-data-dev", file_key="spatial/filename_meta.parquet" +# ) +# +# # We're going to: +# # 1) Grab a filename +# # 2) Read it in +# # 3) Check which UPRNS from our data are in that file +# # 4) Keep a record of which UPRNS were found where +# +# for uprn_file in tqdm(uprn_filenames['filenames'].values, total=len(uprn_filenames)): +# spatial_data = read_dataframe_from_s3_parquet( +# bucket_name="retrofit-data-dev", file_key=f"spatial/{uprn_file}" +# ) +# +# uprns_in_file = df[ +# df['out_uprn'].astype('Int64').isin(spatial_data['UPRN'].astype('Int64').values) +# ].copy() +# +# print("Found {} UPRNS 
in file {}".format(len(uprns_in_file), uprn_file)) +# if len(uprns_in_file) > 0: +# # Store the found UPRNS in the validation cache +# data_to_store = uprns_in_file[["Org Ref", "UPRN"]].copy() +# data_to_store["Source File"] = uprn_file +# # Store +# data_to_store.to_csv( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " +# f"Project/data_validation/missing_uprn_validation_cache/{uprn_file.split('.parquet')[0]}_found_uprns.csv", +# index=False +# ) +# +# storage_locations = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " +# "Project/data_validation/missing_uprn_validation_cache") +# # List contents +# folder_contents = os.listdir(storage_locations) +# # Grab files and concatenate +# all_found_uprns = [] +# for file in folder_contents: +# if file.endswith("_found_uprns.csv"): +# df = pd.read_csv(os.path.join(storage_locations, file)) +# all_found_uprns.append(df) +# +# all_found_uprns = pd.concat(all_found_uprns) +# +# invalid = df[ +# ~df["Org Ref"].isin(all_found_uprns["Org Ref"].values) +# ] +# +# uprn_example = 10095401237 +# eg = uprn_filenames[ +# (uprn_filenames["upper"] >= uprn_example) & (uprn_filenames["lower"] <= uprn_example) +# ] +# eg2 = read_dataframe_from_s3_parquet( +# bucket_name="retrofit-data-dev", file_key=f"spatial/{eg['filenames'].values[0]}" +# ) +# +# eg2[eg2["UPRN"] == uprn_example] diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py new file mode 100644 index 00000000..4b946c60 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py @@ -0,0 +1,246 @@ +""" +This script performs a deep dive into the various scenarios and checks fundamental things +This includes: +1) Do properties that should have a plan, have a plan? E.g. 
if the property is EPC D, and has a plan getting up to +# EPC C, there should be a plan +2) If the plan is fabric first, make sure they are actually fabric first +""" +import pandas as pd + +scenario_names = { + 871: "EPC C, fabric first, no solid floor, ashp 3.0", + 863: "EPC B, No EWI IWI, No Solid Floor, ASHP 3.0 COP", + 862: "EPC B, No solid floor, ASHP COP 3.0", + 861: "EPC C, No EWI IWI, No Solid Floor, ASHP 3.0 COP", + 859: "EPC C, no solid floor, ashp 3.0", +} + +scenario_sap_targets = { + 871: 69, + 863: 81, + 862: 81, + 861: 69, + 859: 69, +} + +problems = [] +for scenario_id, scenario_name in scenario_names.items(): + # Read in the recommended measures + print("Reading") + df = pd.read_excel( + f"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + f"{scenario_name}.xlsx" + ) + + # find properties that are below the scenario sap target, but have no recommended measures + df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id] + df["no_recommended_measures"] = df["sap_points"] == 0 + df["zero_cost"] = df["total_retrofit_cost"] == 0 + df["sap_points_above_zero"] = df["sap_points"] > 0 + + # Also look for zero cost and SAP points > 0 + + problematic_properties = df[ + (df["below_scenario_target"] & df["no_recommended_measures"]) + ].copy() + + if scenario_sap_targets[scenario_id] == 81: + problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"] + + zero_cost_above_zero_sap = df[ + (df["sap_points_above_zero"] & df["zero_cost"]) + ].copy() + + # show all columns + # Source - https://stackoverflow.com/a + # Posted by YOLO, modified by community. 
See post 'Timeline' for change history + # Retrieved 2026-01-06, License - CC BY-SA 4.0 + + # pd.set_option('display.max_rows', 500) + # pd.set_option('display.max_columns', 500) + # pd.set_option('display.width', 1000) + # problematic_properties.head(len(problematic_properties)) + + print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})") + print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})") + + problems.append(problematic_properties) + problems.append(zero_cost_above_zero_sap) + + # plan_input = [ + # { + # "uprn": 100022725126, + # "address": "FLAT 5 Daveys Court", + # "postcode": "WC2N 4BW" + # } + # ] + + # plan_input = [ + # { + # "uprn": 100120966352, + # "address": "FLAT 11 Kingsgate", + # "postcode": "OX18 2BP" + # } + # ] + + plan_input = [ + { + "uprn": 200003371857, + "postcode": "SE1 5SJ", + "address": "39 BUTTERMERE CLOSE", + } + ] + +all_problems = pd.concat(problems) +all_problems = all_problems.drop_duplicates(subset=["uprn"]) + +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " + "data.xlsx", + sheet_name="Standardised Asset List" +) +sal2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " + "UPRNS.xlsx", + sheet_name="Standardised Asset List" +) + +sal = pd.concat([sal, sal2]) + +retry = sal[sal["epc_os_uprn"].isin(all_problems["uprn"])] + +# Store +retry.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" + "d_problematic_properties_to_review_20260106.xlsx", + sheet_name="Standardised Asset List", + index=False +) + +# Delete associated plans +# 1) Get the property IDs for these UPRNS, for this portfolio +portfolio_id = 419 +uprns = retry["epc_os_uprn"].tolist() + +# TODO: Delete all plans for these 
properties and re-build +from sqlalchemy.orm import Session +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session +from backend.app.db.models.recommendations import Plan +from sqlalchemy import select, delete +from sqlalchemy.exc import NoResultFound +from sqlalchemy.orm import sessionmaker + + +def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]: + return [ + property.id + for property in session.query(PropertyModel) + .filter( + PropertyModel.portfolio_id == portfolio_id, + PropertyModel.uprn.in_(uprns) + ) + .all() + ] + + +with db_session() as session: + property_ids_to_delete = get_property_ids_for_uprns(session, portfolio_id, uprns) + + +# Get all and delete plans for these property IDs +def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]: + return session.query(Plan).filter(Plan.property_id.in_(property_ids)).all() + + +def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]: + return [ + plan.id + for plan in session.query(Plan) + .filter(Plan.property_id.in_(property_ids)) + .all() + ] + + +with db_session() as session: + plan_ids_to_delete = get_ids_of_plans_for_deletion(session, property_ids_to_delete) + + +def chunked(iterable, size): + for i in range(0, len(iterable), size): + yield iterable[i:i + size] + + +from sqlalchemy import text +from sqlalchemy.orm import Session + + +def delete_plan_batch(session: Session, plan_ids: list[int]): + if not plan_ids: + return + + session.execute(text("SET LOCAL lock_timeout = '5s'")) + + params = {"plan_ids": plan_ids} + + # ---------------------------- + # recommendation_materials + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation_materials rm + USING plan_recommendations pr + WHERE rm.recommendation_id = pr.recommendation_id + AND pr.plan_id = ANY(:plan_ids) + """), + params, + ) + + # 
---------------------------- + # plan_recommendations + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + """), + params, + ) + + # ---------------------------- + # recommendations (only those used by these plans) + # ---------------------------- + session.execute( + text(""" + DELETE FROM recommendation r + WHERE r.id IN ( + SELECT DISTINCT recommendation_id + FROM plan_recommendations + WHERE plan_id = ANY(:plan_ids) + ) + """), + params, + ) + + # ---------------------------- + # plans LAST + # ---------------------------- + session.execute( + text(""" + DELETE FROM plan + WHERE id = ANY(:plan_ids) + """), + params, + ) + + +batch_size = 25 +total = (len(plan_ids_to_delete) + batch_size - 1) // batch_size + +for i, batch in enumerate(chunked(plan_ids_to_delete, batch_size), start=1): + print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)") + + with db_session() as session: + delete_plan_batch(session, batch) + + print(f"Batch {i} committed") diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py new file mode 100644 index 00000000..4405d113 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -0,0 +1,1702 @@ +import pandas as pd +from tqdm import tqdm +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine, db_read_session, db_session +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials, \ + InstalledMeasure +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel +from backend.app.utils import sap_to_epc +from typing import Dict, List, Set +from recommendations.Costs import Costs +from backend.app.db.models.portfolio import Epc + 
# Widen pandas' console output so wide frames can be inspected interactively.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


def get_all_data(portfolio_id, scenario_ids):
    """Load properties, plans and recommendations for a portfolio as plain dicts.

    Args:
        portfolio_id: Portfolio whose properties (joined to their EPC details) are loaded.
        scenario_ids: Scenario ids whose plans, and the recommendations linked to
            those plans, are loaded.

    Returns:
        A ``(properties_data, plans_data, recommendations_data)`` tuple of lists of
        dicts keyed by column name. Each recommendation dict also carries the
        ``scenario_id`` of its plan and an empty ``materials`` placeholder list.

    The session is closed even when a query raises, so a failed call no longer
    leaves a session (and its connection) open.
    """

    def _columns_as_dict(record, model):
        # Snapshot every mapped column of `record` while the session is still open.
        return {col.name: getattr(record, col.name) for col in model.__table__.columns}

    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # --------------------
        # Properties
        # --------------------
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel,
            PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id
        ).all()

        # The EPC-details columns are unpacked last, so for any column name present
        # on both tables the PropertyDetailsEpcModel value is the one that is kept.
        properties_data = [
            {
                **_columns_as_dict(p.PropertyModel, PropertyModel),
                **_columns_as_dict(p.PropertyDetailsEpcModel, PropertyDetailsEpcModel),
            }
            for p in properties_query
        ]

        # --------------------
        # Plans
        # --------------------
        plans_query = session.query(Plan).filter(
            Plan.scenario_id.in_(scenario_ids)
        ).all()

        plans_data = [_columns_as_dict(plan, Plan) for plan in plans_query]

        plan_ids = [p["id"] for p in plans_data]

        # --------------------
        # Recommendations (NO materials yet)
        # --------------------
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations,
            Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan,
            Plan.id == PlanRecommendations.plan_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
        ).all()

        recommendations_data = [
            {
                **_columns_as_dict(r.Recommendation, Recommendation),
                "scenario_id": r.scenario_id,
                "materials": []  # placeholder
            }
            for r in recommendations_query
        ]
    finally:
        # Read-only work: closing without a commit simply releases the connection.
        session.close()

    return properties_data, plans_data, recommendations_data


PORTFOLIO_ID = 431  # Peabody - new portfolio
SCENARIOS = [
    891,  # EPC B - No solid floor, ASHP COP 3.0
]

# The three CSVs read below were exported once from the database with:
#   properties_data, plans_data, recommendations_data = get_all_data(
#       portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS
#   )
#   pd.DataFrame(properties_data).to_csv(
#       ".../Final portfolio datasets/v2/peabody_properties_data_20260108.csv", index=False
#   )
#   (and likewise for plans_data / recommendations_data)

# Read csvs
properties_df = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
    "Project/Final portfolio datasets/v2/peabody_properties_data_20260108.csv"
)
plans_df = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final portfolio "
    "datasets/v2/peabody_plans_data_20260108.csv"
)
recommendations_df = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final portfolio "
    "datasets/v2/peabody_recommendations_data_20260108.csv"
)

# Landlord-supplied extract, plus a second workbook loaded under the name
# sustainability_data_with_sap.
sustainability_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
    "- Data Extracts for Domna.xlsx",
    sheet_name="Sustainability"
)
sustainability_data_with_sap = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
    "08012026.xlsx",
)

# Join our property records to the landlord extract on UPRN.
# NOTE(review): only the left key is cast to str here; this assumes the
# landlord "UPRN" column already holds matching strings - confirm.
properties_df["uprn"] = properties_df["uprn"].astype(str)
property_data_comparison = properties_df.merge(
    sustainability_data, how="inner", left_on="uprn", right_on="UPRN", suffixes=("_prop", "_sust")
)

# The wall type is the text before the first comma of the "walls" description.
property_data_comparison["wall_type"] = property_data_comparison["walls"].str.split(",").str[0].str.strip()

# Our column -> landlord column, for the pairs we want to cross-tabulate.
column_pairs = {
    "built_form": "Attachment",
    "property_type": "Type",
    "wall_type": "Wall Construction",
    "heating": "Heating",
}
combination_tables = {}

# One table per pair, counting how often each (ours, theirs) combination occurs.
for v1, v2 in column_pairs.items():
    df = property_data_comparison.groupby([v1, v2]).size().reset_index(name='count')
    combination_tables[v1] = df

# We just need all of the measure types, per property
recommendation_measure_types = recommendations_df[
    [
        "property_id", "measure_type",
        "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings",
        "energy_cost_savings",
    ]
].drop_duplicates()
recommendation_measure_types["flag"] = True

# Pivot to one row per property and one column per measure type: True where
# the measure was recommended, NaN where it was not.
recommendations_measures_pivot = recommendation_measure_types[
    ["property_id", "measure_type", "flag"]
].drop_duplicates().pivot(
    index='property_id',
    columns='measure_type',
    values='flag'
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()

# "solar_pv" is renamed on the property side before the pivoted measure columns are merged in.
properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).merge(
    recommendations_measures_pivot, how="left", on="property_id"
)

# Flags for measures the landlord extract reports as already installed.
sustainability_data["cavity_wall_insulation"] = sustainability_data["Wall Insulation"].isin(
    ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"]
)
sustainability_data["internal_wall_insulation"] = sustainability_data["Wall Insulation"].isin(
    ["Internal", "FilledCavityPlusInternal"]
)
sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insulation"].isin(
    ["External", "FilledCavityPlusExternal"]
)
sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin(
    ["mm300", "mm250", "mm350", "mm400", "mm270"]
)
# Source columns that feed more than one of the flags below.
_glazing = sustainability_data["Glazing"]
_heating = sustainability_data["Heating"]
_controls = sustainability_data["Controls Adequacy"]

# "Secondary" glazing counts towards both the double- and secondary-glazing flags.
sustainability_data["double_glazing"] = _glazing.isin(
    ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"]
)
sustainability_data["secondary_glazing"] = _glazing.isin(["Secondary"])

sustainability_data["suspended_floor_insulation"] = sustainability_data["Floor Insulation"].isin(["RetroFitted"])

# A boiler only counts as already upgraded when its efficiency band is "A".
sustainability_data["boiler_upgrade"] = (
    _heating.isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"])
)
sustainability_data["air_source_heat_pump"] = _heating.isin(["Heat pumps (wet)"])

sustainability_data["time_temperature_zone_control"] = _controls.isin(["Top Spec"])
sustainability_data["roomstat_programmer_trvs"] = _controls.isin(["Optimal"])

sustainability_data["flat_roof_insulation"] = (
    sustainability_data["Roof Construction"].eq("Flat")
    & sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])
)

# Recommendation-side columns carried into the comparison. Where a name also exists on
# sustainability_data, the recommendation copy receives the "_from_recs" suffix.
_recs_columns = [
    "uprn",
    "cavity_wall_insulation",
    "external_wall_insulation",
    "internal_wall_insulation",
    "loft_insulation",
    "double_glazing",
    "secondary_glazing",
    "suspended_floor_insulation",
    "boiler_upgrade",
    "air_source_heat_pump",
    "time_temperature_zone_control",
    "roomstat_programmer_trvs",
    "flat_roof_insulation",
    "room_roof_insulation",
]

properties_to_recs["uprn"] = properties_to_recs["uprn"].astype(str)
comparison = sustainability_data.merge(
    properties_to_recs[_recs_columns],
    how="left",
    left_on="UPRN",
    right_on="uprn",
    suffixes=("", "_from_recs"),
)

# Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation
# ------------ Walls ------------
_cwi_mask = comparison["cavity_wall_insulation"] & comparison["cavity_wall_insulation_from_recs"].notna()
cwi_conflicting = comparison[_cwi_mask].copy()
cwi_conflicting["conflict_cavity_wall_insulation"] = True


def _rows_in_conflict(frame, installed_col, conflict_col):
    """Copy the rows of ``frame`` flagged as installed that also carry a recommendation.

    A row qualifies when ``installed_col`` is True and ``<installed_col>_from_recs``
    is not null; the returned copy gets ``conflict_col`` set to True.
    """
    mask = frame[installed_col] & frame[f"{installed_col}_from_recs"].notna()
    conflicts = frame[mask].copy()
    conflicts[conflict_col] = True
    return conflicts


iwi_conflicting = _rows_in_conflict(comparison, "internal_wall_insulation", "conflict_iwi_wall_insulation")
ewi_conflicting = _rows_in_conflict(comparison, "external_wall_insulation", "conflict_ewi_wall_insulation")

# ------------ Roof ------------
loft_conflicting = _rows_in_conflict(comparison, "loft_insulation", "conflict_loft_insulation")

# ------------ Windows ------------
# Double glazing differs from the other measures: either glazing flag counts as installed,
# and the row is only a double-glazing conflict when no secondary-glazing recommendation exists.
double_glazing_conflicting = comparison[
    (comparison["double_glazing"] | comparison["secondary_glazing"])
    & comparison["double_glazing_from_recs"].notna()
    & comparison["secondary_glazing_from_recs"].isna()
].copy()
double_glazing_conflicting["conflict_double_glazing"] = True
secondary_glazing_conflicting = _rows_in_conflict(comparison, "secondary_glazing", "conflict_secondary_glazing")

# ------------ Floors ------------
floors_conflicting = _rows_in_conflict(
    comparison, "suspended_floor_insulation", "conflict_suspended_floor_insulation"
)

# ------------ Boiler Upgrade ------------
boiler_conflicting = _rows_in_conflict(comparison, "boiler_upgrade", "conflict_boiler_upgrade")

# ------------ ASHP ------------
ashp_conflicting = _rows_in_conflict(comparison, "air_source_heat_pump", "conflict_air_source_heat_pump")

# ------------ heat controls ------------
ttzc_conflicting = _rows_in_conflict(
    comparison, "time_temperature_zone_control", "conflict_time_temperature_zone_control"
)
rst_conflicting = _rows_in_conflict(comparison, "roomstat_programmer_trvs", "conflict_roomstat_programmer_trvs")

# ------------ Flat Roof Insulation -----------
flat_roof_conflicting = _rows_in_conflict(comparison, "flat_roof_insulation", "conflict_flat_roof_insulation")

# Conflict column -> measure type it stands for (insertion order fixes the column order).
_CONFLICT_TO_MEASURE = {
    "conflict_cavity_wall_insulation": "cavity_wall_insulation",
    "conflict_iwi_wall_insulation": "internal_wall_insulation",
    "conflict_ewi_wall_insulation": "external_wall_insulation",
    "conflict_loft_insulation": "loft_insulation",
    "conflict_double_glazing": "double_glazing",
    "conflict_secondary_glazing": "secondary_glazing",
    "conflict_suspended_floor_insulation": "suspended_floor_insulation",
    "conflict_boiler_upgrade": "boiler_upgrade",
    "conflict_air_source_heat_pump": "air_source_heat_pump",
    "conflict_time_temperature_zone_control": "time_temperature_zone_control",
    "conflict_roomstat_programmer_trvs": "roomstat_programmer_trvs",
    "conflict_flat_roof_insulation": "flat_roof_insulation",
}

# Impact metrics copied from the recommendations onto each installed measure.
_METRIC_COLUMNS = ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]

# All properties with conflicts: stack the per-measure frames. Each frame only has its own
# conflict column, so the other conflict columns are NaN for its rows.
all_conflicts = pd.concat(
    [
        cwi_conflicting,
        iwi_conflicting,
        ewi_conflicting,
        loft_conflicting,
        double_glazing_conflicting,
        secondary_glazing_conflicting,
        floors_conflicting,
        boiler_conflicting,
        ashp_conflicting,
        ttzc_conflicting,
        rst_conflicting,
        flat_roof_conflicting,
    ]
)

all_conflicts = all_conflicts[["uprn", *_CONFLICT_TO_MEASURE]]
all_conflicts = all_conflicts.rename(columns=_CONFLICT_TO_MEASURE)

# Reshape by UPRN by melting: one row per (uprn, measure_type)
all_conflicts = all_conflicts.melt(
    id_vars=["uprn"],
    var_name="measure_type",
    value_name="already_installed"
)

# Normalise property ids to strings on both sides so they join cleanly.
recommendations_df["property_id"] = recommendations_df["property_id"].astype(int).astype(str)
properties_df["property_id"] = properties_df["property_id"].astype(int).astype(str)

recs_with_uprn = recommendations_df.merge(
    properties_df[["property_id", "uprn"]],
    how="left",
    on="property_id",
    suffixes=("", "_prop"),
)

# Keep a single recommendation per (uprn, measure type), taken from the highest-SAP rows first.
_recs_by_sap = recs_with_uprn.sort_values("sap_points", ascending=False)
recs_with_uprn = _recs_by_sap.groupby(["uprn", "measure_type"], as_index=False).first()

recs_with_uprn["uprn"] = recs_with_uprn["uprn"].astype(str)

installed_measures_df = all_conflicts.merge(
    recs_with_uprn[["uprn", "measure_type", *_METRIC_COLUMNS]],
    how="left",
    on=["uprn", "measure_type"],
)

# Drop the NaN filler rows produced by the concat/melt above.
installed_measures_df = installed_measures_df[installed_measures_df["already_installed"].eq(True)]

## --- Sense checking ----

FABRIC_MEASURES = {
    "external_wall_insulation",
    "internal_wall_insulation",
    "cavity_wall_insulation",
}


def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn):
    """Append mechanical-ventilation rows for properties with installed wall insulation.

    For every UPRN in ``installed_measures_df`` whose measure type is in
    ``FABRIC_MEASURES``, the highest-SAP ``mechanical_ventilation`` row found
    in ``recs_with_uprn`` is added with ``already_installed`` set to True.

    Returns:
        A new frame: ``installed_measures_df`` followed by the ventilation
        rows, with a fresh index.
    """
    # Properties with fabric installed
    has_fabric = installed_measures_df["measure_type"].isin(FABRIC_MEASURES)
    fabric_uprns = installed_measures_df[has_fabric]["uprn"].unique()

    # MV recommendation metrics: one row per property, max SAP first
    is_ventilation = recs_with_uprn["measure_type"] == "mechanical_ventilation"
    for_fabric_property = recs_with_uprn["uprn"].isin(fabric_uprns)
    mv_recs = (
        recs_with_uprn[is_ventilation & for_fabric_property]
        .sort_values("sap_points", ascending=False)
        .drop_duplicates(subset=["uprn"])
    )

    mv_installed = mv_recs[
        [
            "uprn",
            "measure_type",
            "sap_points",
            "heat_demand",
            "kwh_savings",
            "co2_equivalent_savings",
            "energy_cost_savings",
        ]
    ].copy()
    mv_installed["already_installed"] = True

    return pd.concat([installed_measures_df, mv_installed], ignore_index=True)


# installed_measures_df = add_mechanical_ventilation_for_fabric(
#     installed_measures_df,
#     recs_with_uprn
# )

# Sanity check: a property must not list the same measure twice.
assert installed_measures_df[["uprn", "measure_type"]].duplicated().sum() == 0

for col in _METRIC_COLUMNS:
    n_missing = installed_measures_df[col].isna().sum()
    print(f"n missings for {col}: {n_missing}")

# Do some calcs on SAP impact
sap_impact = installed_measures_df.groupby(["uprn"])["sap_points"].sum().reset_index()
properties_sap = properties_df[["uprn", "current_sap_points", "current_epc_rating"]].copy()
properties_sap["uprn"] = properties_sap["uprn"].astype(str)

old_sap_vs_new = properties_sap.merge(sap_impact, on="uprn", how="inner")
old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"]
old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply(sap_to_epc)

# How many properties go from below C to above (69 SAP points is the threshold used here)
_below_c = old_sap_vs_new["current_sap_points"] < 69
old_sap_vs_new[_below_c]["new_epc_rating"].value_counts()
changed = old_sap_vs_new[_below_c & (old_sap_vs_new["new_sap_points"] >= 69)]
# What do I need to do:
# TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this by just looking
#  at one scenario
#  1) I should store the current recommendations table, for the portfolio as a backup
#  2) I need a total of already installed SAP points for each property. This should probably be stored on the
#  property_details_epc table
#  3) For anything already installed, I should mark already installed as True, and set the cost to zero
#  4) I need to update the plan cost to remove the cost of the installed measures

# TODO:
#  1) Need to push the already installed measures to the database
from sqlalchemy.orm import sessionmaker
from datetime import datetime

BATCH_SIZE = 5000  # rows per bulk insert / commit
SOURCE = "peabody_import_2026_01"  # written to the "source" field of every inserted row

Session = sessionmaker(bind=db_engine)


def bulk_insert_installed_measures(installed_measures_df):
    """Insert one ``InstalledMeasure`` row per row of ``installed_measures_df``.

    Rows are written in batches of ``BATCH_SIZE`` and each batch is committed
    on its own, so batches committed before a failure stay in the database.

    Args:
        installed_measures_df: Frame with ``uprn``, ``measure_type``,
            ``sap_points``, ``co2_equivalent_savings``, ``kwh_savings``,
            ``energy_cost_savings`` and ``heat_demand`` columns. Missing
            metric values are stored as NULL.

    Raises:
        Exception: Whatever the conversion or the insert raises; the failing
            batch is rolled back before the exception propagates.
    """

    def _float_or_none(value):
        # NaN / None metrics become NULL instead of a float NaN.
        return float(value) if pd.notna(value) else None

    now = datetime.utcnow()

    # Convert every row BEFORE opening a session, so a bad value (for example a
    # non-numeric uprn) cannot leave an open session behind.
    records = [
        {
            "uprn": int(row["uprn"]),
            "measure_type": row["measure_type"],
            "installed_at": now,
            "sap_points": _float_or_none(row["sap_points"]),
            "carbon_savings": _float_or_none(row["co2_equivalent_savings"]),
            "kwh_savings": _float_or_none(row["kwh_savings"]),
            "bill_savings": _float_or_none(row["energy_cost_savings"]),
            "heat_demand_savings": _float_or_none(row["heat_demand"]),
            "source": SOURCE,
            "is_active": True,
        }
        for _, row in installed_measures_df.iterrows()
    ]
    total = len(records)

    session = Session()
    try:
        for start in range(0, total, BATCH_SIZE):
            batch = records[start:start + BATCH_SIZE]
            session.bulk_insert_mappings(InstalledMeasure, batch)
            session.commit()
            print(f"✅ Inserted {start + len(batch)} / {total}")
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()


# bulk_insert_installed_measures(installed_measures_df)
+ +### Rebaselining +from typing import Dict +from sqlalchemy import func + +from typing import Dict +from sqlalchemy import func, case + +REBASING_EXCLUDED_MEASURES = { + "mechanical_ventilation", +} + + +def get_installed_measure_adjustments_by_uprn_for_portfolio( + session, + portfolio_id: int, +) -> Dict[int, dict]: + """ + Returns per-UPRN installed-measure adjustments for PROPERTY / EPC rebasing. + + IMPORTANT: + - Mechanical ventilation is EXCLUDED from rebasing calculations + (drag-along measure; should not alter baseline EPC/SAP). + - All other installed measures are fully applied. + + Output shape: + { + uprn: { + "sap_points": float, + "co2": float, + "energy_kwh": float, + "energy_bill": float, + "heat_demand": float, + } + } + """ + + # -------------------------------------------- + # Limit to UPRNs that belong to this portfolio + # -------------------------------------------- + uprn_subquery = ( + session.query(PropertyModel.uprn) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(PropertyModel.uprn.isnot(None)) + .subquery() + ) + + # -------------------------------------------- + # CASE helper: exclude ventilation from rebasing + # -------------------------------------------- + def exclude_ventilation(column): + return case( + ( + InstalledMeasure.measure_type.notin_( + REBASING_EXCLUDED_MEASURES + ), + column, + ), + else_=0.0, + ) + + # -------------------------------------------- + # Aggregate installed-measure impacts per UPRN + # -------------------------------------------- + rows = ( + session.query( + InstalledMeasure.uprn.label("uprn"), + + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.sap_points)), + 0.0, + ).label("sap_points"), + + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.carbon_savings)), + 0.0, + ).label("co2"), + + func.coalesce( + func.sum(exclude_ventilation(InstalledMeasure.kwh_savings)), + 0.0, + ).label("energy_kwh"), + + func.coalesce( + 
func.sum(exclude_ventilation(InstalledMeasure.bill_savings)), + 0.0, + ).label("energy_bill"), + + func.coalesce( + func.sum( + exclude_ventilation( + InstalledMeasure.heat_demand_savings + ) + ), + 0.0, + ).label("heat_demand"), + ) + .filter(InstalledMeasure.is_active.is_(True)) + .filter(InstalledMeasure.uprn.in_(uprn_subquery)) + .group_by(InstalledMeasure.uprn) + .all() + ) + + # -------------------------------------------- + # Shape result for downstream consumers + # -------------------------------------------- + return { + row.uprn: { + "sap_points": float(row.sap_points), + "co2": float(row.co2), + "energy_kwh": float(row.energy_kwh), + "energy_bill": float(row.energy_bill), + "heat_demand": float(row.heat_demand), + } + for row in rows + } + + +def get_installed_measure_types_by_uprn( + session, + uprn: int, +) -> Set[str]: + rows = ( + session.query(InstalledMeasure.measure_type) + .filter(InstalledMeasure.uprn == uprn) + .filter(InstalledMeasure.is_active.is_(True)) + .all() + ) + + # Convert enums → strings + return { + r[0].value if hasattr(r[0], "value") else r[0] + for r in rows + } + + +# ------------------------------------------------------------ +# PROPERTY REBASING (READ-ONLY) +# ------------------------------------------------------------ + +def compute_property_sap_updates( + properties: List[PropertyModel], + sap_adjustments: Dict[int, float], # keyed by uprn +) -> List[dict]: + """ + Returns property SAP rebasing results. + ONLY returns rows where installed measures exist. 
+ """ + + updates = [] + + for prop in properties: + if prop.uprn is None: + continue + + # 🚨 gatekeeper + if prop.uprn not in sap_adjustments: + continue + + if prop.original_sap_points is None: + continue + + sap_delta = sap_adjustments[prop.uprn] + new_sap = prop.original_sap_points + sap_delta + + updates.append({ + "property_id": prop.id, + "uprn": prop.uprn, + "original_sap_points": prop.original_sap_points, + "installed_sap_delta": sap_delta, + "new_sap_points": new_sap, + "is_adjusted": True, + }) + + return updates + + +# ------------------------------------------------------------ +# PLAN RECOMPUTATION HELPERS +# ------------------------------------------------------------ + +def get_effective_plan_recommendations( + session, plan_id: int, excluded_measure_types: Set[str] +) -> List[Recommendation]: + q = ( + session.query(Recommendation) + .join(PlanRecommendations) + .filter(PlanRecommendations.plan_id == plan_id) + .filter(Recommendation.default.is_(True))) + if excluded_measure_types: + q = q.filter( + ~Recommendation.measure_type.in_(excluded_measure_types) + ) + + return q.all() + + +def aggregate_plan_metrics(recommendations: list[Recommendation]): + agg = { + "sap_points": 0.0, + "co2_savings": 0.0, + "energy_bill_savings": 0.0, + "energy_consumption_savings": 0.0, + "valuation_increase": 0.0, + "cost_of_works": 0.0, + "contingency_cost": 0.0, + } + + for r in recommendations: + agg["sap_points"] += r.sap_points or 0.0 + agg["co2_savings"] += r.co2_equivalent_savings or 0.0 + agg["energy_bill_savings"] += r.energy_cost_savings or 0.0 + agg["energy_consumption_savings"] += r.kwh_savings or 0.0 + agg["valuation_increase"] += r.property_valuation_increase or 0.0 + + base_cost = r.estimated_cost or 0.0 + agg["cost_of_works"] += base_cost + agg["contingency_cost"] += calculate_contingency_for_recommendation(r) + + return agg + + +# ------------------------------------------------------------ +# PLAN REBASING (READ-ONLY) +# 
------------------------------------------------------------ + +# session,plans,properties_by_id,epcs_by_property_id, property_sap_updates = session, plans, properties_by_id, epcs, +# property_updates_by_id + +from collections import defaultdict + + +def get_installed_measure_types_by_property_id_for_portfolio( + session, + portfolio_id: int, +) -> dict[int, set[str]]: + """ + Returns: + { property_id: {measure_type, ...} } + + Includes drag-along measures (e.g. ventilation). + """ + + rows = ( + session.query( + PropertyModel.id.label("property_id"), + InstalledMeasure.measure_type, + ) + .join( + InstalledMeasure, + InstalledMeasure.uprn == PropertyModel.uprn, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(InstalledMeasure.is_active.is_(True)) + .all() + ) + + installed_by_property: dict[int, set[str]] = defaultdict(set) + + for property_id, measure_type in rows: + mt = measure_type.value if hasattr(measure_type, "value") else measure_type + installed_by_property[property_id].add(mt) + + # drag-along rules + if mt in {"cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation"}: + installed_by_property[property_id].add("mechanical_ventilation") + + return installed_by_property + + +def get_all_default_plan_recommendations( + session, + plan_ids: list[int], +) -> dict[int, list[Recommendation]]: + """ + Returns {plan_id: [Recommendation, ...]} for ALL plans in one query. 
+ """ + + rows = ( + session.query( + PlanRecommendations.plan_id, + Recommendation, + ) + .join(Recommendation, Recommendation.id == PlanRecommendations.recommendation_id) + .filter(PlanRecommendations.plan_id.in_(plan_ids)) + .filter(Recommendation.default.is_(True)) + .all() + ) + + by_plan: dict[int, list[Recommendation]] = {} + + for plan_id, rec in rows: + by_plan.setdefault(plan_id, []).append(rec) + + return by_plan + + +def filter_remaining_recommendations( + recommendations: list[Recommendation], + installed_types: set[str], +) -> list[Recommendation]: + """ + Removes recommendations whose measure_type is already installed. + """ + if not installed_types: + return recommendations + + return [ + r for r in recommendations + if ( + (r.measure_type.value if hasattr(r.measure_type, "value") else r.measure_type) + not in installed_types + ) + ] + + +def compute_plan_updates( + session, + plans: List[Plan], + properties_by_id: Dict[int, PropertyModel], + epcs_by_property_id: Dict[int, PropertyDetailsEpcModel], + installed_types_by_property_id, + all_ventilation_measures +) -> List[dict]: + """ + Computes plan metrics after marking some recommendations as already installed. 
+ + Rules: + - Baseline post metrics remain unchanged + - Savings + costs are recomputed excluding installed measures + - ONLY mechanical ventilation alters post metrics + """ + + all_plan_recs = get_all_default_plan_recommendations( + session, + [p.id for p in plans], + ) + + updates = [] + property_to_installed_types = {} + for plan in tqdm(plans, total=len(plans)): + prop = properties_by_id.get(plan.property_id) + epc = epcs_by_property_id.get(plan.property_id) + + if not prop or not epc: + continue + + installed_types = installed_types_by_property_id.get(prop.id, set()) + + if not installed_types: + continue + + plan_recs = all_plan_recs.get(plan.id, []) + + remaining_recs = filter_remaining_recommendations( + plan_recs, + installed_types, + ) + + remaining = aggregate_plan_metrics(remaining_recs) + + # Detect ventilation removal + ventilation_removed = "mechanical_ventilation" in installed_types + + # ------------------------------- + # Start from the previous plan + # ------------------------------- + post_sap = plan.post_sap_points + post_co2 = plan.post_co2_emissions + post_bill = plan.post_energy_bill + post_kwh = plan.post_energy_consumption + + # ------------------------------- + # Undo ventilation ONLY + # ------------------------------- + ventilation_impact = all_ventilation_measures.get(prop.uprn, None) + + if ventilation_removed and ventilation_impact is not None: + # ventilation impact = baseline - remaining + + post_sap -= ventilation_impact["sap_points"] + + post_co2 += ventilation_impact["co2"] # We save more with ventilation + + post_bill += ventilation_impact["energy_bill"] + post_kwh += ventilation_impact["energy_kwh"] + + # # Skip if nothing changes at all + # if ( + # remaining["cost_of_works"] == baseline["cost_of_works"] + # and not ventilation_removed + # ): + # continue + + updates.append({ + "plan_id": plan.id, + "property_id": plan.property_id, + + # SAP / EPC + "post_sap_points": post_sap, + "post_epc_rating": sap_to_epc(post_sap), + 
+ # Carbon + "co2_savings": remaining["co2_savings"], + "post_co2_emissions": post_co2, + + # Energy bills + "energy_bill_savings": remaining["energy_bill_savings"], + "post_energy_bill": post_bill, + + # Energy consumption + "energy_consumption_savings": remaining["energy_consumption_savings"], + "post_energy_consumption": post_kwh, + + # Valuation (safe) + "valuation_increase": remaining["valuation_increase"], + "valuation_post_retrofit": ( + prop.current_valuation + + remaining["valuation_increase"] + if prop.current_valuation is not None + else None + ), + + # Costs + "cost_of_works": remaining["cost_of_works"], + "contingency_cost": remaining["contingency_cost"], + }) + + property_to_installed_types[prop.id] = installed_types + + return updates + + +def build_installed_recommendation_pairs( + installed_types_by_property_id: dict[int, set[str]], +) -> list[tuple[int, str]]: + """ + Returns: + [(property_id, measure_type), ...] + """ + pairs = [] + + for property_id, measure_types in installed_types_by_property_id.items(): + for mt in measure_types: + pairs.append((property_id, mt)) + + return pairs + + +def calculate_contingency_for_recommendation( + recommendation, +) -> float: + """ + Recompute contingency for a recommendation using the same + logic as the costing engine. + + Assumptions: + - recommendation.estimated_cost is the 'total' cost + - contingency is a percentage of total + """ + + if recommendation.estimated_cost is None: + return 0.0 + + # Normalise measure_type (Enum → str) + measure_type = ( + recommendation.measure_type.value + if hasattr(recommendation.measure_type, "value") + else recommendation.measure_type + ) + + # Measure-specific contingency if defined, else global fallback + contingency_rate = Costs.CONTINGENCIES.get( + measure_type, + Costs.CONTINGENCY, # default (e.g. 
10%) + ) + + return recommendation.estimated_cost * contingency_rate + + +def persist_property_sap_updates( + property_updates_by_id: dict[int, dict], +): + """ + Writes adjusted SAP values back to property table. + Safe to re-run. + """ + + with db_session() as session: + properties = ( + session.query(PropertyModel) + .filter(PropertyModel.id.in_(property_updates_by_id.keys())) + .all() + ) + + for prop in properties: + update = property_updates_by_id[prop.id] + + prop.installed_measures_sap_point_adjustment = update["installed_sap_delta"] + prop.is_sap_points_adjusted_for_installed_measures = update["is_adjusted"] + prop.current_sap_points = update["new_sap_points"] + prop.current_epc_rating = sap_to_epc(update["new_sap_points"]) + + print(f"✅ Updated {len(properties)} properties") + + +def compute_epc_rebasing_updates( + epcs: Dict[int, PropertyDetailsEpcModel], + properties_by_id: Dict[int, PropertyModel], + installed_adjustments_by_uprn: Dict[int, dict], +) -> Dict[int, dict]: + """ + Computes EPC rebasing updates without mutating DB objects. + Keyed by property_id. 
+ """ + + updates: Dict[int, dict] = {} + + for property_id, epc in epcs.items(): + prop = properties_by_id.get(property_id) + if not prop or prop.uprn is None: + continue + + adj = installed_adjustments_by_uprn.get(prop.uprn) + if not adj: + continue + + # if ( + # adj["sap_points"] == 0 + # and adj["co2"] == 0 + # and adj["energy_kwh"] == 0 + # and adj["energy_bill"] == 0 + # and adj["heat_demand"] == 0 + # ): + # continue + + updates[property_id] = { + "property_id": property_id, + + # Originals (only set once) + "original_co2_emissions": ( + epc.original_co2_emissions + if epc.original_co2_emissions is not None + else epc.co2_emissions + ), + "original_primary_energy_consumption": ( + epc.original_primary_energy_consumption + if epc.original_primary_energy_consumption is not None + else epc.primary_energy_consumption + ), + "original_current_energy_demand": ( + epc.original_current_energy_demand + if epc.original_current_energy_demand is not None + else epc.current_energy_demand + ), + "original_current_energy_demand_heating_hotwater": ( + epc.original_current_energy_demand_heating_hotwater + if epc.original_current_energy_demand_heating_hotwater is not None + else epc.current_energy_demand_heating_hotwater + ), + + # Adjustments (always re-applied from originals) + "installed_measures_co2_adjustment": adj["co2"], + "installed_measures_energy_demand_adjustment": adj["energy_kwh"], + "installed_measures_total_energy_bill_adjustment": adj["energy_bill"], + "installed_measures_heat_demand_adjustment": adj["heat_demand"], + } + + return updates + + +def persist_plan_updates(plan_updates: list[dict]): + """ + Writes recalculated plan metrics. + Safe to re-run. 
+ """ + + with db_session() as session: + plans = ( + session.query(Plan) + .filter(Plan.id.in_([u["plan_id"] for u in plan_updates])) + .all() + ) + + plans_by_id = {p.id: p for p in plans} + + for update in plan_updates: + plan = plans_by_id.get(update["plan_id"]) + if not plan: + continue + + # SAP / EPC + plan.post_sap_points = update["post_sap_points"] + plan.post_epc_rating = Epc(update["post_epc_rating"]) + + # Carbon + plan.co2_savings = update["co2_savings"] + plan.post_co2_emissions = update["post_co2_emissions"] + + # Energy + plan.energy_bill_savings = update["energy_bill_savings"] + plan.post_energy_bill = update["post_energy_bill"] + + plan.energy_consumption_savings = update["energy_consumption_savings"] + plan.post_energy_consumption = update["post_energy_consumption"] + + # Valuation + plan.valuation_increase = update["valuation_increase"] + plan.valuation_post_retrofit = update["valuation_post_retrofit"] + + # Costs + plan.cost_of_works = update["cost_of_works"] + plan.contingency_cost = update["contingency_cost"] + + print(f"✅ Updated {len(plans)} plans") + + +def persist_epc_rebasing_updates( + epc_updates_by_property_id: Dict[int, dict], +): + """ + Overwrites EPC metrics using installed-measure rebasing. + Safe to re-run. 
+ """ + + with db_session() as session: + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter( + PropertyDetailsEpcModel.property_id.in_( + epc_updates_by_property_id.keys() + ) + ) + .all() + ) + + for epc in epcs: + u = epc_updates_by_property_id[epc.property_id] + + # Store originals once + epc.original_co2_emissions = u["original_co2_emissions"] + epc.original_primary_energy_consumption = ( + u["original_primary_energy_consumption"] + ) + epc.original_current_energy_demand = ( + u["original_current_energy_demand"] + ) + epc.original_current_energy_demand_heating_hotwater = ( + u["original_current_energy_demand_heating_hotwater"] + ) + + # Apply rebased values + epc.co2_emissions = ( + u["original_co2_emissions"] + - u["installed_measures_co2_adjustment"] + ) + + epc.primary_energy_consumption = ( + u["original_primary_energy_consumption"] + - u["installed_measures_heat_demand_adjustment"] + ) + + epc.current_energy_demand = ( + u["original_current_energy_demand"] + - u["installed_measures_energy_demand_adjustment"] + ) + + # Flags + audit fields + epc.installed_measures_co2_adjustment = ( + u["installed_measures_co2_adjustment"] + ) + epc.installed_measures_energy_demand_adjustment = ( + u["installed_measures_energy_demand_adjustment"] + ) + epc.installed_measures_total_energy_bill_adjustment = ( + u["installed_measures_total_energy_bill_adjustment"] + ) + epc.installed_measures_heat_demand_adjustment = ( + u["installed_measures_heat_demand_adjustment"] + ) + epc.is_epc_adjusted_for_installed_measures = True + + print(f"✅ Updated {len(epcs)} EPC records") + + +# For setting the original SAP, carbon, etc to the current values +def initialise_original_property_and_epc_values(portfolio_id: int): + """ + Initialise original_* columns for SAP + EPC. + Safe to re-run. Only fills NULL originals. 
+ """ + + with db_session() as session: + # ------------------------- + # PROPERTY (SAP) + # ------------------------- + properties = ( + session.query(PropertyModel) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(PropertyModel.original_sap_points.is_(None)) + .all() + ) + + for prop in properties: + prop.original_sap_points = prop.current_sap_points + + print(f"✅ Initialised original_sap_points for {len(properties)} properties") + + # ------------------------- + # EPC (energy / carbon) + # ------------------------- + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .all() + ) + + epc_updates = 0 + + for epc in epcs: + updated = False + + if epc.original_co2_emissions is None: + epc.original_co2_emissions = epc.co2_emissions + updated = True + + if epc.original_primary_energy_consumption is None: + epc.original_primary_energy_consumption = ( + epc.primary_energy_consumption + ) + updated = True + + if epc.original_current_energy_demand is None: + epc.original_current_energy_demand = epc.current_energy_demand + updated = True + + if epc.original_current_energy_demand_heating_hotwater is None: + epc.original_current_energy_demand_heating_hotwater = ( + epc.current_energy_demand_heating_hotwater + ) + updated = True + + if updated: + epc_updates += 1 + + print(f"✅ Initialised EPC originals for {epc_updates} EPC records") + + session.commit() + + +from typing import Set, Dict +from sqlalchemy import distinct + +from typing import Dict +from sqlalchemy import func + + +def get_installed_ventilation_adjustments_by_uprn_for_portfolio( + session, + portfolio_id: int, +) -> Dict[int, dict]: + """ + Returns per-UPRN aggregated impact metrics for + already-installed MECHANICAL VENTILATION. 
+ + { + uprn: { + sap_points: float, + co2: float, + energy_kwh: float, + energy_bill: float, + heat_demand: float, + } + } + """ + + # Only consider UPRNs that belong to this portfolio + uprn_subquery = ( + session.query(PropertyModel.uprn) + .filter(PropertyModel.portfolio_id == portfolio_id) + .filter(PropertyModel.uprn.isnot(None)) + .subquery() + ) + + rows = ( + session.query( + InstalledMeasure.uprn.label("uprn"), + + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) + .label("sap_points"), + + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) + .label("co2"), + + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) + .label("energy_kwh"), + + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) + .label("energy_bill"), + + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) + .label("heat_demand"), + ) + .filter(InstalledMeasure.is_active.is_(True)) + .filter(InstalledMeasure.measure_type == "mechanical_ventilation") + .filter(InstalledMeasure.uprn.in_(uprn_subquery)) + .group_by(InstalledMeasure.uprn) + .all() + ) + + return { + row.uprn: { + "sap_points": float(row.sap_points), + "co2": float(row.co2), + "energy_kwh": float(row.energy_kwh), + "energy_bill": float(row.energy_bill), + "heat_demand": float(row.heat_demand), + } + for row in rows + } + + +from sqlalchemy import update, tuple_ + + +def mark_recommendations_as_installed( + session, + property_measure_pairs: list[tuple[int, str]], + dry_run: bool = True, +): + if not property_measure_pairs: + print("No recommendations to update") + return + + print(f"{len(property_measure_pairs)} recommendation matches found") + + if dry_run: + print("DRY RUN — no database changes") + return + + stmt = ( + update(Recommendation) + .where( + tuple_(Recommendation.property_id, Recommendation.measure_type) + .in_(property_measure_pairs) + ) + .values(already_installed=True) + ) + + result = session.execute(stmt) + session.commit() + + print(f"✅ Updated 
{result.rowcount} recommendations") + + +# ------------------------------------------------------------ +# EXECUTION (DRY RUN) +# ------------------------------------------------------------ + +PORTFOLIO_ID = 431 +# TODO - run the original sap points update on the peabody portfolio + +# Initialising +# initialise_original_property_and_epc_values(PORTFOLIO_ID) + + +with db_read_session() as session: + properties = ( + session.query(PropertyModel) + .filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + .all() + ) + + all_ventilation_measures = get_installed_ventilation_adjustments_by_uprn_for_portfolio(session, PORTFOLIO_ID) + installed_types_by_property_id = get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + + plans = ( + session.query(Plan) + .filter(Plan.portfolio_id == PORTFOLIO_ID) + .all() + ) + + epcs = { + e.property_id: e + for e in ( + session.query(PropertyDetailsEpcModel) + .join(PropertyModel) + .filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + .all() + ) + } + + installed_adjustments = ( + get_installed_measure_adjustments_by_uprn_for_portfolio( + session, + PORTFOLIO_ID, + ) + ) + + property_updates = compute_property_sap_updates( + properties, + {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} + ) + + properties_by_id = {p.id: p for p in properties} + property_updates_by_id = { + u["property_id"]: u + for u in property_updates + } + + epc_updates = compute_epc_rebasing_updates( + epcs, + properties_by_id, + installed_adjustments, + ) + + plan_updates = compute_plan_updates( + session, + plans, + properties_by_id, + epcs, + installed_types_by_property_id, + all_ventilation_measures, + ) + + # Used to mark recommendations + pairs = build_installed_recommendation_pairs( + installed_types_by_property_id + ) + +from copy import deepcopy + +plan_updates_comparison = deepcopy(plan_updates) +plans_by_planid = {p.id: p for p in plans} +for u in plan_updates_comparison: + before = 
plans_by_planid.get(u["plan_id"]) + if not before: + continue + + u.update({ + # SAP + "before_sap_points": before.post_sap_points, + "after_sap_points": u["post_sap_points"], + + # Carbon + "before_post_co2_emissions": before.post_co2_emissions, + "after_post_co2_emissions": u["post_co2_emissions"], + + # Costs + "before_cost_of_works": before.cost_of_works, + "after_cost_of_works": u["cost_of_works"], + + "before_contingency_cost": before.contingency_cost, + "after_contingency_cost": u["contingency_cost"], + }) + +plan_updates_df = pd.DataFrame(plan_updates_comparison) + +plan_updates_df["delta_sap_points"] = ( + plan_updates_df["after_sap_points"] + - plan_updates_df["before_sap_points"] +) +plan_updates_df["delta_carbon"] = ( + plan_updates_df["after_post_co2_emissions"] + - plan_updates_df["before_post_co2_emissions"] +) +plan_updates_df["delta_cost_of_works"] = ( + plan_updates_df["after_cost_of_works"] + - plan_updates_df["before_cost_of_works"] +) +plan_updates_df["delta_contingency_cost"] = ( + plan_updates_df["after_contingency_cost"] + - plan_updates_df["before_contingency_cost"] +) + +# High-level sanity checks +summary = plan_updates_df[[ + "delta_sap_points", + "delta_carbon", + "delta_cost_of_works", + "delta_contingency_cost", +]].sum() + +print(summary) + +# Grab some random samples +example = plan_updates_df[plan_updates_df["delta_cost_of_works"] < -1000].sample(1) +# example = plan_updates_df[plan_updates_df["delta_sap_points"] == 0].sample(1) +example = plan_updates_df[plan_updates_df["property_id"] == 434936].squeeze() + +print(example["property_id"]) +# Go the the db and get the UPRN +uprn_example = 202149883 +installed_adjustments[uprn_example] + +[x for x in plan_updates if x["property_id"] == example["property_id"].values[0]] + +installed_measures_example = {} + +example.squeeze() +# When ready to run! 
+# persist_property_sap_updates(property_updates_by_id) +# persist_plan_updates(plan_updates) +# persist_epc_rebasing_updates(epc_updates) +# BATCH_SIZE = 1000 +# +# with db_session() as session: +# for i in range(0, len(pairs), BATCH_SIZE): +# batch = pairs[i:i + BATCH_SIZE] +# +# mark_recommendations_as_installed( +# session, +# batch, +# dry_run=False, +# ) +# +# session.commit() + +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/435084/ +# Current EPC rating should go to 68.6 - no it shouldn't! less + +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434930/ +# Should now be a C72, +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434930 +# Carbon should be 2.02, energy_kwh should be, 12311.5 + +# We need a follow-up query which switches off ventilation if ewi, iwi or cwi are already installed +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/435154/plans/1024673 +# Should go to C73 +# This is a good one to test also, marking the recommendation as non-default + +# Good example to check: +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434936/plans/1024455 +# Should go down by these: +# {'sap_points': 11.299999, 'co2': 1.85, 'energy_kwh': 7882.1997, 'energy_bill': 549.89935, 'heat_demand': 77.7} +# Before SAP: 55 +# Carbon 7.56 +# kwh: 28207 + +# Good example to check: +# https://assessment-model-git-main-hestiahomes.vercel.app/portfolio/430/building-passport/434444/plans/1024063 +# SHould change by these +# {'sap_points': 10.3, 'co2': 2.54, 'energy_kwh': 3713.5, 'energy_bill': 1028.2682, 'heat_demand': 151.61} +# Current: SAP 54 +# Carbon: 4.45 +# kwh: 10307 + +# There's one final thing to do - we had an error in post carbon so we need to increase it by the appliances +# amount for all units +from backend.ml_models.AnnualBillSavings import 
AnnualBillSavings + + +# Need to add this on to the plan for each property +def calculate_appliance_carbon_tonnes(total_floor_area: float) -> float: + """ + Returns appliance carbon emissions in tonnes CO2. + """ + appliance_energy_kwh = AnnualBillSavings.estimate_appliances_energy_use( + total_floor_area=total_floor_area + ) + + # kgCO2 → tonnes CO2 + appliance_carbon_tonnes = (appliance_energy_kwh * 0.232) / 1000 + return appliance_carbon_tonnes + + +from sqlalchemy.orm import joinedload +from tqdm import tqdm + +from tqdm import tqdm + + +def apply_appliance_carbon_to_plans( + session, + portfolio_id: int, + dry_run: bool = True, +): + """ + Adds appliance-related carbon emissions to plan.post_co2_emissions + using EPC total_floor_area. + """ + + # -------------------------------------------- + # Load EPCs (floor area source of truth) + # -------------------------------------------- + epcs = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .filter(PropertyDetailsEpcModel.total_floor_area.isnot(None)) + .all() + ) + + epc_by_property_id = { + e.property_id: e for e in epcs + } + + # -------------------------------------------- + # Load plans with post carbon + # -------------------------------------------- + plans = ( + session.query(Plan) + .filter(Plan.portfolio_id == portfolio_id) + .filter(Plan.post_co2_emissions.isnot(None)) + .all() + ) + + updates = [] + total_delta = 0.0 + + for plan in tqdm(plans, total=len(plans)): + epc = epc_by_property_id.get(plan.property_id) + if not epc: + continue + + floor_area = epc.total_floor_area + if not floor_area or floor_area <= 0: + continue + + delta = float(calculate_appliance_carbon_tonnes(floor_area)) + + if delta == 0: + continue + + updates.append((plan, delta)) + total_delta += delta + + # -------------------------------------------- + # Reporting + # -------------------------------------------- + print(f"Plans affected: {len(updates)}") + print(f"Total 
appliance carbon added (tCO2): {total_delta:.4f}") + + if dry_run: + print("🟡 DRY RUN — no updates applied") + return + + # -------------------------------------------- + # Apply updates + # -------------------------------------------- + for plan, delta in updates: + plan.post_co2_emissions += delta + + session.commit() + print("✅ Appliance carbon successfully applied") + + +# with db_session() as session: +# apply_appliance_carbon_to_plans( +# session, +# portfolio_id=PORTFOLIO_ID, +# dry_run=False, +# ) + +# Get all uprns for entries in already installed, from the database +with db_read_session() as session: + db_uprns = { + str(r[0]) + for r in ( + session.query(InstalledMeasure.uprn) + .all() + ) + } + +# What is the overlap of these properties and the properties in portfolo 430 +sal_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260107 " + "corrected batch 6 sal.xlsx", + sheet_name="batch 1", +) + +len(sal_data[sal_data["epc_os_uprn"].astype(str).isin(db_uprns)]["epc_os_uprn"]) + +# len([uprn for uprn, v in installed_adjustments.items() if str(uprn) in sal_data["epc_os_uprn"].astype(str).tolist()]) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py new file mode 100644 index 00000000..67ff2c85 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -0,0 +1,211 @@ +import pandas as pd +from sqlalchemy.orm import Session +from sqlalchemy import text, select +from backend.app.db.connection import db_read_session +from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel +from backend.app.db.models.recommendations import Plan + +PORTFOLIO_ID = 435 + +with db_read_session() as session: + # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 + estimated_epcs = 
session.query(PropertyDetailsEpcModel).filter(
+        # PropertyDetailsEpcModel.estimated == True,
+        # NOTE(review): with the filter above commented out this loads every
+        # EPC row of the portfolio, not only the estimated ones - confirm.
+        PropertyDetailsEpcModel.property_id.in_(
+            session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID)
+        )
+    ).all()
+
+    # Get the ids
+    # NOTE: despite the name these are PROPERTY ids (epc.property_id),
+    # not primary keys of the EPC rows.
+    estimated_epc_ids = [epc.property_id for epc in estimated_epcs]
+
+# I want to get the UPRNS for these properties, from the property model
+with db_read_session() as session:
+    # estimated_epc_ids holds property ids, so match them against
+    # PropertyModel.id directly. The previous version compared them with
+    # PropertyDetailsEpcModel.id (the EPC row key), which selects unrelated
+    # rows whenever the two id sequences differ.
+    estimated_uprns = session.query(PropertyModel.uprn).filter(
+        PropertyModel.id.in_(estimated_epc_ids)
+    ).all()
+
+    # Each result row is a 1-tuple; unpack it to the bare UPRN
+    estimated_uprns_list = [uprn for (uprn,) in estimated_uprns]
+
+# Go to the SAL
+sal_1 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
+    "data.xlsx",
+    sheet_name="Standardised Asset List"
+)
+sal_2 = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
+    "UPRNS.xlsx",
+    sheet_name="Standardised Asset List"
+)
+
+# Stack both asset lists and keep the first row seen for each UPRN
+sal = pd.concat([sal_1, sal_2])
+sal = sal.drop_duplicates(subset=['epc_os_uprn'])
+
+estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy()
+
+SCENARIOS = [
+    # 871, # EPC C - fabric first, no solid floor, ashp 3.0
+    # 863, # EPC B, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
+    # 862, # EPC B - No solid floor, ASHP COP 3.0
+    # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
+    # 859, # EPC C - no solid floor, ashp 3.0
+    # 885, # EPC B - fabric first, no solid floor, ashp 3.0
+    908, 909, 910
+]
+
+# Get all plans, associated to these properties - the property IDs are in estimated_epc_ids
+with db_read_session() as session:
+    result = session.execute(
+        select(Plan.id, Plan.property_id)
+        .where(Plan.property_id.in_(estimated_epc_ids))
+    )
+    plans = [
+        {
+            "plan_id": row.id,
+            "property_id": row.property_id,
+        } for row in result
+    ]
+
+df = pd.DataFrame(plans)
+df = df.sort_values("property_id", ascending=True)
+
+# Number of plans per property
+agg = df.groupby("property_id").size().reset_index(name="n_plans")
+agg = agg.sort_values("n_plans", ascending=True)
+
+agg[agg["n_plans"] == 3]
+agg[agg["n_plans"] == 2].shape
+
+agg[agg["n_plans"] != 3]
+# Sanity check: every property is expected to have exactly three plans
+assert all(agg["n_plans"] == 3)
+
+
+def delete_plan_batch(session: Session, plan_ids: list[int]):
+    """
+    Delete a batch of plans together with their dependent rows.
+
+    Rows are removed in this order: recommendation_materials,
+    plan_recommendations, recommendation and finally plan. Nothing is
+    committed here; the caller owns the transaction.
+
+    The SQL uses PostgreSQL syntax (SET LOCAL, DELETE ... USING, ANY(array)).
+
+    :param session: open SQLAlchemy session used to run the statements.
+    :param plan_ids: ids of the plans to delete; an empty list is a no-op.
+    """
+    if not plan_ids:
+        return
+
+    # Give up after 5s rather than waiting indefinitely on a lock
+    session.execute(text("SET LOCAL lock_timeout = '5s'"))
+
+    params = {"plan_ids": plan_ids}
+
+    # ----------------------------
+    # Resolve the affected recommendations FIRST
+    # ----------------------------
+    # The link rows in plan_recommendations are deleted before the
+    # recommendations themselves, so the ids must be captured now. Looking
+    # them up afterwards (as the previous version did) always finds nothing
+    # and leaves the recommendation rows orphaned.
+    recommendation_ids = [
+        row[0]
+        for row in session.execute(
+            text("""
+                SELECT DISTINCT recommendation_id
+                FROM plan_recommendations
+                WHERE plan_id = ANY(:plan_ids)
+            """),
+            params,
+        )
+    ]
+
+    # ----------------------------
+    # recommendation_materials
+    # ----------------------------
+    session.execute(
+        text("""
+            DELETE FROM recommendation_materials rm
+            USING plan_recommendations pr
+            WHERE rm.recommendation_id = pr.recommendation_id
+              AND pr.plan_id = ANY(:plan_ids)
+        """),
+        params,
+    )
+
+    # ----------------------------
+    # plan_recommendations
+    # ----------------------------
+    session.execute(
+        text("""
+            DELETE FROM plan_recommendations
+            WHERE plan_id = ANY(:plan_ids)
+        """),
+        params,
+    )
+
+    # ----------------------------
+    # recommendations (only those used by these plans)
+    # ----------------------------
+    # NOTE(review): assumes a recommendation is never shared with a plan
+    # outside this batch - confirm against the schema.
+    if recommendation_ids:
+        session.execute(
+            text("""
+                DELETE FROM recommendation r
+                WHERE r.id = ANY(:recommendation_ids)
+            """),
+            {"recommendation_ids": recommendation_ids},
+        )
+
+    # ----------------------------
+    # plans LAST
+    # ----------------------------
+    session.execute(
+        text("""
+            DELETE FROM plan
+            WHERE id = ANY(:plan_ids)
+        """),
+        params,
+    )
+
+
+# Store the SAL
+filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
+            "sal.xlsx")
+
+with pd.ExcelWriter(filename) as writer:
+    sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+    # Top 1000 for testing
+    sal.iloc[0:1000, :].to_excel(writer, sheet_name="batch 1", index=False)
+    # Batch 2 is the next 20,000
+    sal.iloc[1000:21000, :].to_excel(writer,
sheet_name="batch 2", index=False) + # Batch 3 is the next 20,000 + sal.iloc[21000:41000, :].to_excel(writer, sheet_name="batch 3", index=False) + + sal.iloc[41000:61000, :].to_excel(writer, sheet_name="batch 4", index=False) + sal.iloc[61000:81000, :].to_excel(writer, sheet_name="batch 5", index=False) + sal.iloc[81000:, :].to_excel(writer, sheet_name="batch 6", index=False) + +# TODO - mistake was made when creating the final SAL +b1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 1" +) +b2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 2" +) +b3 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 3" +) +b4 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 4" +) +b5 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " + "sal.xlsx", + sheet_name="batch 5" +) +# Batch 6 should be the remaining +total = pd.concat([b1, b2, b3, b4, b5]) +remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)] +# Create new output +filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" + "20260107 corrected batch 6 sal.xlsx") + +with pd.ExcelWriter(filename) as writer: + sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Top 1000 for testing + b1.to_excel(writer, sheet_name="batch 1", index=False) + # Batch 2 is the next 20,000 + b2.to_excel(writer, sheet_name="batch 2", index=False) + # Batch 3 is the next 20,000 + b3.to_excel(writer, 
sheet_name="batch 3", index=False)
+
+    b4.to_excel(writer, sheet_name="batch 4", index=False)
+    b5.to_excel(writer, sheet_name="batch 5", index=False)
+    remaining.to_excel(writer, sheet_name="batch 6", index=False)
+
+all_together = pd.concat(
+    [b1, b2, b3, b4, b5, remaining]
+)
diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py
new file mode 100644
index 00000000..41613bc3
--- /dev/null
+++ b/etl/customers/peabody/Nov 2025 Consulting Project/i_testing_parity_data.py
@@ -0,0 +1,91 @@
+import pandas as pd
+
+df = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
+    "08012026.xlsx"
+)
+df["wall_combined"] = df["Wall Construction"] + "+" + df["Wall Insulation"].fillna("Unknown Insulation")
+
+df['SAP Score'].mean()
+
+df[~pd.isnull(df["Lodged EPC Score"])]["Lodged EPC Score"].mean()
+df[~pd.isnull(df["Lodged EPC Score"])]["SAP Score"].mean()
+
+df['Difference'] = abs(df['SAP Score'] - df['Lodged EPC Score'])
+df[~pd.isnull(df["Lodged EPC Score"])]["Difference"].mean()
+
+df["Lodged EPC Band"].value_counts(normalize=True)
+df["SAP Band"].value_counts(normalize=True)
+
+z = df[df["SAP Band"] != df["Lodged EPC Band"]]
+agg = z.groupby(["Lodged EPC Band", "SAP Band"]).size().reset_index(name="count")
+
+recommendations_epc_c = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+    "solid floor, ashp 3.0 - corrected.xlsx"
+)
+recommendations_epc_c["uprn"] = recommendations_epc_c["uprn"].astype(int).astype(str)
+
+combined = recommendations_epc_c.merge(
+    df,
+    left_on="uprn",
+    right_on="UPRN",
+    suffixes=("_rec", "_sal")
+)
+
+combined = combined[["uprn", "SAP Score", "SAP Band", "current_sap_points", "current_epc_rating", "walls", "wall_combined"]]  # keep every column the checks below read; dropping "current_epc_rating" / "SAP Band" here made them raise KeyError
+
+combined[combined["SAP Score"] < 69]["current_epc_rating"].value_counts()
+combined[combined["SAP Score"] < 69]["SAP Band"].value_counts() +combined[combined["SAP Score"] < 69].shape +combined[combined["current_sap_points"] < 69] + +combined["SAP Band"].value_counts() + +# Our Cs +combined_cs = combined[combined["SAP Score"] < 69] +combined_cs["SAP Band"].value_counts() +# Their C and below + + +compare = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69] + +packages = recommendations_epc_c[recommendations_epc_c["total_retrofit_cost"] > 0] +packages["current_epc_rating"].value_counts() + +# TODO: 612 units +23219 - 612 +errors = recommendations_epc_c[ + (recommendations_epc_c["current_sap_points"] >= 69) & + (recommendations_epc_c["total_retrofit_cost"] > 0) + ] +errors["total_retrofit_cost"].sum() + +below_epc_c = recommendations_epc_c[recommendations_epc_c["current_sap_points"] < 69] + +below_epc_c_compare = below_epc_c.merge( + df, + left_on="uprn", + right_on="UPRN", + suffixes=("_rec", "_sal") +) + +eg1 = below_epc_c_compare[below_epc_c_compare["SAP Band"] == "C"].copy() +eg1["wall_combined"].value_counts() + +eg1_counts = eg1.groupby(["walls", "wall_combined"]).size().reset_index(name="count") +eg1_counts = eg1_counts.sort_values("count", ascending=False) + +externally_insulated = eg1[ + (eg1["wall_combined"] == "Solid Brick+External") & + pd.isnull(eg1["internal_wall_insulation"]) + ] + +externally_insulated[externally_insulated.index == 823]["uprn"] + +recommendations_epc_c[ + (recommendations_epc_c["current_sap_points"] < 69) & + (recommendations_epc_c["current_sap_points"] > 68) + ].shape + +recommendations_epc_c[recommendations_epc_c["wall_combined"] == ""] diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py b/etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py new file mode 100644 index 00000000..370473a1 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/j_installed_measures.py @@ -0,0 +1,7 @@ +import pandas as pd + 
+sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py new file mode 100644 index 00000000..cd7fba63 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -0,0 +1,491 @@ +import pandas as pd + +epc_c_recommendations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - corrected.xlsx" +) +epc_b_recommendations = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no " + "solid floor, ashp 3.0 - corrected.xlsx" +) + +epc_c_movers = epc_b_recommendations[ + epc_b_recommendations["current_epc_rating"] == "Epc.C" + ] +epc_c_movers["property_type"].value_counts() + +house_epc_c_movers = epc_c_movers[ + epc_c_movers["property_type"] == "House" + ] +house_epc_c_movers_with_solar = house_epc_c_movers[ + ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"]) + ] + +house_epc_c_movers_with_a_heatpump = house_epc_c_movers[ + ~pd.isnull(house_epc_c_movers["air_source_heat_pump"]) +] + +flat_epc_c_movers = epc_c_movers[ + epc_c_movers["property_type"] == "Flat" + ] + +epc_c_recommendations["sap_points"].mean() +epc_c_recommendations["sap_points"].mean() + +measure_cols = [ + "air_source_heat_pump", + "boiler_upgrade", + "cavity_wall_insulation", + "double_glazing", + "external_wall_insulation", + "flat_roof_insulation", + "high_heat_retention_storage_heaters", + "internal_wall_insulation", + "loft_insulation", + "low_energy_lighting", + "mechanical_ventilation", + "room_roof_insulation", + 
"roomstat_programmer_trvs", + "sealing_open_fireplace", + "secondary_glazing", + "secondary_heating", + "solar_pv", + "solar_pv_with_battery", + "suspended_floor_insulation", + "time_temperature_zone_control", +] + +epc_c_melted = ( + epc_c_recommendations + .melt( + id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols], + value_vars=measure_cols, + var_name="measure_type", + value_name="value", + ) + .dropna(subset=["value"]) +) +epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0] +epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() + +epc_b_melted = ( + epc_b_recommendations + .melt( + id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols], + value_vars=measure_cols, + var_name="measure_type", + value_name="value", + ) + .dropna(subset=["value"]) +) + +epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0] +epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index() + +measures_compared = epc_c_measures.merge( + epc_b_measures, + left_on="measure_type", + right_on="measure_type", + suffixes=("_epc_c", "_epc_b"), +) + +epc_c_retrofits = epc_c_recommendations[ + epc_c_recommendations["total_retrofit_cost"] > 0 + ] + +epc_b_retrofits = epc_b_recommendations[ + epc_b_recommendations["total_retrofit_cost"] > 0 + ] + +epc_c_retrofits["sap_points"].mean() +epc_b_retrofits["sap_points"].mean() + +properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b")) + +properties_in_both["total_retrofit_cost_epc_c"].mean() +properties_in_both["sap_points_epc_c"].mean() +properties_in_both["total_retrofit_cost_epc_b"].mean() +properties_in_both["sap_points_epc_b"].mean() + +# Solar PV savings - we need the amount of solar PV bill savings +from sqlalchemy.orm import sessionmaker +from backend.app.db.connection import db_engine +from backend.app.db.models.recommendations import Recommendation, Plan, 
PlanRecommendations, RecommendationMaterials
+from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
+from collections import defaultdict
+
+PORTFOLIO_ID = 434  # Peabody
+SCENARIOS = [
+    904,
+    905
+]
+scenario_names = {
+    904: "EPC C - no solid floor, ashp 3.0",
+    905: "EPC B - no solid floor, ashp 3.0",
+}
+
+
+def get_data(portfolio_id, scenario_ids):
+    """
+    Load properties, plans and default recommendations as plain dicts.
+
+    :param portfolio_id: portfolio whose properties (joined to their EPC
+        details) are loaded.
+    :param scenario_ids: scenario ids whose plans and recommendations are
+        loaded. Plans are filtered by scenario only, not by portfolio.
+    :return: tuple ``(properties_data, plans_data, recommendations_data)``,
+        each a list of dicts keyed by column name. Every recommendation dict
+        also carries ``scenario_id`` and a ``materials`` list.
+    """
+    session = sessionmaker(bind=db_engine)()
+
+    # The session is created by hand, so make sure it is always closed -
+    # previously a failing query skipped session.close() and leaked the
+    # connection together with its open transaction.
+    try:
+        session.begin()
+
+        # --------------------
+        # Properties
+        # --------------------
+        properties_query = session.query(
+            PropertyModel,
+            PropertyDetailsEpcModel
+        ).join(
+            PropertyDetailsEpcModel,
+            PropertyModel.id == PropertyDetailsEpcModel.property_id
+        ).filter(
+            PropertyModel.portfolio_id == portfolio_id
+        ).all()
+
+        # NOTE: both tables are flattened into one dict, so for any column
+        # name present in both tables the EPC value overwrites the property
+        # value (the EPC mapping is unpacked last).
+        properties_data = [
+            {
+                **{col.name: getattr(p.PropertyModel, col.name)
+                   for col in PropertyModel.__table__.columns},
+                **{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
+                   for col in PropertyDetailsEpcModel.__table__.columns},
+            }
+            for p in properties_query
+        ]
+
+        # --------------------
+        # Plans
+        # --------------------
+        plans_query = session.query(Plan).filter(
+            Plan.scenario_id.in_(scenario_ids)
+        ).all()
+
+        plans_data = [
+            {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+            for plan in plans_query
+        ]
+
+        plan_ids = [p["id"] for p in plans_data]
+
+        # --------------------
+        # Recommendations (NO materials yet)
+        # --------------------
+        # Only default recommendations that are not already installed
+        recommendations_query = session.query(
+            Recommendation,
+            Plan.scenario_id
+        ).join(
+            PlanRecommendations,
+            Recommendation.id == PlanRecommendations.recommendation_id
+        ).join(
+            Plan,
+            Plan.id == PlanRecommendations.plan_id
+        ).filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            Recommendation.default.is_(True),
+            Recommendation.already_installed.is_(False)
+        ).all()
+
+        recommendations_data = [
+            {
+                **{col.name: getattr(r.Recommendation, col.name)
+                   for col in Recommendation.__table__.columns},
+                "scenario_id": r.scenario_id,
+                "materials": []  # placeholder
+            }
+            for r in recommendations_query
+        ]
+
+        recommendation_ids = [r["id"] for r in recommendations_data]
+
+        # --------------------
+        # Recommendation materials (SEPARATE QUERY)
+        # --------------------
+        materials_query = session.query(
+            RecommendationMaterials
+        ).filter(
+            RecommendationMaterials.recommendation_id.in_(recommendation_ids)
+        ).all()
+
+        # Group materials by recommendation_id
+        materials_by_recommendation = defaultdict(list)
+
+        for m in materials_query:
+            materials_by_recommendation[m.recommendation_id].append({
+                "material_id": m.material_id,
+                "depth": m.depth,
+                "quantity": m.quantity,
+                "quantity_unit": m.quantity_unit,
+                "estimated_cost": m.estimated_cost,
+            })
+
+        # Attach materials safely (no filtering side effects)
+        for r in recommendations_data:
+            r["materials"] = materials_by_recommendation.get(r["id"], [])
+
+        return properties_data, plans_data, recommendations_data
+    finally:
+        session.close()
+
+
+properties_data, plans_data, recommendations_data = get_data(
+    portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS
+)
+
+recommendations_df = pd.DataFrame(recommendations_data)
+properties_df = pd.DataFrame(properties_data)
+
+# Mean solar PV bill saving per scenario
+solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
+average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
+
+# Check tenures
+initial_asset_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Properties"
+)
+sustainability_data = pd.read_excel(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+    "- Data Extracts for Domna.xlsx",
+    sheet_name="Sustainability"
+)
+
+sustainability_sample = sustainability_data[
+    sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
+] + +sustainability_sample = sustainability_sample.merge( + initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset") +) + +block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) +block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False) + +initial_asset_data.columns +initial_asset_data["LeaseType"].value_counts() + +# sustainability_sample["Tenure Group"].value_counts() +# Tenure Group +# General Needs 57787 +# Home Ownership 25471 +# Care & Supported Housing 4239 +# Rental 2677 +# Other 188 + +df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index() +df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False) + +tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index() +tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False) + +initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts() + +sample_data = initial_asset_data[ + ~initial_asset_data["Ownership Type"].isin( + [ + # Commercial # Everything is resi - based on the Residential Indicator variable - all are true + # Freeholder + "FREEHOLDER", # 19517 properties + # HOMEBUY / EQUITY LOAN + "Rent to Homebuy", # 1 property + # Leaseholder + "LEASEHOLD 100%", # 8455 properties + "Owned and Managed - 999 year lease", # 2076 properties + "Managed but not Owned-Private Lease", # 159 properties + "Owned and managed LEASEHOLD", # 26 properties + # Outright Sale - can't find anything matching + # SHARED EQUITY + "Shared Ownership", # 4065 properties + "Shared Ownership Owned Not Managed", # 23 properties + # Extra categories which seem sensible to exclude + "NOT MANAGED AND NOT OWNED" + ] + ) +] + +sample_data["Ownership Type"].value_counts() + +sample_data = initial_asset_data[ + 
initial_asset_data["Ownership Type"].isin( + [ + "Owned and Managed", + "Owned and Managed - 999 year lease", + "Owned and managed LEASEHOLD", + "LEASEHOLD 100%", + "DATALOAD DEFAULT" + ] + ) +] +dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)] +dropped["Ownership Type"].value_counts() + +for value in [ + # Commercial # Everything is resi, so should be fine. No matches + # Freeholder + "FREEHOLDER", # 19517 properties + # HOMEBUY / EQUITY LOAN + "Rent to Homebuy", # 1 property + # Leaseholder + "LEASEHOLD 100%", # 8455 properties + "Owned and Managed - 999 year lease", # 2076 properties + "Managed but not Owned-Private Lease", # 159 properties + "Owned and managed LEASEHOLD", # 26 properties + # Outright Sale - can't find anything matching + # SHARED EQUITY + "Shared Ownership", # 4065 properties + "Shared Ownership Owned Not Managed", # 23 properties +]: + print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0]) + +house_types = [ + "HOUSE", + "BUNGALOW", + "MAISONETTE", + "DUPLEX", +] + +guaranteed_control = [ + "Owned and Managed", + "Owned and Managed - 999 year lease", + "Owned and managed LEASEHOLD", + "LEASEHOLD 100%", + "DATALOAD DEFAULT", +] + +sample_data = initial_asset_data[ + ( + initial_asset_data["Ownership Type"].isin(guaranteed_control) + ) + | + ( + (initial_asset_data["Ownership Type"] == "FREEHOLDER") + & + (initial_asset_data["Property Type"].isin(house_types)) + ) + ] + +fabric_retrofit_sample = initial_asset_data[ + initial_asset_data["Ownership Type"].isin( + [ + "Owned and Managed", + "FREEHOLDER", + "DATALOAD DEFAULT", + ] + ) +] + +initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() +initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts() + +initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts() +z = initial_asset_data[ + 
~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types) + ] + +block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False) +zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"] + +potential_sample = initial_asset_data[ + ~pd.isnull(initial_asset_data["BlockCode"]) +] + +compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge( + initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(), + left_on="Property Type", + right_on="Property Type", + suffixes=("_on_block_codes", "_overall") +) + +# Comparison of smaller sample vs overall +new_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Properties" +) + +new_sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Sustainability" +) + +sap_bands = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " + "08012026.xlsx", +) + +combined = new_asset_data.merge( + new_sustainability_data, + left_on="UPRN", + right_on="Org Ref", + suffixes=("_asset", "_sustainability") +).merge( + sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef" +) +reduced_sample = combined[ + ~combined["AH Tenure"].isin( + ["Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership"] + ) +].copy() + +# property types +property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge( + 
combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(), + left_on="Property Type", + right_on="Property Type", + suffixes=("_reduced_sample", "_overall") +) + +# lodged ratings +lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts( + normalize=True).to_frame().reset_index().merge( + combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(), + left_on="Lodged EPC Band", + right_on="Lodged EPC Band", + suffixes=("_reduced_sample", "_overall") +) + +# modelled ratings +modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts( + normalize=True).to_frame().reset_index().merge( + combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(), + left_on="SAP Band", + right_on="SAP Band", + suffixes=("_reduced_sample", "_overall") +) + +# Testing measures +m1 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - 20250113 final.xlsx" +) +m2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx" +) + +compare = m1.merge( + m2, + left_on="uprn", + right_on="uprn", + suffixes=("_ewi_iwi", "_no_ewi_iwi") +) + +# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario +only_no_ewi_iwi = compare[ + (compare["total_retrofit_cost_ewi_iwi"] == 0) & + (compare["total_retrofit_cost_no_ewi_iwi"] != 0) + ] + +(m1["total_retrofit_cost"] > 0).sum() +(m2["total_retrofit_cost"] > 0).sum() + +with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0] + +z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])] diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py b/etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py new file 
mode 100644 index 00000000..cbc52447 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/l_reduced_sample.py @@ -0,0 +1,115 @@ +import pandas as pd + +initial_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Properties" +) + +sustainability_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " + "- Data Extracts for Domna.xlsx", + sheet_name="Sustainability" +) + +asset_data_v2 = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Properties" +) + +desired_ownerships = asset_data_v2[ + ~asset_data_v2["AH Tenure"].isin( + {"Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership"} + ) +] + +desired_ownerships["Ownership Type"].value_counts() + +removed_ownerships = initial_asset_data[ + ~initial_asset_data["UPRN"].isin(desired_ownerships["UPRN"].values) +]["Ownership Type"].value_counts() + +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " + "ownership filtered sal.xlsx", + sheet_name="Standardised Asset List" +) + +# What did we include, that we shouldn't have? 
+should_have_been_dropped = sal[ + ~sal["landlord_property_id"].isin(desired_ownerships["UPRN"].values) +] + +needs_to_be_added = desired_ownerships[ + ~desired_ownerships["UPRN"].isin(sal["landlord_property_id"].values) +] + +# Merge on ownership types +sal = sal.merge( + initial_asset_data[["UPRN", "Ownership Type"]], + left_on="domna_property_id", + right_on="UPRN", +) + +# Remove the irrelevant ownership types +sal = sal[ + ~sal["Ownership Type"].isin( + [ + # Commercial # Everything is resi - based on the Residential Indicator variable - all are true + # Freeholder + "FREEHOLDER", # 19517 properties + # HOMEBUY / EQUITY LOAN + "Rent to Homebuy", # 1 property + # Leaseholder + "LEASEHOLD 100%", # 8455 properties + "Owned and Managed - 999 year lease", # 2076 properties + "Managed but not Owned-Private Lease", # 159 properties + "Owned and managed LEASEHOLD", # 26 properties + # Outright Sale - can't find anything matching + # SHARED EQUITY + "Shared Ownership", # 4065 properties + "Shared Ownership Owned Not Managed", # 23 properties + # Extra categories which seem sensible to exclude + "NOT MANAGED AND NOT OWNED" + ] + ) +] + +sal["landlord_property_id"] = sal["domna_property_id"].copy() + +# Store this SAL in three batches +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " + "ownership filtered sal.xlsx" +) +with pd.ExcelWriter(filename) as writer: + sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) + # Store the three sections + sal[0:20000].to_excel(writer, sheet_name="Batch 1", index=False) + sal[20000:40000].to_excel(writer, sheet_name="Batch 2", index=False) + sal[40000:].to_excel(writer, sheet_name="Batch 3", index=False) + +# Test reading back in and assembling +# b1 = pd.read_excel( +# filename, +# sheet_name="Batch 1" +# ) +# b2 = pd.read_excel( +# filename, +# sheet_name="Batch 2" +# ) +# b3 = pd.read_excel( +# filename, +# sheet_name="Batch 3" +# ) 
+# assembled_sal = pd.concat([b1, b2, b3]) +# # Make sure we have the right # of UPRNs +# assert assembled_sal["epc_os_uprn"].nunique() == sal["epc_os_uprn"].nunique() diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py new file mode 100644 index 00000000..a18dc315 --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py @@ -0,0 +1,293 @@ +# ------ Pull in the full SAL sample ------ +import pandas as pd + +full_sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " + "SAL/Depracated/20260107 corrected batch 6 sal.xlsx", + sheet_name="Standardised Asset List" +) + +# ------Pull in the reduced sample ------ +# This has a slightly incorrect mix of ownership types. Some properties will need to be dropped and others, added +reduced_sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " + "ownership filtered sal.xlsx", + sheet_name="Standardised Asset List" +) + +# ------ Pull in the confirmed ownership column from Peabody ------ +new_asset_data = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " + "- Peabody " + "- Data Extracts for Domna v2.xlsx", + sheet_name="Properties" +) + +correct_sample = new_asset_data[ + ~new_asset_data["AH Tenure"].isin( + ["Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership"] + ) +].copy() + +# ------- Stuff to add ------- +# These are properties that need to be added to the reduced sample, from the SAL +stuff_to_add = correct_sample[ + ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values) +]["UPRN"].values + +sal_to_add = full_sal[ + 
full_sal["domna_property_id"].isin(stuff_to_add) +].copy() + +# ------- Stuff to remove ------- +stuff_to_remove = reduced_sal[ + ~reduced_sal["landlord_property_id"].isin(correct_sample["UPRN"].values) +]["landlord_property_id"].values + +to_delete = reduced_sal[ + reduced_sal["landlord_property_id"].isin(stuff_to_remove) +].copy() + +# ------- Create the correctly formatted SAL, with an individual batch for properties we need to add ------- + +# This is what is correct, from the reduced sample, after removing the incorrect ownership types +reduced_sal_final = reduced_sal[ + ~reduced_sal["landlord_property_id"].isin(stuff_to_remove) +].copy() + +sal_to_add["landlord_property_id"] = sal_to_add["domna_property_id"].copy() + +full_sal = pd.concat( + [reduced_sal_final, sal_to_add], +) + +# filename = ( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - " +# "final asset list.xlsx" +# ) +# with pd.ExcelWriter(filename) as writer: +# full_sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) +# # Store the three sections +# reduced_sal_final[0:25000].to_excel(writer, sheet_name="Batch 1 - was correct", index=False) +# reduced_sal_final[25000:].to_excel(writer, sheet_name="Batch 2 - was correct", index=False) +# sal_to_add.to_excel(writer, sheet_name="Batch 3 - needs adding", index=False) + +# We now prepare the process of getting the associated +# We have the properties we need to delete. 
We can get their associated plans for all scenario IDs +scenario_ids = [908, 909, 910] + +import pandas as pd +from sqlalchemy.orm import Session +from backend.app.db.models.portfolio import PropertyModel +from backend.app.db.connection import db_session, db_read_session +from sqlalchemy import select, func +from sqlalchemy.orm import Session +from backend.app.db.models.recommendations import Plan + +uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist() + +# PORTFOLIO_ID = 435 + +# SCENARIO_ID_WITH_PLANS_TO_DELETE = 910 + + +# Get the property IDs for these UPRNs +# def get_property_ids_for_uprns(session: Session, uprns: list[int], portfolio_id) -> list[int]: +# return [ +# property_id +# for (property_id,) in +# session.query(PropertyModel.id) +# .filter( +# PropertyModel.uprn.in_(uprns), +# PropertyModel.portfolio_id == portfolio_id +# ) +# .all() +# ] +# +# +# with db_read_session() as session: +# property_ids_to_delete = get_property_ids_for_uprns( +# session, uprns_to_be_deleted, portfolio_id=PORTFOLIO_ID +# ) +# +# +# def count_plans_for_scenario(session: Session, scenario_id: int, portfolio_id, property_ids) -> int: +# return session.execute( +# select(func.count()) +# .select_from(Plan) +# .where( +# Plan.scenario_id == scenario_id, +# Plan.portfolio_id == portfolio_id, +# Plan.property_id.in_(property_ids) +# ) +# ).scalar_one() +# +# +# with db_session() as session: +# n_plans = count_plans_for_scenario( +# session, +# scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE, +# portfolio_id=PORTFOLIO_ID, +# property_ids=property_ids_to_delete +# ) +# +# +# def get_plan_ids_for_scenario( +# session: Session, scenario_id: int, portfolio_id, property_ids +# ) -> list[int]: +# result = session.execute( +# select(Plan.id, Plan.property_id) +# .where( +# Plan.scenario_id == scenario_id, +# Plan.portfolio_id == portfolio_id, +# Plan.property_id.in_(property_ids) +# ) +# ) +# return [{"plan_id": row.id, "property_id": row.property_id} for row in result] +# +# +# 
with db_session() as session: +# plan_ids_to_property = get_plan_ids_for_scenario( +# session, +# scenario_id=SCENARIO_ID_WITH_PLANS_TO_DELETE, +# portfolio_id=PORTFOLIO_ID, +# property_ids=property_ids_to_delete +# ) +# +# df = pd.DataFrame(plan_ids_to_property) +# df[df["property_id"].duplicated()].shape +# +# plan_ids = [row["plan_id"] for row in plan_ids_to_property] +# +# +# def chunked(iterable, size): +# for i in range(0, len(iterable), size): +# yield iterable[i:i + size] +# +# +# from sqlalchemy import text +# from sqlalchemy.orm import Session +# +# +# def delete_plan_batch(session: Session, plan_ids: list[int]): +# if not plan_ids: +# return +# +# session.execute(text("SET LOCAL lock_timeout = '5s'")) +# +# params = {"plan_ids": plan_ids} +# +# # ---------------------------- +# # recommendation_materials +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM recommendation_materials rm +# USING plan_recommendations pr +# WHERE rm.recommendation_id = pr.recommendation_id +# AND pr.plan_id = ANY(:plan_ids) +# """), +# params, +# ) +# +# # ---------------------------- +# # plan_recommendations +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM plan_recommendations +# WHERE plan_id = ANY(:plan_ids) +# """), +# params, +# ) +# +# # ---------------------------- +# # recommendations (only those used by these plans) +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM recommendation r +# WHERE r.id IN ( +# SELECT DISTINCT recommendation_id +# FROM plan_recommendations +# WHERE plan_id = ANY(:plan_ids) +# ) +# """), +# params, +# ) +# +# # ---------------------------- +# # plans LAST +# # ---------------------------- +# session.execute( +# text(""" +# DELETE FROM plan +# WHERE id = ANY(:plan_ids) +# """), +# params, +# ) +# +# +# batch_size = 25 +# total = (len(plan_ids) + batch_size - 1) // batch_size +# +# for i, batch in enumerate(chunked(plan_ids, batch_size), start=1): +# 
print(f"Deleting plan batch {i}/{total} ({len(batch)} plans)") +# +# with db_session() as session: +# delete_plan_batch(session, batch) +# +# print(f"Batch {i} committed") +# +# # Now, we delete the associated properties in batch and associated objects. It should +# # largely be property, property details +# property_ids_to_delete +# +# from sqlalchemy import text +# from sqlalchemy.orm import Session +# +# +# def move_properties_between_portfolios( +# session: Session, +# property_ids: list[int], +# from_portfolio_id: int, +# to_portfolio_id: int, +# ): +# if not property_ids: +# return 0 +# +# result = session.execute( +# text(""" +# UPDATE property +# SET portfolio_id = :to_portfolio_id +# WHERE portfolio_id = :from_portfolio_id +# AND id = ANY(:property_ids) +# """), +# { +# "property_ids": property_ids, +# "from_portfolio_id": from_portfolio_id, +# "to_portfolio_id": to_portfolio_id, +# }, +# ) +# +# return result.rowcount +# +# +# # Moved? +# # 573476, 586011 +# +# property_ids_to_delete2 = [x for x in property_ids_to_delete if x not in [573476, 586011]] +# +# with db_session() as session: +# n_moved = move_properties_between_portfolios( +# session, +# property_ids=property_ids_to_delete2, +# from_portfolio_id=PORTFOLIO_ID, +# to_portfolio_id=32, # Archive portfolio +# ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py new file mode 100644 index 00000000..4bd11a1b --- /dev/null +++ b/etl/customers/peabody/Nov 2025 Consulting Project/n_fixing_already_installed_bug.py @@ -0,0 +1,80 @@ +# 1) Need to get all already installed measures +# 2) get the unique uprns for these properties +# 3) Create a re-fresh SAL for these properties +# 4) re-trigger EPC C w/o EWI/IWI + the EPC B scenario + +from backend.app.db.models.recommendations import InstalledMeasure +from backend.app.db.connection import db_session +from 
etl.customers.cambridge.surveys import current_epc +# NOTE(review): current_epc is not referenced anywhere else in this script - confirm whether the import is needed + +# Get all installed measures from the installedMeasure table +with db_session() as session: + # We need installed measures, where the measure type is cavity wall insulation (the only type filtered below) + installed_measures = session.query(InstalledMeasure).filter( + InstalledMeasure.measure_type.in_(["cavity_wall_insulation"]) + ).all() + # Get the uprns + installed_uprns = [x.uprn for x in installed_measures] + +# De-duplicate the uprns collected above +installed_uprns = list(set(installed_uprns)) + +# We then create a portfolio of properties we need to re-run +import pandas as pd + +sal = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260113 - " + "final asset list.xlsx", + sheet_name="Standardised Asset List" +) + +# Keep only the SAL rows whose epc_os_uprn has an installed measure +needing_retry = sal[sal["epc_os_uprn"].isin(installed_uprns)] + +# Store +needing_retry.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " + "SAL/properties_needing_retry_20260115 - cavity wall insulation.xlsx", + sheet_name="Standardised Asset List", + index=False +) + +#### Testing +# Load the two scenario exports and join them on uprn so their columns can be compared side by side +with_ewi = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, ashp 3.0 - 20250113 final.xlsx" +) +without_ewi = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no " + "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx" +) + +comparison = with_ewi.merge( + without_ewi, + left_on="uprn", + right_on="uprn", + suffixes=("_with_ewi", "_without_ewi") +) + +# NOTE: from here on with_ewi / without_ewi are re-bound to filtered subsets of comparison (rows with a positive +# retrofit cost in the respective scenario), no longer the raw exports read above +with_ewi = comparison[comparison["total_retrofit_cost_with_ewi"] > 0] +with_ewi["current_epc_rating_with_ewi"].value_counts() +# NOTE(review): the next line repeats the previous one exactly - possibly a different column was intended; confirm +with_ewi["current_epc_rating_with_ewi"].value_counts() + +without_ewi = comparison[comparison["total_retrofit_cost_without_ewi"] > 0] +# NOTE(review): identical to the with_ewi assignment a few lines above +with_ewi = comparison[comparison["total_retrofit_cost_with_ewi"] > 0] + 
+with_ewi[with_ewi["current_epc_rating_with_ewi"] == "Epc.C"]["uprn"] + +# Rows with a positive with-EWI retrofit cost whose current rating is "Epc.C", reduced to identifying columns +to_fix = with_ewi[with_ewi["current_epc_rating_with_ewi"] == "Epc.C"] +to_fix = to_fix[["uprn", "address_with_ewi", "postcode_with_ewi", "property_type_with_ewi"]].rename( + columns={ + "address_with_ewi": "address", + "postcode_with_ewi": "postcode", + "property_type_with_ewi": "property_type" + } +).merge( + sal[["epc_os_uprn", "landlord_built_form"]], + left_on="uprn", + right_on="epc_os_uprn", + how="left" +).drop(columns=["epc_os_uprn"]) + +to_fix = to_fix.to_dict("records") diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py new file mode 100644 index 00000000..0c7ea98f --- /dev/null +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -0,0 +1,744 @@ +import json +import os +import pandas as pd +from datetime import datetime + + +def years_between(d1, d2): + """Return the number of whole years from d2 to d1. + + One year is subtracted when d1's (month, day) falls before d2's, i.e. the anniversary has not been reached yet. + Both arguments must expose .year, .month and .day. + """ + # precise year difference (accounts for months/days) + return (d1.year - d2.year) - ((d1.month, d1.day) < (d2.month, d2.day)) + + +def get_element(elements, label): + """Safely get an element dict by display label (your JSON keys).""" + return elements.get(label) + + +def append_result(decent_homes_meta, criteria, variable, sub_variable, result, install_date=None, expiry_date=None): + """Append one result record (a dict) to decent_homes_meta, mutating the list in place. + + hhsrs_rank and hhsrs_score are always written as None here; install_date and expiry_date default to None. + Returns None. + """ + decent_homes_meta.append({ + "criteria": criteria, + "variable": variable, + "sub_variable": sub_variable, + "result": result, + "hhsrs_rank": None, + "hhsrs_score": None, + "install_date": install_date, + "expiry_date": expiry_date, + }) + + +# Read in static json, which is transformed by Jun-te's script +folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest/Decent Homes Pilot" +filenames = ["flat 1.json", "house 1.json"] + +# Standardised variables which will form the enums in the db +HHSRS_VARIABLES = [ + "damp_and_mould_growth", + "excess_cold", + "excess_heat", + "asbestos_and_mm_fibres", + "biocides", + "carbon_monoxide_and_fuel_combustion_products", 
+ "lead", + "radiation", + "uncombusted_fuel_gas", + "volatile_organic_compounds", + "crowding_and_space", + "entry_by_intruders", + "lighting", + "noise", + "domestic_hygiene_pests_and_refuse", + "food_safety", + "personal_hygiene_sanitation_and_drainage", + "water_supply", + "falls_associated_with_baths", + "falls_on_level_surfaces", + "falls_on_stairs_and_steps", + "falls_between_levels", + "electrical_hazards", + "fire", + "flames_hot_surfaces_and_materials", + "collision_and_entrapment", + "explosions", + "ergonomics", + "structural_collapse_and_falling_elements" +] + +ELEMENT_CODE_TO_DESCRIPTION = { + # One-to-one + "HHSRSDAMP": "damp_and_mould_growth", + "HHSRSCOLD": "excess_cold", + "HHSRSHEAT": "excess_heat", + "HHSRSASB": "asbestos_and_mm_fibres", + "HHSRSBIOC": "biocides", + "HHSRSLEAD": "lead", + "HHSRSRADIA": "radiation", + "HHSRSFUEL": "uncombusted_fuel_gas", + "HHSRSORGAN": "volatile_organic_compounds", + "HHSRSCROWD": "crowding_and_space", + "HHSRSENTRY": "entry_by_intruders", + "HHSRSLIGHT": "lighting", + "HHSRSNOISE": "noise", + "HHSRSDOMES": "domestic_hygiene_pests_and_refuse", + "HHSRSFOOD": "food_safety", + "HHSRSPERS": "personal_hygiene_sanitation_and_drainage", + "HHSRSWATER": "water_supply", + "HHSRSFBATH": "falls_associated_with_baths", + "HHSRSFLEVE": "falls_on_level_surfaces", + "HHSRSFSTAI": "falls_on_stairs_and_steps", + "HHSRSFBETW": "falls_between_levels", + "HHSRSELEC": "electrical_hazards", + "HHSRSFIRE": "fire", + "HHSRSFLAME": "flames_hot_surfaces_and_materials", + "HHSRSEXPLO": "explosions", + "HHSRSPOSI": "ergonomics", + "HHSRSSTRUC": "structural_collapse_and_falling_elements", + + # One-to-many expansions + "HHSRSCO": "carbon_monoxide", + "HHSRSSO2": "sulphur_dioxide_and_smoke", + "HHSRSNO2": "nitrogen_dioxide", + "HHSRSENTRP": "collision_and_entrapment", + "HHSRSCLOW": "collision_hazards_and_low_headroom", +} + +CRITERION_B_VARIABLES = [ + "external_walls_structure", "lintels", "brickwork_spalling", "wall_finish", 
"roof_structure", "roof_finish", + "chimneys", "windows", "external_doors", "kitchens", "bathrooms", "central_heating_boiler", + "central_heating_distribution_system", "heating_other", "electrical_systems", +] + +CRITERION_C_VARIABLES = [ + "kitchen_less_than_20_years_old", "kitchen_adequate_space_and_layout", "bathroom_less_than_30_years_old", + "bathroom_wc_appropriately_located", "adequate_external_noise_insulation", "adequate_common_entrance_areas", +] + +# Criterion C explicit age limits (different from component lifespans used elsewhere) +CRITERION_C_AGE_LIMITS = { + "kitchen_years_max": 20, + "bathroom_years_max": 30, +} + +# Field labels as they appear in your JSON (based on your code) +LABEL_KITCHEN = "Adequacy of Kitchen and Type in Property" +LABEL_BATHROOM = "Adequacy of Bathroom Location in Property" +LABEL_NOISE = "Adequacy of Noise Insulation in Property" +LABEL_COMMON_CIRC = "Circulation Space in Common Area" # flats only + +STANDARD_HHSRS_MAPPING = {"pass": "TYPRISK", "fail": "MODRISK", "no_data": "TOBEASSESS"} + +# Criterion A - mapping of HHSRS variables to Waltham forest element codes +HHSRS_MAPPING = { + "damp_and_mould_growth": {"HHSRSDAMP": STANDARD_HHSRS_MAPPING}, + "excess_cold": {"HHSRSCOLD": STANDARD_HHSRS_MAPPING}, + "excess_heat": {"HHSRSHEAT": STANDARD_HHSRS_MAPPING}, + "asbestos_and_mm_fibres": {"HHSRSASB": STANDARD_HHSRS_MAPPING}, + "biocides": {"HHSRSBIOC": STANDARD_HHSRS_MAPPING}, + "carbon_monoxide_and_fuel_combustion_products": { + "HHSRSCO": STANDARD_HHSRS_MAPPING, + "HHSRSSO2": STANDARD_HHSRS_MAPPING, + "HHSRSNO2": STANDARD_HHSRS_MAPPING + }, + "lead": {"HHSRSLEAD": STANDARD_HHSRS_MAPPING}, + "radiation": {"HHSRSRADIA": STANDARD_HHSRS_MAPPING}, + "uncombusted_fuel_gas": {"HHSRSFUEL": STANDARD_HHSRS_MAPPING}, + "volatile_organic_compounds": {"HHSRSORGAN": STANDARD_HHSRS_MAPPING}, + "crowding_and_space": {"HHSRSCROWD": STANDARD_HHSRS_MAPPING}, + "entry_by_intruders": {"HHSRSENTRY": STANDARD_HHSRS_MAPPING}, + "lighting": 
{"HHSRSLIGHT": STANDARD_HHSRS_MAPPING}, + "noise": {"HHSRSNOISE": STANDARD_HHSRS_MAPPING}, + "domestic_hygiene_pests_and_refuse": {"HHSRSDOMES": STANDARD_HHSRS_MAPPING}, + "food_safety": {"HHSRSFOOD": STANDARD_HHSRS_MAPPING}, + "personal_hygiene_sanitation_and_drainage": {"HHSRSPERS": STANDARD_HHSRS_MAPPING}, + "water_supply": {"HHSRSWATER": STANDARD_HHSRS_MAPPING}, + "falls_associated_with_baths": {"HHSRSFBATH": STANDARD_HHSRS_MAPPING}, + "falls_on_level_surfaces": {"HHSRSFLEVE": STANDARD_HHSRS_MAPPING}, + "falls_on_stairs_and_steps": {"HHSRSFSTAI": STANDARD_HHSRS_MAPPING}, + "falls_between_levels": {"HHSRSFBETW": STANDARD_HHSRS_MAPPING}, + "electrical_hazards": {"HHSRSELEC": STANDARD_HHSRS_MAPPING}, + "fire": {"HHSRSFIRE": STANDARD_HHSRS_MAPPING}, + "flames_hot_surfaces_and_materials": {"HHSRSFLAME": STANDARD_HHSRS_MAPPING}, + "collision_and_entrapment": {"HHSRSENTRP": STANDARD_HHSRS_MAPPING, "HHSRSCLOW": STANDARD_HHSRS_MAPPING}, + "explosions": {"HHSRSEXPLO": STANDARD_HHSRS_MAPPING}, + "ergonomics": {"HHSRSPOSI": STANDARD_HHSRS_MAPPING}, + "structural_collapse_and_falling_elements": {"HHSRSSTRUC": STANDARD_HHSRS_MAPPING} +} + +# print(houses_waltham_forest_data[ +# houses_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" +# ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) + +# print(flats_waltham_forest_data[ +# flats_waltham_forest_data["ELEMENT CODE"] == "INTBTHADEQ" +# ][["ATTRIBUTE CODE", "ATTRIBUTE CODE DESCRIPTION"]].drop_duplicates()) + + +# Criterion B +B_COMPONENT_LABELS = { + # Key components + "wall_structure": [ + "Wall Structure in External Area", + ], + "lintels": [ + "Lintels in External Area", + ], + "brickwork_spalling": [ + "Wall Spalling in External Area", + ], + "wall_finish": [ + "Wall Finish 1 in External Area", + "Wall Finish 2 in External Area", + "External Decorations in External Area", + "Brickwork Pointing in External Area", + ], + "roof_structure": [ + "Roof Structure 1 in External Area", + "Roof Structure 2 in 
External Area", + "Roof Structure 3 in External Area", + "Garage Roof in External Area", + "Garage and Store Roofs in External Area", + "Store Roof in External Area", + "Fascia / Soffit / Bargeboard in External Area", + "Gutters in External Area", + "Downpipes in External Area", + "Internal Downpipes in External Area" + ], + "roof_finish": [ + "Roof Covering 1 in External Area", + "Roof Covering 2 in External Area", + "Roof Covering 3 in External Area", + ], + "chimneys": [ + "Chimneys in External Area", + ], + "windows": [ + "Windows in Property", + "Windows 1 in External Area", + "Windows 2 in External Area", + "Garage and Store Windows in External Area", + "Garage Windows in External Area", + "Store Windows in External Area", + ], + "external_doors": [ + "Type and Location of Front Door in Property", + "Front Door Fire Rating in Property", + "Patio and French Doors 1 in External Area", + "Back and Side Doors 1 in External Area", + "Back and Side Doors 2 in External Area", + "Garage and Store Doors in External Area", + "Garage Door in External Area", + "Store Door in External Area", + ], + "central_heating_boiler": [ + # "Heating Improvement Required in Property", + "Boiler Fuel in Property", + "Type of Water Heating in Property", + ], + # NOTE(review): heating_other lists exactly the same labels as central_heating_boiler - confirm this is intended + "heating_other": [ + # "Heating Distribution System in Property" + "Boiler Fuel in Property", + "Type of Water Heating in Property", + ], + "electrical_systems": [ + "Electrics Required in Property", + ], + # Other components + "kitchen": [ + "Adequacy of Kitchen and Type in Property", + ], + "bathroom": [ + "Adequacy of Bathroom Location in Property", + ], + "central_heating_distribution_system": [ + "Heating Distribution System in Property", + ], +} + +KEY_COMPONENTS = { + "wall_structure", "lintels", "brickwork_spalling", "wall_finish", + "roof_structure", "roof_finish", "chimneys", "windows", + "external_doors", "central_heating_boiler", "heating_other", + "electrical_systems", +} +OTHER_COMPONENTS = { + "kitchen", 
"bathroom", "central_heating_distribution_system", +} + +# Criterion B - component lifespans in years, keyed by component and then by property type. +# These are looked up by the Criterion B age check (not by Criterion C, which uses CRITERION_C_AGE_LIMITS). +COMPONENT_LIFESPANS = { + # Key components + "wall_structure": { + "house": 80, "flat_below_6_storeys": 80, "flat_above_6_storeys": 80 + }, + "lintels": { + "house": 60, "flat_below_6_storeys": 60, "flat_above_6_storeys": 60 + }, + "brickwork_spalling": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "wall_finish": { + "house": 60, "flat_below_6_storeys": 60, "flat_above_6_storeys": 30 + }, + "roof_structure": { + "house": 50, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "roof_finish": { + "house": 50, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "chimneys": { + "house": 50, "flat_below_6_storeys": 50, "flat_above_6_storeys": None # N/A + }, + "windows": { + "house": 40, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "external_doors": { + "house": 40, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "central_heating_boiler": { + "house": 15, "flat_below_6_storeys": 15, "flat_above_6_storeys": 15 + }, + "heating_other": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "electrical_systems": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + + # Other components + "kitchen": { + "house": 30, "flat_below_6_storeys": 30, "flat_above_6_storeys": 30 + }, + "bathroom": { + "house": 40, "flat_below_6_storeys": 40, "flat_above_6_storeys": 40 + }, + "central_heating_distribution_system": { + "house": 40, "flat_below_6_storeys": 40, "flat_above_6_storeys": 40 + }, +} + +# Database design +# creation_date, uprn, variable, result (pass/fail/nodata), hhsrs_score (optional, numeric), hhsrs_rank (A-J), +# install_date (for components which expire, e.g. kitchen), remaining_life (for components which expire, e.g. kitchen), + +# TODO: Add the criterion +decent_homes_meta = [] +# Use to capture criterion A, B, C and D.
Should be: +# {"uprn": int, "creation_date": datetime, "criterion_a": bool, "criterion_b": bool, "criterion_c": bool, +# "criterion_d": bool, "decent_homes": bool"} +property_decent_homes = [] +for fn in filenames: + with open(os.path.join(folder, fn), "rb") as f: + data = json.load(f) + + today = pd.Timestamp.today().normalize() + + property_info = data["property_info"] + if property_info["PROP TYPE"] in ["HOU"]: + property_type = "house" + elif property_info["PROP TYPE"] == "FLA": + raise NotImplementedError("Implement distrinction between below and above 6 storeys") + # property_type = "flat" + else: + raise NotImplementedError("Unknown property type") + + # ---------------- Criterion A ---------------- + # Critrion A: pass/fail + # If fail, why? + for hhsrs_variable, mapping in HHSRS_MAPPING.items(): + element_code = list(mapping.keys())[0] + + # Find the data in the JSON within data["elements"] + check_pass = [] + for k, v in data["elements"].items(): + if v["ELEMENT CODE"] == element_code: + # We check the attribute code + # Check if pass + if v["ATTRIBUTE CODE"] == mapping[element_code]["pass"]: + result = "pass" + elif v["ATTRIBUTE CODE"] == mapping[element_code]["fail"]: + result = "fail" + elif v["ATTRIBUTE CODE"] == mapping[element_code]["no_data"]: + result = "no_data" + else: + raise ValueError("Unknown attribute code") + check_pass.append(result) + append_result( + decent_homes_meta, + criteria="A", + variable=hhsrs_variable, + sub_variable=ELEMENT_CODE_TO_DESCRIPTION[element_code], + result=result, + install_date=None, + expiry_date=None, + ) + + # We check if we have a pass, fail or no_data + # if all([x == "pass" for x in check_pass]): + # hhsrs_result = "pass" + # elif any([x == "fail" for x in check_pass]): + # hhsrs_result = "fail" + # elif any([x == "no_data" for x in check_pass]): + # hhsrs_result = "no_data" + # else: + # raise NotImplementedError("Mixed results not implemented") + + # ---------------- Criterion B ---------------- + # Check 
each of the components + + # ---------------- Criterion B ---------------- + property_boiler = get_element(data["elements"], "Boiler Fuel in Property") + + for component, labels in B_COMPONENT_LABELS.items(): + for label in labels: + label_data = get_element(data["elements"], label) + + # Handle no-data or not-applicable + if label_data["ATTRIBUTE CODE"] in ["UNKNOWN", "NONE", "UNKNOWNG", "UNKNOWNS"]: + # append_result( + # decent_homes_meta, + # criteria="B", + # variable=component, + # sub_variable=label, + # result="pass", + # install_date=None, + # expiry_date=None, + # ) + continue + + # Special skip conditions for heating + no_boiler_condition = ( + property_boiler["ATTRIBUTE CODE"] in ["NONENOCH"] + and component == "central_heating_boiler" + ) + other_heating_condition = ( + label_data["ATTRIBUTE CODE"] in ["NONENOCH"] + and component == "heating_other" + ) + if no_boiler_condition or other_heating_condition: + # append_result( + # decent_homes_meta, + # criteria="B", + # variable=component, + # sub_variable=label, + # result="pass", + # install_date=None, + # expiry_date=None, + # ) + continue + + # Normal case: evaluate install date + lifetime + remaining life + install_date = pd.to_datetime(label_data["INSTALL DATE"]) + if pd.isnull(install_date): + raise ValueError(f"Missing install date for {component}/{label}") + + component_lifetime = COMPONENT_LIFESPANS[component][property_type] + is_old = years_between(today.to_pydatetime(), install_date.to_pydatetime()) > component_lifetime + + if pd.isnull(label_data["REMAINING LIFE"]): + raise ValueError(f"Missing remaining life for {component}/{label}") + has_failed = label_data["REMAINING LIFE"] < 0 + + expiry_date = install_date + pd.DateOffset(years=component_lifetime) + component_result = "fail" if is_old and has_failed else "pass" + + # Push into decent_homes_meta + append_result( + decent_homes_meta, + criteria="B", + variable=component, + sub_variable=label, + result=component_result, + 
install_date=str(install_date), + expiry_date=str(expiry_date), + ) + + # ---------------- Criterion C ---------------- + + # Guard: property type string already set earlier + is_flat = (property_info["PROP TYPE"] == "FLA") + + # 1) Kitchen age ≤ 20 years + kitchen = get_element(data["elements"], LABEL_KITCHEN) + if kitchen: + kit_install_raw = kitchen["INSTALL DATE"] + kit_install = pd.to_datetime(kit_install_raw) + kit_age_years = years_between(today.to_pydatetime(), kit_install.to_pydatetime()) + kitchen_age_result = "pass" if kit_age_years <= CRITERION_C_AGE_LIMITS["kitchen_years_max"] else "fail" + # For transparency, store next renewal as install + 20 years (criterion C perspective) + kit_next_due = kit_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["kitchen_years_max"]) + else: + raise NotImplementedError("Kitchen data missing - pls check") + append_result( + decent_homes_meta, + criteria="C", + variable="kitchen_less_than_20_years_old", + sub_variable="kitchen_less_than_20_years_old", + result=kitchen_age_result, + install_date=str(kit_install), + expiry_date=str(kit_next_due) + ) + + # 2) Kitchen adequate space/layout + # Prefer explicit codes if you have them, fall back to text in ATTRIBUTE CODE DESCRIPTION + if kitchen: + kit_attr_desc = kitchen["ATTRIBUTE CODE"] + if kit_attr_desc == "STDKITADQ": + kitchen_adequacy_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") + else: + raise NotImplementedError("Kitchen data missing - pls check") + append_result( + decent_homes_meta, + criteria="C", + variable="kitchen_adequate_space_and_layout", + sub_variable="kitchen_adequate_space_and_layout", + result=kitchen_adequacy_result, + ) + + # 3) Bathroom age ≤ 30 years + bath = get_element(data["elements"], LABEL_BATHROOM) + if bath: + bth_install_raw = bath["INSTALL DATE"] + bth_install = pd.to_datetime(bth_install_raw) + bth_age_years = years_between(today.to_pydatetime(), bth_install.to_pydatetime()) + bathroom_age_result = 
"pass" if bth_age_years <= CRITERION_C_AGE_LIMITS["bathroom_years_max"] else "fail" + bth_next_due = bth_install + pd.DateOffset(years=CRITERION_C_AGE_LIMITS["bathroom_years_max"]) + else: + raise NotImplementedError("Bathroom data missing - pls check") + append_result( + decent_homes_meta, + criteria="C", + variable="bathroom_less_than_30_years_old", + sub_variable="bathroom_less_than_30_years_old", + result=bathroom_age_result, + install_date=str(bth_install), + expiry_date=bth_next_due + ) + + # 4) Bathroom/WC appropriately located + if bath: + bth_attr_code = bath["ATTRIBUTE CODE"] + if bth_attr_code in {"STDBTHADQ", "ADPBTHADQ"}: + bathroom_location_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") + else: + raise NotImplementedError("Bathroom data missing - pls check") + + append_result( + decent_homes_meta, + criteria="C", + variable="bathroom_wc_appropriately_located", + sub_variable="bathroom_wc_appropriately_located", + result=bathroom_location_result + ) + + # 5) Adequate external noise insulation + noise = get_element(data["elements"], LABEL_NOISE) + if noise: + noise_code = noise["ATTRIBUTE CODE"] + if noise_code in {"ADEQUATE"}: + noise_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") + else: + raise NotImplementedError("Noise insulation data missing - pls check") + append_result( + decent_homes_meta, + criteria="C", + variable="adequate_external_noise_insulation", + sub_variable="adequate_external_noise_insulation", + result=noise_result + ) + + # 6) Adequate common entrance areas (flats only) + if is_flat: + raise Exception("Pls check this") + common = get_element(data["elements"], LABEL_COMMON_CIRC) + if common: + circ_desc = common.get("ATTRIBUTE CODE DESCRIPTION", "") + common_areas_result = adequacy_result_by_text(circ_desc) + else: + common_areas_result = "no_data" + append_result(decent_homes_meta, "adequate_common_entrance_areas", common_areas_result) + + # ---------------- 
Criterion D ---------------- + # heating system type + heating = get_element(data["elements"], "Heating Improvement Required in Property") + if heating: + heat_type_code = heating["ATTRIBUTE CODE"] + if heat_type_code in {"NOTAPPLIC"}: + heating_type_result = "pass" + elif heat_type_code in {"WETINSFULL"}: + heating_type_result = "fail" + else: + raise NotImplementedError("No other observed codes yet") + else: + raise NotImplementedError("Heating element missing in dataset") + + append_result( + decent_homes_meta, + criteria="D", + variable="efficient_heating_system_type", + sub_variable="efficient_heating_system_type", + result=heating_type_result + ) + + # heating distribution + heating_dist = get_element(data["elements"], "Heating Distribution System in Property") + if heating_dist: + dist_code = heating_dist["ATTRIBUTE CODE"] + if dist_code == "UNKNOWN": + # For the observed case, there was no heating and wet heating needed to be installed in full so the value + # was unknown + heating_dist_result = "no_data" + else: + raise NotImplementedError("No other observed codes yet") + else: + raise NotImplementedError("Heating distribution element missing in dataset") + + append_result( + decent_homes_meta, + criteria="D", + variable="efficient_heating_distribution", + sub_variable="efficient_heating_distribution", + result=heating_dist_result + ) + + # insulation + loft = get_element(data["elements"], "Size in mm of Loft Insulation Thickness in Property") + wall = get_element(data["elements"], "Wall Insulation Improvement in External Area") + # To determine how much loft insulation is required + + # Loft insulation check (example threshold: ≥ 270mm = pass) + if loft: + # We have a specific code, where further loft insulation is needed - It appears the heating type check has + # already been completed in this dataset and so we just need to check the code + loft_code = loft["ATTRIBUTE CODE"] + if loft_code == "LOFTINSRQD": + loft_result = "fail" + elif 
loft_code.isnumeric(): + loft_result = "pass" + else: + raise NotImplementedError("Unknown loft insulation code - pls check") + else: + raise NotImplementedError("Loft insulation data missing - pls check") + append_result( + decent_homes_meta, + criteria="D", + variable="loft_insulation_sufficient", + sub_variable="loft_insulation_sufficient", + result=loft_result + ) + + # Wall insulation check + if wall: + wall_code = wall["ATTRIBUTE CODE"] + if wall_code in {"NONE"}: # Means no insulation improvement required + wall_result = "pass" + else: + raise NotImplementedError("No other observed codes yet") + else: + raise NotImplementedError("Wall insulation data missing - pls check") + append_result( + decent_homes_meta, + criteria="D", + variable="wall_insulation_sufficient", + sub_variable="wall_insulation_sufficient", + result=wall_result + ) + + # ---------------- Criterion A overall ---------------- + a_vars = set(HHSRS_MAPPING.keys()) + latest_a_results = {r["variable"]: r["result"] for r in decent_homes_meta if r["variable"] in a_vars} + + if any(v == "fail" for v in latest_a_results.values()): + criterion_a_result = "fail" + elif all(v == "pass" for v in latest_a_results.values()): + criterion_a_result = "pass" + else: + criterion_a_result = "no_data" + + # ---------------- Criterion B overall ---------------- + + component_results = {} + + for component in B_COMPONENT_LABELS.keys(): + comp_rows = [r for r in decent_homes_meta if + r["criteria"] == "B" and r["variable"] == component and r["sub_variable"] is not None] + comp_sub_results = [r["result"] for r in comp_rows] + + if not comp_sub_results: # no rows at all + comp_result = "no_data" + elif any(r == "fail" for r in comp_sub_results): + comp_result = "fail" + elif all(r == "pass" for r in comp_sub_results if r != "no_data"): + comp_result = "pass" + elif all(r == "no_data" for r in comp_sub_results): + comp_result = "no_data" + else: + comp_result = "no_data" + + component_results[component] = comp_result 
+ + key_fails = [c for c, r in component_results.items() if c in KEY_COMPONENTS and r == "fail"] + other_fails = [c for c, r in component_results.items() if c in OTHER_COMPONENTS and r == "fail"] + + if key_fails: + criterion_b_result = "fail" + elif len(other_fails) >= 2: + criterion_b_result = "fail" + elif all(r == "no_data" for r in component_results.values()): + criterion_b_result = "no_data" + else: + criterion_b_result = "pass" + + # ---------------- Criterion C overall ---------------- + criterion_c_vars = [ + "kitchen_less_than_20_years_old", + "kitchen_adequate_space_and_layout", + "bathroom_less_than_30_years_old", + "bathroom_wc_appropriately_located", + "adequate_external_noise_insulation", + ] + if is_flat: + criterion_c_vars.append("adequate_common_entrance_areas") + + latest_c_results = {r["variable"]: r["result"] for r in decent_homes_meta if r["variable"] in criterion_c_vars} + + count_fails = sum(1 for v in latest_c_results.values() if v == "fail") + # optionally count no_data too if you want strict interpretation + criterion_c_result = "fail" if count_fails >= 3 else "pass" + + # ---------------- Criterion D overall ---------------- + # Needs to have both efficient heating and distribution so all should pass + criterion_d_vars = [ + "efficient_heating_system_type", + "efficient_heating_distribution", + "loft_insulation_sufficient", + "wall_insulation_sufficient", + ] + latest_d_results = {r["variable"]: r["result"] for r in decent_homes_meta if r["variable"] in criterion_d_vars} + + if any(v == "fail" for v in latest_d_results.values()): + criterion_d_result = "fail" + elif all(v == "pass" for v in latest_d_results.values()): + criterion_d_result = "pass" + else: + criterion_d_result = "no_data" + + # ---------------- Append to property_decent_homes ---------------- + property_decent_homes.append({ + "uprn": property_info.get("UPRN"), # TODO: Need UPRN + "creation_date": datetime.now().date().isoformat(), + "criterion_a": criterion_a_result, +
"criterion_b": criterion_b_result, + "criterion_c": criterion_c_result, + "criterion_d": criterion_d_result, + "decent_homes": ( + criterion_a_result == "pass" + and criterion_c_result == "pass" + and criterion_d_result == "pass" + ) + }) diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index e2740745..da83eb05 100644 --- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -4,6 +4,7 @@ import pandas as pd from etl.epc.settings import ( DATA_PROCESSOR_SETTINGS, EARLIEST_EPC_DATE, + POST_SAP10_DATE, # IGNORED_TRANSACTION_TYPES, IGNORED_FLOOR_LEVELS, IGNORED_PROPERTY_TYPES, @@ -21,7 +22,7 @@ from etl.epc.settings import ( ENDING_SUFFIX_COMPONENT_COLS, POTENTIAL_COLUMNS, EFFICIENCY_FEATURES, - DATA_ANOMALY_MATCHES + DATA_ANOMALY_MATCHES, ) from recommendations.rdsap_tables import FLOOR_LEVEL_MAP @@ -47,6 +48,8 @@ construction_age_bounds_map = { "England and Wales: 2003-2006": {"l": 2003, "u": 2006}, "England and Wales: 2007-2011": {"l": 2007, "u": 2011}, "England and Wales: 2012 onwards": {"l": 2012, "u": 3000}, + "England and Wales: 2012-2021": {"l": 2012, "u": 2021}, + "England and Wales: 2022 onwards": {"l": 2022, "u": 3000}, } construction_age_remap = { @@ -157,6 +160,9 @@ class EPCDataProcessor: # colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"], # ) + # Create post sap10 flag + self.create_post_sap10_flag() + # When running in newdata mode, cleaning_averages has lower cases so we co-erce back to upper cleaning_averages = self.cleaning_averages.copy() if self.run_mode == "newdata": @@ -173,6 +179,13 @@ class EPCDataProcessor: self.cast_cleaning_averages_columns_to_lower(ignore_step=ignore_step) self.cast_data_columns_to_lower() + def create_post_sap10_flag(self): + """ + Create a flag to indicate if the epc is post sap10 + """ + + self.data["is_post_sap10"] = self.data["LODGEMENT_DATE"] >= POST_SAP10_DATE + def cast_data_columns_to_lower(self): """ Convert all columns names to lower @@ -247,7 +260,8 @@ class EPCDataProcessor: # 
Map all anomaly values to None data_anomaly_map = dict( zip( - DATA_ANOMALY_MATCHES, [None] * len(DATA_ANOMALY_MATCHES), + DATA_ANOMALY_MATCHES, + [None] * len(DATA_ANOMALY_MATCHES), ) ) @@ -691,7 +705,7 @@ class EPCDataProcessor: [ violation_uprn_missing, violation_old_lodgment_date, - violation_invalid_transaction_type, + # violation_invalid_transaction_type, violation_ignored_floor_level, violation_rdsap_score_above_max, violation_missing_windows_description, @@ -747,6 +761,12 @@ class EPCDataProcessor: self.data = self.data[~pd.isnull(self.data["HOTWATER_DESCRIPTION"])] self.data = self.data[~pd.isnull(self.data["ROOF_DESCRIPTION"])] + # Remove any walls described as Basement walls since these are non-standard + # TODO: CHECK IF WE SHOULD MAP THESE U VALUES INSTEAD + index_to_remove = self.data["WALLS_DESCRIPTION"] == "Basement wall" + print(f"Removing {index_to_remove.sum()} records with basement walls") + self.data = self.data[~index_to_remove] + # Because park homes are surveyed unusually (for example, we don't have u-values to # look up for their different components, they need to be collected in survey and aren't reflected in # EPCs) we'll ignore them from the model @@ -848,7 +868,9 @@ class EPCDataProcessor: # Fill NaN values with averages for col in cols_to_clean: - data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"]) + data_to_clean[col] = data_to_clean[col].fillna( + data_to_clean[f"{col}_AVERAGE"] + ) data_to_clean = data_to_clean.drop(columns=[f"{col}_AVERAGE"]) # If we still have missings data_to_clean[col] = data_to_clean[col].fillna(data_to_clean[col].mean()) diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 5d3720fc..7c27de51 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -8,7 +8,9 @@ from etl.epc_clean.epc_attributes.FloorAttributes import FloorAttributes from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes from etl.epc_clean.epc_attributes.HotWaterAttributes import 
HotWaterAttributes from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes -from etl.epc_clean.epc_attributes.MainheatControlAttributes import MainheatControlAttributes +from etl.epc_clean.epc_attributes.MainheatControlAttributes import ( + MainheatControlAttributes, +) from etl.epc_clean.epc_attributes.WindowAttributes import WindowAttributes from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes @@ -169,7 +171,7 @@ class TrainingDataset(BaseDataset): self.df = pd.DataFrame([dataset.difference_record for dataset in datasets]) self._feature_generation() - self._drop_features() + # self._drop_features() self._clean_efficiency_variables() self._null_validation(information="Clean Efficiency Variables") self._expand_description_to_features(cleaned_lookup) @@ -392,12 +394,13 @@ class TrainingDataset(BaseDataset): axis=1, ) - roof_starting_uvalue = self.df["roof_thermal_transmittance"].fillna( - roof_starting_uvalue - ) - roof_ending_uvalue = self.df["roof_thermal_transmittance_ending"].fillna( - roof_ending_uvalue - ) + roof_starting_uvalue = pd.to_numeric( + self.df["roof_thermal_transmittance"], errors="coerce" + ).fillna(roof_starting_uvalue) + + roof_ending_uvalue = pd.to_numeric( + self.df["roof_thermal_transmittance_ending"], errors="coerce" + ).fillna(roof_ending_uvalue) # ~~~~~~~~~~~~~~~~~~ # Floor @@ -451,26 +454,23 @@ class TrainingDataset(BaseDataset): lambda row: self._lambda_function_to_generate_floor_uvalue(row), axis=1 ) floor_ending_uvalue = self.df.apply( - lambda row: self._lambda_function_to_generate_floor_uvalue( - row, is_end=True - ), - axis=1, + lambda row: self._lambda_function_to_generate_floor_uvalue(row, is_end=True), axis=1 ) - floor_starting_uvalue = self.df["floor_thermal_transmittance"].fillna( - floor_starting_uvalue - ) - floor_ending_uvalue = self.df["floor_thermal_transmittance_ending"].fillna( - floor_ending_uvalue - ) + floor_starting_uvalue = pd.to_numeric( + 
self.df["floor_thermal_transmittance"], errors="coerce" + ).fillna(floor_starting_uvalue) + floor_ending_uvalue = pd.to_numeric( + self.df["floor_thermal_transmittance_ending"], errors="coerce" + ).fillna(floor_ending_uvalue) for component in ["walls", "roof", "floor"]: - self.df[f"{component}_thermal_transmittance"] = self.df[ - f"{component}_thermal_transmittance" - ].fillna(eval(f"{component}_starting_uvalue")) - self.df[f"{component}_thermal_transmittance_ending"] = self.df[ - f"{component}_thermal_transmittance_ending" - ].fillna(eval(f"{component}_ending_uvalue")) + self.df[f"{component}_thermal_transmittance"] = pd.to_numeric( + self.df[f"{component}_thermal_transmittance"], errors="coerce" + ).fillna(eval(f"{component}_starting_uvalue")) + self.df[f"{component}_thermal_transmittance_ending"] = pd.to_numeric( + self.df[f"{component}_thermal_transmittance_ending"], errors="coerce" + ).fillna(eval(f"{component}_ending_uvalue")) self.df = self.df.drop( columns=[ @@ -498,56 +498,43 @@ class TrainingDataset(BaseDataset): Drop properties that have inconsistent data, i.e. 
changing material types """ + starting_and_finishing_null = ( + expanded_df["original_description"].isin([None, ""]) & + expanded_df["original_description_ending"].isin([None, ""]) + ) + if component == "walls": expanded_df = expanded_df[ - (expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"]) - & ( - expanded_df["is_solid_brick"] - == expanded_df["is_solid_brick_ending"] - ) - & ( - expanded_df["is_timber_frame"] - == expanded_df["is_timber_frame_ending"] - ) - & ( - expanded_df["is_granite_or_whinstone"] - == expanded_df["is_granite_or_whinstone_ending"] - ) - & (expanded_df["is_cob"] == expanded_df["is_cob_ending"]) - & ( - expanded_df["is_sandstone_or_limestone"] - == expanded_df["is_sandstone_or_limestone_ending"] + starting_and_finishing_null | ( + (expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"]) + & (expanded_df["is_solid_brick"] == expanded_df["is_solid_brick_ending"]) + & (expanded_df["is_timber_frame"] == expanded_df["is_timber_frame_ending"]) + & (expanded_df["is_granite_or_whinstone"] == expanded_df["is_granite_or_whinstone_ending"]) + & (expanded_df["is_cob"] == expanded_df["is_cob_ending"]) + & (expanded_df["is_sandstone_or_limestone"] == expanded_df["is_sandstone_or_limestone_ending"]) ) ] elif component == "floor": expanded_df = expanded_df[ - (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) - & (expanded_df["is_solid"] == expanded_df["is_solid_ending"]) - & ( - expanded_df["another_property_below"] - == expanded_df["another_property_below_ending"] - ) - & ( - expanded_df["is_to_unheated_space"] - == expanded_df["is_to_unheated_space_ending"] - ) - & ( - expanded_df["is_to_external_air"] - == expanded_df["is_to_external_air_ending"] + starting_and_finishing_null | ( + (expanded_df["is_suspended"] == expanded_df["is_suspended_ending"]) + & (expanded_df["is_solid"] == expanded_df["is_solid_ending"]) + & (expanded_df["another_property_below"] == expanded_df["another_property_below_ending"]) + 
& (expanded_df["is_to_unheated_space"] == expanded_df["is_to_unheated_space_ending"]) + & (expanded_df["is_to_external_air"] == expanded_df["is_to_external_air_ending"]) ) ] elif component == "roof": expanded_df = expanded_df[ - (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) - & (expanded_df["is_roof_room"] == expanded_df["is_roof_room_ending"]) - & (expanded_df["is_loft"] == expanded_df["is_loft_ending"]) - & (expanded_df["is_flat"] == expanded_df["is_flat_ending"]) - & (expanded_df["is_thatched"] == expanded_df["is_thatched_ending"]) - & (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"]) - & ( - expanded_df["has_dwelling_above"] - == expanded_df["has_dwelling_above_ending"] + starting_and_finishing_null | ( + (expanded_df["is_pitched"] == expanded_df["is_pitched_ending"]) + & (expanded_df["is_roof_room"] == expanded_df["is_roof_room_ending"]) + & (expanded_df["is_loft"] == expanded_df["is_loft_ending"]) + & (expanded_df["is_flat"] == expanded_df["is_flat_ending"]) + & (expanded_df["is_thatched"] == expanded_df["is_thatched_ending"]) + & (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"]) + & (expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"]) ) ] @@ -677,7 +664,6 @@ class TrainingDataset(BaseDataset): } for component in components_to_expand: - # TODO: change cleaned dataframe to have underscores instead of dashes if component == "main-fuel": cleaned_key = "main-fuel" left_on_starting = "main_fuel_starting" @@ -695,10 +681,14 @@ class TrainingDataset(BaseDataset): cleaned_lookup_df_for_key = pd.DataFrame(cleaned_lookup[cleaned_key]) # We handle a specific edge case where we're missing information for the original description - descriptions = [x for x in self.df[left_on_starting].unique() if pd.notnull(x)] + descriptions = [ + x for x in self.df[left_on_starting].unique() if pd.notnull(x) + ] # take any not in the cleaned lookup missing_descriptions = [ - x for x in descriptions if x 
not in cleaned_lookup_df_for_key["original_description"].values + x + for x in descriptions + if x not in cleaned_lookup_df_for_key["original_description"].values ] if missing_descriptions: # We handle them here @@ -707,12 +697,22 @@ class TrainingDataset(BaseDataset): for x in missing_descriptions: desc_cleaner = cleaner(x) cleaned = desc_cleaner.process() + # IF NODATA, REMAP TO NONE VALUES, apart from walls which we want to keep as is + # If we convert the walls data to None, we end up converting booleans to None which + # causes issues downstream + if all( + (pd.DataFrame(cleaned, index=[0]).T)[0] == False + ) and component != "walls": + cleaned = {key: None for key in cleaned.keys()} cleaned_data.append( { "original_description": x, - "clean_description": desc_cleaner.description.replace("(assumed)", - "").rstrip().capitalize(), - **cleaned + "clean_description": desc_cleaner.description.replace( + "(assumed)", "" + ) + .rstrip() + .capitalize(), + **cleaned, } ) cleaned_lookup_df_for_key = pd.concat( @@ -830,9 +830,11 @@ class TrainingDataset(BaseDataset): if len(missings) == 0: return - # Make sure they are all efficiency columns + # + + # Make sure they are all efficiency columns if any(~missings.index.str.contains("energy_eff")): - raise ValueError("Non efficiency columns are missing") + raise ValueError(f"Non efficiency columns are missing {missings.index}") for m in missings.index: self.df[m] = self.df[m].fillna("NO_RATING") diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index 0601d6ec..fac58cd9 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -23,6 +23,7 @@ from etl.epc.settings import ( POTENTIAL_COLUMNS, ROOM_FEATURES, COST_FEATURES, + POST_SAP10_FEATURE, ) # TODO: change in setting file @@ -76,6 +77,51 @@ new_walls_description_mapping.loc[ clean_lookup["walls-description"] = new_walls_description_mapping.to_dict( orient="records" ) +# TODO: THIS IS A TEMPORARY FIX +new_mainheatcont_mapping = 
pd.DataFrame(clean_lookup["mainheatcont-description"]) +new_mainheatcont_mapping.loc[ + new_mainheatcont_mapping["original_description"] == "SAP:Main-Heating-Controls", + [ + "thermostatic_control", + "charging_system", + "switch_system", + "no_control", + "dhw_control", + "community_heating", + "multiple_room_thermostats", + "auxiliary_systems", + "trvs", + "rate_control", + ], +] = None +new_mainheatcont_mapping.loc[ + new_mainheatcont_mapping["original_description"] == " ", + [ + "thermostatic_control", + "charging_system", + "switch_system", + "no_control", + "dhw_control", + "community_heating", + "multiple_room_thermostats", + "auxiliary_systems", + "trvs", + "rate_control", + ], +] = None +clean_lookup["mainheatcont-description"] = new_mainheatcont_mapping.to_dict( + orient="records" +) + +# TEMP FIX - GRANITE OR WHINSTONE BOOLEAN ISSUE +new_walls_description_mapping = pd.DataFrame(clean_lookup["walls-description"]) +new_walls_description_mapping.loc[ + new_walls_description_mapping["original_description"].str.contains("Granite"), + "is_granite_or_whinstone", +] = True +clean_lookup["walls-description"] = new_walls_description_mapping.to_dict( + orient="records" +) class EPCPipeline: @@ -280,7 +326,9 @@ class EPCPipeline: # We include the lodgement date here as we probably need to factor time into the # model, since EPC standards and rigour have changed over time - variable_data = property_data[VARIABLE_DATA_FEATURES + COST_FEATURES] + variable_data = property_data[ + VARIABLE_DATA_FEATURES + COST_FEATURES + POST_SAP10_FEATURE + ] uprn = str(uprn) epc_records = [ diff --git a/etl/epc/Record.py b/etl/epc/Record.py index d0816034..e1853361 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -20,6 +20,7 @@ from etl.epc.settings import ( COMPONENT_FEATURES, EFFICIENCY_FEATURES, ROOM_FEATURES, + POST_SAP10_FEATURE, ) from recommendations.recommendation_utils import estimate_number_of_floors from utils.s3 import read_dataframe_from_s3_parquet @@ -37,6 +38,8 
@@ DATA_BUCKET = os.environ.get( "DATA_BUCKET", "retrofit-data-dev" if ENVIRONMENT == "dev" else None ) +pd.set_option("future.no_silent_downcasting", True) + @dataclass class EPCRecord: @@ -89,6 +92,7 @@ class EPCRecord: co2_emissions_current: float = None number_habitable_rooms: float = None number_heated_rooms: float = None + is_post_sap10: bool = None # u_values_walls = None # u_values_roof = None @@ -277,6 +281,7 @@ class EPCRecord: self.number_heated_rooms: float = float( self.prepared_epc["number_heated_rooms"] ) + self.is_post_sap10: bool = bool(self.prepared_epc["is_post_sap10"]) def _identify_delta_between_prepared_and_original_records(self): """ @@ -380,15 +385,15 @@ class EPCRecord: df.columns = [x.upper().replace("-", "_") for x in df.columns] if replace_empty_string: - df = df.replace("", np.nan) + df = df.replace("", np.nan).infer_objects(copy=False) return df def _clean_floor_height(self): - """ Remaps anomalies in floor height to the average floor height for the property type """ + """Remaps anomalies in floor height to the average floor height for the property type""" floor_height_data = self.cleaning_data[ - (self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) & - (self.cleaning_data["built_form"] == self.prepared_epc["built-form"]) + (self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) + & (self.cleaning_data["built_form"] == self.prepared_epc["built-form"]) ] average = floor_height_data["floor_height"].mean() sd = floor_height_data["floor_height"].std() @@ -399,14 +404,16 @@ class EPCRecord: self.prepared_epc["floor-height"] = average def _clean_new_build_descriptions(self): - for col in ['roof-description', 'walls-description', 'floor-description']: + for col in ["roof-description", "walls-description", "floor-description"]: self.prepared_epc[col] = self.prepared_epc[col].replace("W/m²K", "W/m-¦K") def _clean_constituency(self): """ We handle the single case of finding a missing constituency by 
using the local authority """ - if pd.isnull(self.prepared_epc["constituency"]) or (self.prepared_epc["constituency"] == ""): + if pd.isnull(self.prepared_epc["constituency"]) or ( + self.prepared_epc["constituency"] == "" + ): if self.prepared_epc["local-authority"] != "E06000044": raise NotImplementedError( "This function is only implemented for Portsmouth, in the single edgecase seen" @@ -589,18 +596,21 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") + if self.prepared_epc["total-floor-area"] is None: + return + self.prepared_epc["total-floor-area"] = float( self.prepared_epc["total-floor-area"] ) # We handle the edge case of floor area being 0. We set it to zero and it is cleaned by # _clean_with_data_processor - if self.prepared_epc['total-floor-area'] == 0: + if self.prepared_epc["total-floor-area"] == 0: print( "Edge case of floor area being zero - will set to none and will be cleaned in " "_clean_with_data_processor" ) - self.prepared_epc['total-floor-area'] = None + self.prepared_epc["total-floor-area"] = None def _clean_mains_gas(self): """ @@ -609,12 +619,7 @@ class EPCRecord: if not self.prepared_epc: raise ValueError("EPC Recrod doesn not contain epc data") - mains_gas_map = { - "Y": True, - "N": False, - True: True, - False: False - } + mains_gas_map = {"Y": True, "N": False, True: True, False: False} self.prepared_epc["mains-gas-flag"] = ( None @@ -1064,7 +1069,12 @@ class EPCDifferenceRecord: CARBON_RESPONSE ) - component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES + ROOM_FEATURES + component_variables = ( + COMPONENT_FEATURES + + EFFICIENCY_FEATURES + + ROOM_FEATURES + + POST_SAP10_FEATURE + ) ending_record = self.record2.get( component_variables + ["lodgement_date"], return_asdict=True, diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py index c985567d..cdb7cfb8 100644 --- a/etl/epc/property_change_app.py +++ b/etl/epc/property_change_app.py @@ -12,7 +12,7 @@ 
def main(): """ directories = [entry for entry in DATA_DIRECTORY.iterdir() if entry.is_dir()] - # directories = directories[0:3] + # directories = directories[235:275] epc_pipeline = EPCPipeline( directories=directories, diff --git a/etl/epc/settings.py b/etl/epc/settings.py index a641575c..f4d0e174 100644 --- a/etl/epc/settings.py +++ b/etl/epc/settings.py @@ -20,6 +20,7 @@ DATA_ANOMALY_MATCHES = { # certificate retrieval process is successfully completed. Mandatory data items cannot be applied # retrospectively to energy certificates lodged before the date of the change. "Not recorded", + "Not Recorded", # The data also contains DECs with an operational rating of ‘9999’ (a ‘default’ DEC). The production of a # ‘default’ DEC value was allowed to enable building occupiers, with poor quality or no energy data, # the opportunity to comply with the regulations. From April 2011 the ability to lodge a ‘default’ DEC was no @@ -49,9 +50,23 @@ DATA_ANOMALY_MATCHES = { # An older value which rarely shows up but has been seen in the data. "UNKNOWN", # - "Unknown" + "Unknown", + # Observed error cases + "(error), (error)", + "error , error", + "Description", + "description", + "Undefined Welsh description for crtrl code 2113", + "undefined welsh description for crtrl code 2113", + "Hot water system", + "hot water system", + "Heating system", + "heating system", } +# Add the post_sap10 date to indicate if the epc is post sap10 +POST_SAP10_DATE = "2025-06-22" + DATA_ANOMALY_SUBSTRINGS = { # Where values in a ‘pick’ list that have been superseded by another value. For example, where a value for # ‘pitched roof’ has been replaced by three sub-categories of pitched roof. 
The original value is retained @@ -184,6 +199,8 @@ EFFICIENCY_FEATURES = [ ROOM_FEATURES = ["number_habitable_rooms", "number_heated_rooms"] +POST_SAP10_FEATURE = ["is_post_sap10"] + COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [ "TRANSACTION_TYPE", "ENERGY_TARIFF", # Not sure if this is relevant diff --git a/etl/epc_clean/app.py b/etl/epc_clean/app.py index 1f320a9b..99de1d03 100644 --- a/etl/epc_clean/app.py +++ b/etl/epc_clean/app.py @@ -48,7 +48,6 @@ def app(): data.columns = [c.replace("_", "-").lower() for c in data.columns] # Take just date before the date threshold data = data[data["lodgement-date"] >= "2011-01-01"] - # Convert to list of dictioaries as returned by the api data = data.to_dict("records") diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py index 6def93f0..cd1499c2 100644 --- a/etl/epc_clean/epc_attributes/FloorAttributes.py +++ b/etl/epc_clean/epc_attributes/FloorAttributes.py @@ -1,17 +1,27 @@ import re from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import extract_thermal_transmittance, extract_component_types +from etl.epc_clean.epc_attributes.attribute_utils import ( + extract_thermal_transmittance, + extract_component_types, + handle_mixed_translation +) class FloorAttributes(Definitions): DWELLING_BELOW = ["another dwelling below", "other premises below"] - FLOOR_TYPES = ["assumed", "to unheated space", "to external air", "suspended", "solid"] + FLOOR_TYPES = [ + "assumed", + "to unheated space", + "to external air", + "suspended", + "solid", + ] # For the short term, while we are still exploring the data, we maintain a list of error cases which # we want to ignore and consider as no data. 
- OBSERVED_ERRORS = ["Conservatory", "insulated"] + OBSERVED_ERRORS = ["Conservatory", "insulated", "Basement"] WELSH_TEXT = { "(anheddiad arall islaw)": "(another dwelling below)", @@ -35,32 +45,54 @@ class FloorAttributes(Definitions): "i ofod heb ei wresogi, heb ei inswleiddio (rhagdybiaeth)": "to unheated space, no insulation (assumed)", "i ofod heb ei wresogi, dim inswleiddio": "to unheated space, no insulation", "igçör awyr y tu allan, wedigçöi inswleiddio (rhagdybiaeth)": "to external air, insulated (assumed)", - "crog, inswleiddio cyfyngedig (rhagdybiaeth)": "suspended, limited insulation (assumed)" + "crog, inswleiddio cyfyngedig (rhagdybiaeth)": "suspended, limited insulation (assumed)", + } + + REMAP = { + # Have only seen this once - though perhaps need to investigate older EPCs in the production of EPC clean. + # When looking at a newer EPC, which had been re-assessed as another dwelling below + "above unheated space or full exposed": "(another dwelling below)", } def __init__(self, description: str): self.description: str = description.lower() - self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or ( - description in self.OBSERVED_ERRORS) or (self.description == "sap05:floor") + self.nodata = ( + (not description) + or (description in self.DATA_ANOMALY_MATCHES) + or (description in self.OBSERVED_ERRORS) + or (self.description == "sap05:floor") + or not self.description + ) # Try and perform a translation, incase it's in welsh self.translate_welsh_text() + # Remap known issues + if self.description in self.REMAP: + self.description = self.REMAP[self.description] + + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + if not self.nodata and not any( - rt in self.description for rt in - self.FLOOR_TYPES + self.DWELLING_BELOW + ["average thermal transmittance"] + rt in self.description + for rt in self.FLOOR_TYPES + + self.DWELLING_BELOW + + ["average thermal 
transmittance"] ): - raise ValueError('Invalid description') + raise ValueError("Invalid description") def translate_welsh_text(self): uvalue_match = re.search( - r'trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k', self.description + r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k", + self.description, ) uvalue_match2 = re.search( - r'trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k', self.description + r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k", + self.description, ) # Step 2: Generalized translation with placeholder @@ -69,7 +101,7 @@ class FloorAttributes(Definitions): uvalue = uvalue_match.group(1) else: uvalue = uvalue_match2.group(1) - self.description = f'average thermal transmittance {uvalue} w/m-¦k' + self.description = f"average thermal transmittance {uvalue} w/m-¦k" else: translation = self.WELSH_TEXT.get(self.description) @@ -80,7 +112,11 @@ class FloorAttributes(Definitions): def process(self) -> Dict[str, Union[str, bool, int, None]]: if self.nodata: - return {"no_data": True} + return { + 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': True, + 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': True, 'is_solid': False, + 'another_property_below': False, 'insulation_thickness': 'none', 'no_data': True + } result: Dict[str, Union[float, str, bool, None]] = {} description = self.description @@ -89,11 +125,17 @@ class FloorAttributes(Definitions): result, description = extract_thermal_transmittance(result, description) # floor type - result, description = extract_component_types(result, description, list_of_components=self.FLOOR_TYPES) + result, description = extract_component_types( + result, description, list_of_components=self.FLOOR_TYPES + ) # check if there is another dwelling below - result['another_property_below'] = "(another dwelling below)" in description or "(other premises below)" in \ - description + result["another_property_below"] = ( + 
"(another dwelling below)" in description + or "(other premises below)" in description + or "another dwelling below" in description + or "other premises below" in description + ) thickness_map = { "external insulation": "average", @@ -102,17 +144,17 @@ class FloorAttributes(Definitions): "partial insulation": "below average", "no insulation": "none", "additional insulation": "above average", - "insulated": "average" + "insulated": "average", } for key, value in thickness_map.items(): if key in description: - result['insulation_thickness'] = value + result["insulation_thickness"] = value break else: - result['insulation_thickness'] = None + result["insulation_thickness"] = None if result["another_property_below"]: result["thermal_transmittance"] = 0 - result["thermal_transmittance_unit"] = 'w/m-¦k' + result["thermal_transmittance_unit"] = "w/m-¦k" return result diff --git a/etl/epc_clean/epc_attributes/HotWaterAttributes.py b/etl/epc_clean/epc_attributes/HotWaterAttributes.py index d1124e08..53cd2f97 100644 --- a/etl/epc_clean/epc_attributes/HotWaterAttributes.py +++ b/etl/epc_clean/epc_attributes/HotWaterAttributes.py @@ -1,6 +1,6 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword, handle_mixed_translation class HotWaterAttributes(Definitions): @@ -100,6 +100,7 @@ class HotWaterAttributes(Definitions): WELSH_TEXT = { "ogçör brif system": "from main system", "o r brif system": "from main system", + "o’r brif system": "from main system", "ogçör brif system, adfer gwres nwyon ffliw": "from main system, flue gas heat recovery", "bwyler/cylchredydd nwy": "gas boiler/circulator", "ogçör brif system, dim thermostat ar y silindr": "from main system, no cylinder thermostat", @@ -153,6 +154,9 @@ class HotWaterAttributes(Definitions): self.nodata = False self.description = 
translation + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + if not self.nodata and not any( self._keyword_in_description(keywords) for keywords in [ diff --git a/etl/epc_clean/epc_attributes/LightingAttributes.py b/etl/epc_clean/epc_attributes/LightingAttributes.py index 78b31142..52cae764 100644 --- a/etl/epc_clean/epc_attributes/LightingAttributes.py +++ b/etl/epc_clean/epc_attributes/LightingAttributes.py @@ -1,6 +1,6 @@ import re from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation from etl.epc_clean.utils import correct_spelling @@ -25,6 +25,9 @@ class LightingAttributes(Definitions): self.description = correct_spelling(self.description) self.averages = averages + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + self.nodata = (not description) or (description in self.DATA_ANOMALY_MATCHES) or ( description in self.OBSERVED_ERRORS) or (description == "SAP05:Lighting") diff --git a/etl/epc_clean/epc_attributes/MainFuelAttributes.py b/etl/epc_clean/epc_attributes/MainFuelAttributes.py index 9bb53ff1..a818a043 100644 --- a/etl/epc_clean/epc_attributes/MainFuelAttributes.py +++ b/etl/epc_clean/epc_attributes/MainFuelAttributes.py @@ -1,6 +1,8 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, remove_punctuation, find_keyword +from etl.epc_clean.epc_attributes.attribute_utils import ( + clean_description, remove_punctuation, find_keyword, handle_mixed_translation +) class MainFuelAttributes(Definitions): @@ -56,6 +58,8 @@ class MainFuelAttributes(Definitions): def __init__(self, description: str): self.description: str = 
remove_punctuation(clean_description(description.lower())) + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) self.is_community = 'community' in self.description and 'not community' not in self.description self.is_unknown = False diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py index 312fa9fe..283c4724 100644 --- a/etl/epc_clean/epc_attributes/MainheatAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py @@ -1,5 +1,7 @@ from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, process_part, switch_chars +from etl.epc_clean.epc_attributes.attribute_utils import ( + clean_description, process_part, switch_chars, handle_mixed_translation +) from typing import Dict, Union @@ -20,7 +22,7 @@ class MainHeatAttributes(Definitions): ] FUEL_TYPES = ["electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite", "dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k", "mineral and wood", - "dual fuel appliance"] + "dual fuel appliance", "wood chips"] DISTRIBUTION_SYSTEMS = ["radiators", "fan coil units", "pipes in screed above insulation", "pipes in insulated timber floor", "pipes in concrete slab"] OTHERS = ["assumed", "electricaire", "assumed for most rooms"] @@ -77,7 +79,17 @@ class MainHeatAttributes(Definitions): 'awyr gynnes, nwy prif gyflenwad': 'warm air, mains gas', "bwyler a rheiddiaduron, nwy prif gyflenwad, gwresogyddion ystafell, trydan": "Boiler and radiators, " "mains gas, Room heaters, " - "electric" + "electric", + # an unusual example, containing both english and welsh that was found in the data + "boiler and radiators, |bwyler a rheiddiaduron, |mains gas|nwy prif gyflenwad": "boiler and radiators, " + "mains gas", + "room heaters, |gwresogyddion ystafell, |electric|trydan": "room heaters, electric", + "air source heat pump, 
|pwmp gwres sy'n tarddu yn yr awyr, |, radiators, |, rheiddiaduron, |electric|trydan": + "air source heat pump, radiators, electric", + "boiler and underfloor heating, |bwyler a gwres dan y llawr, |wood pellets|pelenni coed": "boiler and " + "underfloor " + "heating, " + "wood pellets", } REMAP = { @@ -95,6 +107,7 @@ class MainHeatAttributes(Definitions): "air sourceheat pump, radiators, electric": "air source heat pump, radiators, electric", "bwyler gyda rheiddiaduron a gwres dan y llawr, nwy prif gyflenwad": "Boiler and radiators, mains gas, " "Boiler and underfloor heating, mains gas", + } edge_case_result = {} @@ -115,6 +128,9 @@ class MainHeatAttributes(Definitions): self.nodata = False self.description = translation + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + remapped = [] for term in self.description.split(", "): remap = self.REMAP.get(term) diff --git a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py index 0dcf97c5..b9ef4eca 100644 --- a/etl/epc_clean/epc_attributes/MainheatControlAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatControlAttributes.py @@ -1,6 +1,6 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, find_keyword, handle_mixed_translation class MainheatControlAttributes(Definitions): @@ -119,7 +119,13 @@ class MainheatControlAttributes(Definitions): 'rheoli r tal a llaw': 'manual charge control', 'tal un gyfradd, thermostat ystafell yn unig': 'flat rate charging, room thermostat only', "rheoli'r t l llaw": "manual charge control", - "2205 rhaglennydd ac o leiaf ddau thermostat ystafell": "programmer and at least two room thermostats" + "2205 rhaglennydd ac o leiaf ddau thermostat ystafell": "programmer and at 
least two room thermostats", + "2603 rhaglennydd a thermostatau ar y cyfarpar": "programmer, room thermostat", + "2404 rheolyddion i wresogyddion storio sygçön cadw llawer o wres": "controls for high heat retention storage " + "heaters", + 'system dalu wedigçöi chysylltu +ó defnyddio gwres cymunedol, rhaglennydd ac o leiaf ddau thermostat ' + 'ystafell': 'charging system linked to use of community heating, programmer and at least two room thermostats' + } NO_DATA_DESCRIPTIONS = [ @@ -130,6 +136,8 @@ class MainheatControlAttributes(Definitions): def __init__(self, description: str): self.description: str = clean_description(description.lower()).strip() + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) self.nodata = not self.description or description in self.DATA_ANOMALY_MATCHES or ( description in self.NO_DATA_DESCRIPTIONS ) diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py index 2eacc951..98998e5a 100644 --- a/etl/epc_clean/epc_attributes/RoofAttributes.py +++ b/etl/epc_clean/epc_attributes/RoofAttributes.py @@ -1,12 +1,29 @@ import re from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import extract_component_types, extract_thermal_transmittance +from etl.epc_clean.epc_attributes.attribute_utils import ( + extract_component_types, + extract_thermal_transmittance, + handle_mixed_translation +) class RoofAttributes(Definitions): - ROOF_TYPES = ['pitched', 'roof room', 'loft', 'flat', 'thatched', 'at rafters', 'assumed'] - DWELLING_ABOVE = ["another dwelling above", "other premises above", "other dwelling above"] + ROOF_TYPES = [ + "pitched", + "roof room", + "loft", + "flat", + "thatched", + "at rafters", + "assumed", + ] + DWELLING_ABOVE = [ + "another dwelling above", + "other premises above", + "other dwelling above", + "(same dwelling above)", + ] WELSH_TEXT = { "ar 
oleddf, dim inswleiddio": "pitched, no insulation", @@ -18,10 +35,10 @@ class RoofAttributes(Definitions): "ar oleddf, wedi?i inswleiddio": "pitched, insulated", "ar oleddf, inswleiddio cyfyngedig (rhagdybiaeth)": "pitched, limited insulation (assumed)", "ar oleddf, inswleiddio cyfyngedig": "pitched, limited insulation", - "ar oleddf, wedigçöi inswleiddio wrth y trawstiau": 'pitched, insulated at rafters', - "ar oleddf, wedi?i inswleiddio wrth y trawstiau": 'pitched, insulated at rafters', - "ar oleddf, wedi?i inswleiddio wrth y trawstia": 'pitched, insulated at rafters', - "ar oleddf, wedigçöi inswleiddio wrth y trawstia": 'pitched, insulated at rafters', + "ar oleddf, wedigçöi inswleiddio wrth y trawstiau": "pitched, insulated at rafters", + "ar oleddf, wedi?i inswleiddio wrth y trawstiau": "pitched, insulated at rafters", + "ar oleddf, wedi?i inswleiddio wrth y trawstia": "pitched, insulated at rafters", + "ar oleddf, wedigçöi inswleiddio wrth y trawstia": "pitched, insulated at rafters", "yn wastad, inswleiddio cyfyngedig (rhagdybiaeth)": "flat, limited insulation (assumed)", "yn wastad, inswleiddio cyfyngedig": "flat, limited insulation", "yn wastad, dim inswleiddio (rhagdybiaeth)": "flat, no insulation (assumed)", @@ -43,9 +60,18 @@ class RoofAttributes(Definitions): } DEFAULT_KEYS = [ - 'thermal_transmittance', 'thermal_transmittance_unit', 'is_pitched', 'is_roof_room', - 'is_loft', 'is_flat', 'is_thatched', 'is_at_rafters', 'is_assumed', 'has_dwelling_above', - 'is_valid', 'insulation_thickness' + "thermal_transmittance", + "thermal_transmittance_unit", + "is_pitched", + "is_roof_room", + "is_loft", + "is_flat", + "is_thatched", + "is_at_rafters", + "is_assumed", + "has_dwelling_above", + "is_valid", + "insulation_thickness", ] def __init__(self, description: str): @@ -54,14 +80,23 @@ class RoofAttributes(Definitions): """ self.description: str = description.lower().strip() - self.nodata = not description or description in self.DATA_ANOMALY_MATCHES or 
self.description == "sap05:roof" + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + self.nodata = ( + not description + or description in self.DATA_ANOMALY_MATCHES + or self.description == "sap05:roof" + ) self.welsh_translation_search() if not self.nodata and not any( - rt in self.description for rt in self.ROOF_TYPES + self.DWELLING_ABOVE + ["average thermal transmittance"] + rt in self.description + for rt in self.ROOF_TYPES + + self.DWELLING_ABOVE + + ["average thermal transmittance"] ): - raise ValueError('Invalid description') + raise ValueError("Invalid description") def welsh_translation_search(self): """ @@ -76,7 +111,7 @@ class RoofAttributes(Definitions): r"ar oleddf, (\d+ mm) lo inswleiddio yn y llof", r"ar oleddf, (\d+\+ mm) lo inswleiddio yn y llof", r"ar oleddf, (\d+mm) o inswleiddio yn y llofft", - r"ar oleddf, (\d+\+ mm) o inswleiddio yn y llofft" + r"ar oleddf, (\d+\+ mm) o inswleiddio yn y llofft", ] li_thickness_match = None for regex in loft_insulation_regexes: @@ -84,9 +119,14 @@ class RoofAttributes(Definitions): if li_thickness_match: break - uvalue_search = re.search(r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k", self.description) + uvalue_search = re.search( + r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m-¦k", + self.description, + ) uvalue_search2 = re.search( - r'trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k', self.description, re.IGNORECASE + r"trawsyriannedd thermol cyfartalog (\d+(\.\d+)?)\s*w/m.+k", + self.description, + re.IGNORECASE, ) # Step 2: Generalized translation with placeholder @@ -121,9 +161,13 @@ class RoofAttributes(Definitions): result, description = extract_thermal_transmittance(result, description) # roof type - result, description = extract_component_types(result, description, list_of_components=self.ROOF_TYPES) + result, description = extract_component_types( + result, description, 
list_of_components=self.ROOF_TYPES + ) - result["has_dwelling_above"] = any([x in description for x in self.DWELLING_ABOVE]) + result["has_dwelling_above"] = any( + [x in description for x in self.DWELLING_ABOVE] + ) for dwelling_above in self.DWELLING_ABOVE: description = description.replace(dwelling_above, "") @@ -136,7 +180,7 @@ class RoofAttributes(Definitions): # Search for a regular expression that matches 150 insulation match = re.search(r"(\d+\+?)\s*insulation", description) if match: - result['insulation_thickness'] = match.group(1) + result["insulation_thickness"] = match.group(1) # insulation thickness thickness_map = { @@ -149,21 +193,21 @@ class RoofAttributes(Definitions): } for key, value in thickness_map.items(): if key in description: - result['insulation_thickness'] = value + result["insulation_thickness"] = value # Remove the match from the description # description = description.replace(key, "") break # Extract insulation thickness in mm, if present - match = re.search(r'(\d+\+?)\s*mm', description) + match = re.search(r"(\d+\+?)\s*mm", description) if match: - result['insulation_thickness'] = match.group(1) + result["insulation_thickness"] = match.group(1) if "insulation_thickness" not in result: - result['insulation_thickness'] = None + result["insulation_thickness"] = None if result["has_dwelling_above"]: result["thermal_transmittance"] = 0 - result["thermal_transmittance_unit"] = 'w/m-¦k' + result["thermal_transmittance_unit"] = "w/m-¦k" return result diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py index 49252552..075dee96 100644 --- a/etl/epc_clean/epc_attributes/WallAttributes.py +++ b/etl/epc_clean/epc_attributes/WallAttributes.py @@ -3,76 +3,104 @@ from typing import Dict, Union from BaseUtility import Definitions from etl.epc_clean.epc_attributes.attribute_utils import ( extract_component_types, - extract_thermal_transmittance + extract_thermal_transmittance, + 
handle_mixed_translation ) class WallAttributes(Definitions): - WALL_TYPES = ['cavity wall', 'filled cavity', 'solid brick', 'system built', 'timber frame', 'granite or whinstone', - 'as built', 'cob', 'assumed', 'sandstone or limestone', "park home"] + WALL_TYPES = [ + "cavity wall", + "filled cavity", + "solid brick", + "system built", + "timber frame", + "granite or whinstone", + "as built", + "cob", + "assumed", + "sandstone or limestone", + "park home", + ] WELSH_TEXT = { - "Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": - "Solid brick, as built, no insulation (assumed)", - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)': - 'Cavity wall, as built, partial insulation (assumed)', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol': - 'Cavity wall, as built, partial insulation', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'Cavity wall, as built, no insulation (assumed)', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio': - 'Cavity wall, as built, no insulation', - 'Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'Sandstone or limestone, as built, no insulation (assumed)', - 'Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio': - 'Sandstone or limestone, as built, no insulation', - 'Waliau ceudod, ceudod wediGÇÖi lenwi': 'Cavity wall, filled cavity', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)': - 'Cavity wall, as built, insulated (assumed)', - 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio': - 'Cavity wall, as built, insulated', - 'Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'Granite or whinstone, as built, no insulation (assumed)', - 'Waliau ceudod,': 'Cavity wall, as built, no insulation', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)': - 'Timber frame, as built, insulated (assumed)', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, 
wediGÇÖu hinswleiddio': - 'Timber frame, as built, insulated', - 'Gwenithfaen neu risgraig, gydag inswleiddio allanol': 'Granite or whinstone, with external insulation', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'System built, as built, no insulation (assumed)', - 'Tywodfaen, gydag inswleiddio mewnol': 'Sandstone or limestone, with internal insulation', - 'Waliau ceudod, ynysydd allanol a llenwi ceudod': 'Cavity wall, filled cavity and external insulation', - 'Gwenithfaen neu risgraig, gydag inswleiddio mewnol': 'Granite or whinstone, with internal insulation', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)': - 'Timber frame, as built, partial insulation (assumed)', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)': - 'System built, as built, insulated (assumed)', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio': - 'System built, as built, insulated', - 'WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol': 'System built, with external insulation', - 'Briciau solet, gydag inswleiddio mewnol': 'Solid brick, with internal insulation', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)': - 'System built, as built, partial insulation (assumed)', - 'WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol': - 'System built, as built, partial insulation', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)': - 'Timber frame, as built, no insulation (assumed)', - 'Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio': - 'Timber frame, as built, no insulation', - 'Tywodfaen, gydag inswleiddio allanol': 'Sandstone or limestone, with external insulation', - 'Waliau ceudod, gydag inswleiddio allanol': 'Cavity wall, with external insulation', - 'Briciau solet, gydag inswleiddio allanol': 'Solid brick, with external insulation', + 
"Briciau solet, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Solid brick, as built, no insulation " + "(assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Cavity wall, as built, " + "partial insulation (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "Cavity wall, as built, partial insulation", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Cavity wall, as built, no insulation" + " (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Cavity wall, as built, no insulation", + "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Sandstone or limestone, as built, " + "no insulation (assumed)", + "Tywodfaen, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Sandstone or limestone, as built, no insulation", + "Waliau ceudod, ceudod wediGÇÖi lenwi": "Cavity wall, filled cavity", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, " + "insulated (assumed)", + "Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Cavity wall, as built, insulated", + "Waliau ceudod, fel y’u hadeiladwyd, wedi’u hinswleiddio (rhagdybiaeth)": "Cavity wall, as built, insulated (" + "assumed)", + "Gwenithfaen neu risgraig, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Granite or whinstone, " + "as built, no insulation (" + "assumed)", + "Waliau ceudod,": "Cavity wall, as built, no insulation", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "Timber frame, as built, " + "insulated (assumed)", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "Timber frame, as built, insulated", + "Gwenithfaen neu risgraig, gydag inswleiddio allanol": "Granite or whinstone, with external insulation", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "System built, " + "as built, " + "no insulation (" + "assumed)", + "Tywodfaen, 
gydag inswleiddio mewnol": "Sandstone or limestone, with internal insulation", + "Waliau ceudod, ynysydd allanol a llenwi ceudod": "Cavity wall, filled cavity and external insulation", + "Gwenithfaen neu risgraig, gydag inswleiddio mewnol": "Granite or whinstone, with internal insulation", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "Timber frame, as built, " + "partial insulation (assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)": "System " + "built, " + "as built, " + "insulated (" + "assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio": "System built, as built, " + "insulated", + "WediGÇÖu hadeiladu yn +¦l system, gydag inswleiddio allanol": "System built, with external insulation", + "Briciau solet, gydag inswleiddio mewnol": "Solid brick, with internal insulation", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol (rhagdybiaeth)": "System built, " + "as built, " + "partial " + "insulation (" + "assumed)", + "WediGÇÖu hadeiladu yn +¦l system, fel yGÇÖu hadeiladwyd, inswleiddio rhannol": "System built, as built, " + "partial insulation", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio (rhagdybiaeth)": "Timber frame, as built, no insulation " + "(assumed)", + "Ffr+óm bren, fel yGÇÖu hadeiladwyd, dim inswleiddio": "Timber frame, as built, no insulation", + "Tywodfaen, gydag inswleiddio allanol": "Sandstone or limestone, with external insulation", + "Waliau ceudod, gydag inswleiddio allanol": "Cavity wall, with external insulation", + "Briciau solet, gydag inswleiddio allanol": "Solid brick, with external insulation", # Add in some corrections: - 'Co with external insulation': 'Cob, with external insulation', - 'Cowith external insulation': 'Cob, with external insulation', + "Co with external insulation": "Cob, with external insulation", + "Cowith external insulation": "Cob, with external 
insulation", } DEFAULT_KEYS = [ - 'thermal_transmittance', 'thermal_transmittance_unit', 'is_cavity_wall', 'is_filled_cavity', - 'is_solid_brick', 'is_system_built', 'is_timber_frame', 'is_granite_or_whinstone', - 'is_as_built', 'is_cob', 'is_assumed', 'is_sandstone_or_limestone', - 'insulation_thickness', 'external_insulation', 'internal_insulation' + "thermal_transmittance", + "thermal_transmittance_unit", + "is_cavity_wall", + "is_filled_cavity", + "is_solid_brick", + "is_system_built", + "is_timber_frame", + "is_granite_or_whinstone", + "is_as_built", + "is_cob", + "is_assumed", + "is_sandstone_or_limestone", + "insulation_thickness", + "external_insulation", + "internal_insulation", ] CORRECTIONS = { @@ -90,6 +118,9 @@ class WallAttributes(Definitions): self.welsh_translation_search() + # We handle seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + self.nodata = not description or description in self.DATA_ANOMALY_MATCHES def welsh_translation_search(self): @@ -98,7 +129,9 @@ class WallAttributes(Definitions): :return: """ - uvalue_search = re.search(r"Trawsyriannedd thermol cyfartalog (\d+\.?\d*)", self.description) + uvalue_search = re.search( + r"Trawsyriannedd thermol cyfartalog (\d+\.?\d*)", self.description + ) if uvalue_search: uvalue = uvalue_search.group(1) @@ -115,6 +148,9 @@ class WallAttributes(Definitions): for key in self.DEFAULT_KEYS: result[key] = False + result["thermal_transmittance_unit"] = None + result["insulation_thickness"] = "none" + return result description = self.description.lower() @@ -123,7 +159,9 @@ class WallAttributes(Definitions): result, description = extract_thermal_transmittance(result, description) # wall type - result, description = extract_component_types(result, description, list_of_components=self.WALL_TYPES) + result, description = extract_component_types( + result, description, list_of_components=self.WALL_TYPES + ) # Handle some edge cases if "sandstone" in 
description and not result["is_sandstone_or_limestone"]: @@ -137,18 +175,18 @@ class WallAttributes(Definitions): "partial insulation": "below average", "no insulation": "none", "additional insulation": "above average", - "insulated": "average" + "insulated": "average", } for key, value in thickness_map.items(): if key in description: - result['insulation_thickness'] = value + result["insulation_thickness"] = value break else: - result['insulation_thickness'] = None + result["insulation_thickness"] = None # insulation type - result['external_insulation'] = 'external insulation' in description - result['internal_insulation'] = 'internal insulation' in description + result["external_insulation"] = "external insulation" in description + result["internal_insulation"] = "internal insulation" in description if result["is_filled_cavity"]: # If it has a filled cavity + internal/external insulation, it's deemed to have above average insulation @@ -159,7 +197,11 @@ class WallAttributes(Definitions): else: result["insulation_thickness"] = "average" - if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"): + if ( + result["is_cavity_wall"] + & result["is_as_built"] + & (result["insulation_thickness"] == "average") + ): result["is_filled_cavity"] = True return result diff --git a/etl/epc_clean/epc_attributes/WindowAttributes.py b/etl/epc_clean/epc_attributes/WindowAttributes.py index 2b1dc172..f5edac2d 100644 --- a/etl/epc_clean/epc_attributes/WindowAttributes.py +++ b/etl/epc_clean/epc_attributes/WindowAttributes.py @@ -1,6 +1,6 @@ from typing import Dict, Union from BaseUtility import Definitions -from etl.epc_clean.epc_attributes.attribute_utils import clean_description +from etl.epc_clean.epc_attributes.attribute_utils import clean_description, handle_mixed_translation class WindowAttributes(Definitions): @@ -53,6 +53,9 @@ class WindowAttributes(Definitions): self.nodata = False self.description = translation + # We handle 
seemind occurances of mixed translations + self.description = handle_mixed_translation(self.description) + if not self.nodata: if not any( rt in self.description for rt in diff --git a/etl/epc_clean/epc_attributes/attribute_utils.py b/etl/epc_clean/epc_attributes/attribute_utils.py index 28f958a8..2e98d869 100644 --- a/etl/epc_clean/epc_attributes/attribute_utils.py +++ b/etl/epc_clean/epc_attributes/attribute_utils.py @@ -155,3 +155,19 @@ def find_keyword(description, keywords, synonyms=None): return synonyms.get(keyword, keyword) return None + + +def handle_mixed_translation(description): + """ + We handle some edge cases where welsh and english are mixed in the same description + :param description: str description to process + :return: + """ + if "|" not in description: + return description + + parts = description.split("|") + # The pattern that we see is that in index 0, we have english. Then welsh and then english again + # So, the even indexes are english + remapped_parts = [parts[i] for i in range(len(parts)) if i % 2 == 0] + return "".join(remapped_parts) diff --git a/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py b/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py index 81ec7a32..080f59be 100644 --- a/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py +++ b/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py @@ -375,6 +375,12 @@ clean_floor_cases = [ 'thermal_transmittance_unit': 'w/m-¦k', 'is_assumed': False, 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, 'another_property_below': False, 'insulation_thickness': None + }, + { + # This example gets remapped to another dwelling below + "description": "Above unheated space or full exposed", + 'thermal_transmittance': 0, 'thermal_transmittance_unit': 'w/m-¦k', 'is_assumed': False, + 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, + 'another_property_below': 
True, 'insulation_thickness': None } - ] diff --git a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py index 45994b1d..e1939a7d 100644 --- a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py +++ b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py @@ -1752,6 +1752,21 @@ mainheat_cases = [ 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_mineral_and_wood': False, 'has_dual_fuel_appliance': False, 'has_assumed': False, 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False - + }, + { + 'original_description': 'Boiler and radiators, wood chips', + 'has_radiators': True, 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, + 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True, + 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False, + 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, + 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False, + 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False, + 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False, + 'has_community_heat_pump': False, 'has_hot-water-only': False, 'has_electric': False, 'has_mains_gas': False, + 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, 'has_anthracite': False, + 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, + 'has_mineral_and_wood': False, 'has_dual_fuel_appliance': False, 'has_wood_chips': True, 'has_assumed': False, + 'has_electricaire': False, 
'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False } + ] diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 21794284..cf6659f9 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -1,7 +1,8 @@ import time import re -import pandas as pd import requests +import pandas as pd +from copy import deepcopy from bs4 import BeautifulSoup from datetime import datetime @@ -21,18 +22,28 @@ class RetrieveFindMyEpc: 'Chrome/111.0.0.0 Safari/537.36' } - def __init__(self, address: str, postcode: str): + def __init__( + self, address: str, postcode: str, rrn: str = None, address_postal_town: str = "", sap_rating: int = None + ): """ This class is tasked with retrieving the latest EPC data from the find my epc website :param address: The address of the property :param postcode: The postcode of the property + :param rrn: The RRN of the EPC (if known) """ self.address = address self.postcode = postcode + self.rrn = rrn self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower() self.walls = [] + self.address_postal_town = address_postal_town + if self.address_postal_town: + self.address_postal_town = self.address_postal_town.replace(",", "").replace(" ", "").lower() + + self.sap_rating = sap_rating + @staticmethod def extract_low_carbon_sources(soup): # Find the section header @@ -73,6 +84,12 @@ class RetrieveFindMyEpc: def get_feature_row_text(feature_name, index=0): matches = [row for row in rows if row.find("th") and feature_name in row.find("th").text] if len(matches) > index: + # A commonly seen case is when feature_name is Main heating and we want to make sure we get + # main heating and not main heating control + if feature_name == "Main heating": + matches = [ + row for row in matches if row.find("th") and row.find("th").text.strip() == "Main heating" + ] cells = matches[index].find_all("td") description = self.get_text(cells[0]) rating = 
self.get_text(cells[1]) @@ -280,54 +297,12 @@ class RetrieveFindMyEpc: :return: """ - postcode_input = self.postcode.replace(" ", "+") - postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input) - postcode_response = requests.get(postcode_search, headers=self.HEADERS) - - postcode_res = BeautifulSoup(postcode_response.text, features="html.parser") - rows = postcode_res.find_all('tr', class_='govuk-table__row') - - extracted_table = [] - for row in rows: - # Extract the address and URL - address_tag = row.find('a', class_='govuk-link') - if address_tag is None: - continue - extracted_address = None - extracted_address_url = None - if address_tag: - extracted_address = address_tag.text.strip() - extracted_address_url = address_tag['href'] - - extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower() - if not extracted_address_cleaned.startswith(self.address_cleaned): - continue - - # If the address is a match, we can extract the data - - # Extract the expiry date - expiry_date_tag = row.find('td', class_='govuk-table__cell date') - expiry_date = None - if expiry_date_tag is not None: - expiry_date = expiry_date_tag.parent.find('span').text.strip() - - extracted_table.append( - { - "extracted_address": extracted_address, - "extracted_address_url": extracted_address_url, - "expiry_date": datetime.strptime(expiry_date, '%d %B %Y'), - } - ) - - if not extracted_table: - raise ValueError("No EPC found") - - if len(extracted_table) > 1: - # We take the one with the most recent expiry date - extracted_table = sorted(extracted_table, key=lambda x: x['expiry_date'], reverse=True) - - chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url'] - epc_certificate = chosen_epc.split('/')[-1] + if self.rrn: + # We build the URL directly + epc_certificate = self.rrn + chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}" + else: + chosen_epc, epc_certificate = self._find_epc_page() 
address_response = requests.get(chosen_epc, headers=self.HEADERS) address_res = BeautifulSoup(address_response.text, features="html.parser") @@ -365,9 +340,12 @@ class RetrieveFindMyEpc: return all_find_my_epc_data - def retrieve_newest_find_my_epc_data(self, sap_2012_date=None): + def _find_epc_page(self): """ - For a post code and address, we pull out all the required data from the find my epc website + This function is used to find the EPC page source for a given address and postcode. + It is done by fetching the page, associating to the postcode and then matching the + addresses on the page to the address we have been given. + :return: """ postcode_input = self.postcode.replace(" ", "+") @@ -377,7 +355,7 @@ class RetrieveFindMyEpc: postcode_res = BeautifulSoup(postcode_response.text, features="html.parser") rows = postcode_res.find_all('tr', class_='govuk-table__row') - extracted_table = [] + extracted_table, backup_flat = [], [] for row in rows: # Extract the address and URL address_tag = row.find('a', class_='govuk-link') @@ -392,7 +370,24 @@ class RetrieveFindMyEpc: extracted_address_cleaned = ( extracted_address.replace(",", "").replace(" ", "").lower() ) - if not extracted_address_cleaned.startswith(self.address_cleaned): + + no_primary_match = not extracted_address_cleaned.startswith(self.address_cleaned) + no_backup_match = True if not self.address_postal_town else not ( + extracted_address_cleaned.startswith(self.address_postal_town) + ) + + if no_primary_match and no_backup_match: + if self.address_cleaned.startswith("flat"): + # We have a flat address, so we can try and match without the flat number + flat_removed_address = self.address_cleaned[4:] + if extracted_address_cleaned.startswith(flat_removed_address): + # We have a backup match + backup_flat.append( + { + "extracted_address": extracted_address, + "extracted_address_url": extracted_address_url, + } + ) continue # If the address is a match, we can extract the data @@ -411,8 +406,14 @@ class 
RetrieveFindMyEpc: } ) + if not extracted_table and not backup_flat: + # This is a relatively new change, as of November 2025, but we see cases where properties do not + # have data appearing on the find my EPC website, particularly for older EPCs. In this case, we allo + # for us to not find any information and return nothing + return None, None + if not extracted_table: - raise ValueError("No EPC found") + extracted_table = deepcopy(backup_flat) if len(extracted_table) > 1: # We take the one with the most recent expiry date @@ -421,8 +422,35 @@ class RetrieveFindMyEpc: chosen_epc = self.BASE_ENERGY_URL + extracted_table[0]['extracted_address_url'] epc_certificate = chosen_epc.split('/')[-1] - address_response = requests.get(chosen_epc, headers=self.HEADERS) - address_res = BeautifulSoup(address_response.text, features="html.parser") + return chosen_epc, epc_certificate + + def retrieve_newest_find_my_epc_data( + self, sap_2012_date=None, return_page=False, epc_page_source=None, rrn=None + ): + """ + For a post code and address, we pull out all the required data from the find my epc website + """ + + if epc_page_source is None and rrn is None: + chosen_epc, rrn = self._find_epc_page() + if chosen_epc is None: + # We have no resulting data + logger.info("No EPC found for address %s, postcode %s", self.address, self.postcode) + return {} + + address_response = requests.get(chosen_epc, headers=self.HEADERS) + epc_page_source = address_response.text + address_res = BeautifulSoup(address_response.text, features="html.parser") + elif self.rrn: + epc_certificate = self.rrn + chosen_epc = f"{self.BASE_ENERGY_URL}/energy-certificate/{epc_certificate}" + address_response = requests.get(chosen_epc, headers=self.HEADERS) + epc_page_source = address_response.text + address_res = BeautifulSoup(address_response.text, features="html.parser") + else: + if rrn is None: + raise ValueError("rrn must be provided if epc_page_source is provided") + address_res = 
BeautifulSoup(epc_page_source, features="html.parser") # Key data we want to retrieve: # 1) Rating @@ -437,6 +465,14 @@ class RetrieveFindMyEpc: potential_rating = ratings.split(".")[1] current_sap = int(current_rating.split(' ')[-1]) + if self.sap_rating: + if current_sap != self.sap_rating: + # This means we likely have the wrong data. If we are in this scenario, we return nothing + return { + "epc_certificate": None, + "page_source": None, + } + # Retrieve the energy consumption bills = address_res.find('div', {'id': 'bills-affected'}) bills_list = bills.find_all('li') @@ -557,8 +593,21 @@ class RetrieveFindMyEpc: # 5) Pull out the EPC data epc_data = self.extract_epc_data(address_res) + # Pull out the address information which can be found in the box with the class "epc-address" + # We split it up on break tags + addr = address_res.find("p", class_="epc-address").get_text(separator="\n").strip() + lines = addr.split("\n") + if len(lines) > 2: + address1 = lines[0] + address2 = lines[1] + postcode = lines[-1] + else: + address1 = lines[0] + address2 = "" + postcode = lines[-1] + resulting_data = { - 'epc_certificate': epc_certificate, + 'epc_certificate': rrn, 'current_epc_rating': current_rating.split(' ')[-6], 'current_epc_efficiency': current_sap, 'potential_epc_rating': potential_rating.split(' ')[-6], @@ -569,8 +618,17 @@ class RetrieveFindMyEpc: "epc_data": epc_data, **assessment_data, **low_carbon_energy_sources, + "page_source": epc_page_source, + # Add in address a postcode from the page - covers use cases where we are given RRN + "address1": address1, + "address2": address2, + "postcode": postcode, } + if return_page: + # We return the page text as well, which can be parsed again later + return resulting_data, epc_page_source + return resulting_data def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None): @@ -606,7 +664,7 @@ class RetrieveFindMyEpc: "roomstat_programmer_trvs", "time_temperature_zone_control" ], "Change 
heating to gas condensing boiler": ["boiler_upgrade"], - "Fan assisted storage heaters and dual immersion cylinder": ["high_heat_retention_storage_heater"], + "Fan assisted storage heaters and dual immersion cylinder": ["high_heat_retention_storage_heaters"], "Flat roof or sloping ceiling insulation": ["flat_roof_insulation"], "Heating controls (room thermostat)": [ "roomstat_programmer_trvs", "time_temperature_zone_control" @@ -627,6 +685,7 @@ class RetrieveFindMyEpc: "Heating controls recommendation": [], "Replace boiler with Band A condensing boiler": ["boiler_upgrade"], "Band A condensing gas boiler": ["boiler_upgrade"], + "Install Band A condensing heating unit": ["boiler_upgrade"], "Solar panel recommendation": [], "Double glazing recommendation": [], "Solid wall insulation recommendation": [], @@ -634,7 +693,7 @@ class RetrieveFindMyEpc: "PV Cells recommendation": [], "Replacement glazing units": ["double_glazing"], "Heating controls (time and temperature zone control)": ["time_temperature_zone_control"], - "High heat retention storage heaters": ["high_heat_retention_storage_heater"], + "High heat retention storage heaters": ["high_heat_retention_storage_heaters"], "Gas condensing boiler": ["boiler_upgrade"], "Change room heaters to condensing boiler": ["boiler_upgrade"], "Cylinder thermostat": ["cylinder_thermostat"], @@ -677,14 +736,16 @@ class RetrieveFindMyEpc: ], "Internal wall insulation": ["internal_wall_insulation"], "High heat retention storage heaters and dual immersion cylinder and dual rate meter": [ - "high_heat_retention_storage_heater" + "high_heat_retention_storage_heaters" ], "High heat retention storage heaters and dual rate meter": [ - "high_heat_retention_storage_heater" + "high_heat_retention_storage_heaters" ], "Increase loft insulation to 250mm": ["loft_insulation"], "Solar photovoltaics panels, 25% of roof area": ["solar_pv"], 'Air or ground source heat pump': ["air_source_heat_pump"], + "Add PV Battery": ["solar_pv_battery"], + "Add 
PV diverter": ["solar_pv_diverter"], # Don't have a recommendation yet } survey = True @@ -711,37 +772,81 @@ class RetrieveFindMyEpc: return formatted_recommendations @classmethod - def get_from_epc(cls, epc): - # Attempt both methods: - try: - searcher = cls(address=epc["address"], postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data() - except Exception as e: - logger.error(f"Error retrieving find my epc data: {e}") - if epc["address1"] == epc["address"]: - # There's no benefit of using the same address, so we split on comma - address1 = epc["address"].split(",")[0] - else: - address1 = epc["address1"] - # We attempt with the backup add - searcher = cls(address=address1, postcode=epc["postcode"]) - find_epc_data = searcher.retrieve_newest_find_my_epc_data() - logger.info("Successfully retrieved find my epc data using backup address") + def get_from_epc(cls, epc, epc_page_source=None, rrn=None, address_postal_town=None, sap_rating=None): + + if epc_page_source is not None and rrn is None: + raise ValueError("rrn must be provided if epc_page_source is provided") + + searcher = cls( + address=epc["address"], postcode=epc["postcode"], address_postal_town=address_postal_town, + sap_rating=sap_rating + ) + find_epc_data = searcher.retrieve_newest_find_my_epc_data(epc_page_source=epc_page_source, rrn=rrn) non_invasive_recommendations = { "uprn": epc["uprn"], "address": epc["address"], "postcode": epc["postcode"], - "recommendations": find_epc_data["recommendations"], + "recommendations": find_epc_data.get("recommendations", []), } # We need to add the patch information patch = { - "current-energy-rating": find_epc_data["current_epc_rating"], - "current-energy-efficiency": find_epc_data["current_epc_efficiency"], - "potential-energy-rating": find_epc_data["potential_epc_rating"], - "potential-energy-efficiency": find_epc_data["potential_epc_efficiency"], - **find_epc_data["epc_data"], + "current-energy-rating": 
find_epc_data.get("current_epc_rating"), + "current-energy-efficiency": find_epc_data.get("current_epc_efficiency"), + "potential-energy-rating": find_epc_data.get("potential_epc_rating"), + "potential-energy-efficiency": find_epc_data.get("potential_epc_efficiency"), + **find_epc_data.get("epc_data", {}), } - return non_invasive_recommendations, patch + page_source = { + "rrn": find_epc_data.get("epc_certificate"), + "page_source": find_epc_data.get("page_source") + } + + return non_invasive_recommendations, patch, page_source + + @classmethod + def get_from_epc_with_fallback( + cls, epc, epc_page, rrn, cleaned_address=None, config_address=None, address_postal_town=None + ): + """ + Attempt get_from_epc with: + 1) Original EPC + 2) EPC with cleaned address + 3) EPC with configured address + in that order. + """ + + # The data we'll use to attempt retrieval + # 1) Original + attempts = [epc] + + # 2) Cleaned + if cleaned_address: + modified = deepcopy(epc) + for k in ["address", "address1"]: + modified[k] = cleaned_address + attempts.append(modified) + + # 3) Config address fallback + if config_address: + modified = deepcopy(epc) + for k in ["address", "address1"]: + modified[k] = config_address + attempts.append(modified) + + sap_rating = float(epc["current-energy-efficiency"]) + + # Iterate attempts + last_error = None + for idx, attempt in enumerate(attempts, start=1): + try: + return cls.get_from_epc( + attempt, epc_page, rrn=rrn, address_postal_town=address_postal_town, sap_rating=sap_rating + ) + except Exception as e: + last_error = e + logger.error(f"Attempt {idx} failed: {e}") + + raise RuntimeError(f"All EPC retrieval attempts failed: {last_error}") diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py index 36cf2d7b..8cef80b1 100644 --- a/etl/spatial/OpenUprnClient.py +++ b/etl/spatial/OpenUprnClient.py @@ -150,9 +150,21 @@ class OpenUprnClient: ) spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)] + # If this is 
empty, we get the nearest property + for p in input_properties: if p.uprn in associated_uprn: - p.set_spatial(spatial_df[spatial_df["UPRN"] == p.uprn]) + p_spatial_df = spatial_df[spatial_df["UPRN"] == p.uprn] + if p_spatial_df.empty: + # Backup method - take the closest UPRN as a proxy + logger.info("Ordnance survey not found - faking the cloest property for a best estimation") + p_spatial_df = spatial_data.loc[ + (spatial_data["UPRN"] - p.uprn).abs().idxmin() + ].copy() + p_spatial_df["LATITUDE"], p_spatial_df["LONGITUDE"] = None, None + p_spatial_df = p_spatial_df.to_frame().T + + p.set_spatial(p_spatial_df) if p.uprn_source == SearchEpc.UPRN_SOURCE_SIMULATED: p.set_spatial(cls.empty_spatial_df()) diff --git a/etl/webscrape/Zoopla.py b/etl/webscrape/Zoopla.py index bb86c759..9d15e019 100644 --- a/etl/webscrape/Zoopla.py +++ b/etl/webscrape/Zoopla.py @@ -1,38 +1,202 @@ -# Initial Code - -from seleniumbase import SB +from bs4 import BeautifulSoup +import pandas as pd import time +from stealth_requests import StealthSession +import random +import os +from multiprocessing import Pool +from tqdm import tqdm +import re +import json -uprns = [ - 100071297618, - 100080893397, - 100060778033, - 200004793081, - 100071265143, - 100071297618, - 100080893397, - 100060778033, - 200004793081, - 100071265143, -] +ENGINES = ["safari", "chrome"] +CACHE_DIR = "zoopla_cache" +os.makedirs(CACHE_DIR, exist_ok=True) -estimate_list = [] -for uprn in uprns: +def random_delay(): + time.sleep(random.uniform(0.5, 2)) - # Probably can change the timings here - time.sleep(5) - with SB(uc=True) as sb: - sb.uc_open_with_reconnect( - f"https://www.zoopla.co.uk/property/uprn/{uprn}/", - 3, + +def extract_embedded_json(text): + match = re.search( + r'"attributes"\s*:\s*\{.*?\}\s*,.*?"historicSales".*?\]', + text, + re.DOTALL + ) + if match: + snippet = "{" + match.group(0) + "}" + snippet = re.sub(r"\\u0022", '"', snippet) + snippet = re.sub(r",(\s*[}\]])", r"\1", snippet) + try: + return 
json.loads(snippet) + except json.JSONDecodeError: + pass + + result = {} + for key in [ + "attributes", "energy", "rentEstimate", + "saleEstimate", "saleHistory", "historicSales" + ]: + key_match = re.search( + rf'"{key}"\s*:\s*(\{{.*?\}}|\[.*?\])', + text, + re.DOTALL + ) + if key_match: + try: + result[key] = json.loads(key_match.group(1)) + except Exception: + pass + return result + + +def scrape_all_estimates(session, url): + resp = session.get(url, impersonate=random.choice(ENGINES)) + html = resp.text + soup = BeautifulSoup(html, "html.parser") + estimates = soup.find_all("div", {"data-testid": "sale-estimate"}) + data = extract_embedded_json(html) + + return { + "estimates": estimates, + "is_blocked": len(estimates) == 0, + "response_html": html, + "attributes": data.get("attributes", {}), + "rentEstimate": data.get("rentEstimate", {}), + "historicSales": data.get("historicSales", []), + } + + +def extract_estimates(estimates): + est = estimates[0] + low = est.find("span", {"data-testid": "low-estimate-blurred"}).text + mid = est.find("p", {"data-testid": "estimate-blurred"}).text + high = est.find("span", {"data-testid": "high-estimate-blurred"}).text + return low, mid, high + + +def cache_path_for_url(url): + uprn = url.split("/")[-2] + return os.path.join(CACHE_DIR, f"{uprn}.html") + + +def parse_cached_html(url, html): + soup = BeautifulSoup(html, "html.parser") + estimates = soup.find_all("div", {"data-testid": "sale-estimate"}) + data = extract_embedded_json(html) + history = data.get("historicSales") or [{}] + + if not estimates: + return None + + low, mid, high = extract_estimates(estimates) + + return { + "URL": url, + "Low Estimate": low, + "Middle Estimate": mid, + "High Estimate": high, + **data.get("attributes", {}), + **data.get("rentEstimate", {}), + **history[0], + } + + +def parallel_task(url): + cache_path = cache_path_for_url(url) + + if os.path.exists(cache_path): + with open(cache_path, "r", encoding="utf-8") as f: + html = f.read() + 
cached = parse_cached_html(url, html) + if cached: + return cached + + with StealthSession() as session: + for attempt in range(5): + output = scrape_all_estimates(session, url) + + if not output["is_blocked"] and output["estimates"]: + html = output.get("response_html") + if html: + with open(cache_path, "w", encoding="utf-8") as f: + f.write(html) + + history = output.get("historicSales") or [{}] + low, mid, high = extract_estimates(output["estimates"]) + + return { + "URL": url, + "Low Estimate": low, + "Middle Estimate": mid, + "High Estimate": high, + **output.get("attributes", {}), + **output.get("rentEstimate", {}), + **history[0], + } + + random_delay() + + return { + "URL": url, + "Low Estimate": None, + "Middle Estimate": None, + "High Estimate": None, + } + + +def parse_price(p): + if not p: + return None + + p = p.replace("£", "").strip().lower() + if p.endswith("k"): + return float(p[:-1]) * 1_000 + if p.endswith("m"): + return float(p[:-1]) * 1_000_000 + + try: + return float(p.replace(",", "")) + except ValueError: + return None + + +if __name__ == "__main__": + asset_list = pd.read_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/modelling_sample.xlsx", + sheet_name="Standardised Asset List" + ) + + asset_list = asset_list[~pd.isnull(asset_list["epc_os_uprn"])] + asset_list = asset_list.drop_duplicates("epc_os_uprn") + asset_list["epc_os_uprn"] = asset_list["epc_os_uprn"].astype(int).astype(str) + + uprns = asset_list["epc_os_uprn"].tolist() + urls = [f"https://www.zoopla.co.uk/property/uprn/{uprn}/" for uprn in uprns] + + with Pool(processes=2) as pool: + estimates_list = list( + tqdm(pool.imap(parallel_task, urls), total=len(urls)) ) - soup = sb.get_beautiful_soup() + df = pd.DataFrame(estimates_list) + df["uprn"] = df["URL"].str.extract(r"uprn/(\d+)/") + df["valuation"] = df["Middle Estimate"].apply(parse_price) - estimates = soup.find_all("div", {"data-testid": "sale-estimate"}) - # Can 
change the way we extract the text here - estimate_text = ( - estimates[-1].find_all("p")[-1].find_all("span")[-1]["aria-label"] - ) - estimate_list.append(estimate_text) + df.to_csv("zoopla_estimates.csv", index=False) + + merged = asset_list.merge( + df[["uprn", "valuation"]], + left_on="epc_os_uprn", + right_on="uprn", + how="left" + ) + + merged.to_excel( + "20251029 AL Portfolio - Standardised - with valuations.xlsx", + index=False + ) + + print("Done. Results saved.") diff --git a/etl/webscrape/requirements.txt b/etl/webscrape/requirements.txt new file mode 100644 index 00000000..4027a224 --- /dev/null +++ b/etl/webscrape/requirements.txt @@ -0,0 +1,5 @@ +beautifulsoup4>=4.12.0 +pandas>=2.0.0 +stealth-requests>=1.0.7 +tqdm>=4.65.0 +openpyxl \ No newline at end of file diff --git a/infrastructure/terraform/dev.tfvars b/infrastructure/terraform/dev.tfvars index dc84a01f..92b7e158 100644 --- a/infrastructure/terraform/dev.tfvars +++ b/infrastructure/terraform/dev.tfvars @@ -9,7 +9,7 @@ api_url_prefix = "api" # Database allocated_storage = 20 -instance_class = "db.t3.micro" +instance_class = "db.t4g.medium" database_name = "DevAssessmentModelDB" # S3 diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/main.tf index 10ef31c2..5a67b793 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/main.tf @@ -66,7 +66,7 @@ resource "aws_security_group" "allow_db" { resource "aws_db_instance" "default" { allocated_storage = var.allocated_storage engine = "postgres" - engine_version = "14.13" + engine_version = "14.17" instance_class = var.instance_class db_name = var.database_name username = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)["db_assessment_model_username"] @@ -85,6 +85,8 @@ resource "aws_db_instance" "default" { ca_cert_identifier = "rds-ca-rsa2048-g1" # Temporary to enfore immediate change apply_immediately = true + # Set up storage type to gp3 for better performance + storage_type = 
"gp3" } # Set up the bucket that recieve the csv uploads of epc to be retrofit @@ -175,6 +177,12 @@ module "retrofit_hotwater_kwh_predictions" { allowed_origins = var.allowed_origins } +module "retrofit_sap_baseline_predictions" { + source = "./modules/s3" + bucketname = "retrofit-sap-baseline-predictions-${var.stage}" + allowed_origins = var.allowed_origins +} + // We make this bucket presignable, because we want to generate download links for the frontend module "retrofit_energy_assessments" { source = "./modules/s3_presignable_bucket" @@ -251,6 +259,12 @@ module "lambda_hotwater_kwh_prediction_ecr" { source = "./modules/ecr" } +# Baselining models +module "sap_baseline_ecr" { + ecr_name = "sap-baseline-prediction-${var.stage}" + source = "./modules/ecr" +} + ############################################## # CDN - Cloudfront ############################################## @@ -261,4 +275,17 @@ module "cloudfront_distribution" { bucket_arn = module.s3.bucket_arn bucket_domain_name = module.s3.bucket_domain_name stage = var.stage +} + +################################################ +# SES - Email sending +################################################ +module "ses" { + source = "./modules/ses" + domain_name = "domna.homes" + stage = var.stage +} + +output "ses_dns_records" { + value = module.ses.dns_records } \ No newline at end of file diff --git a/infrastructure/terraform/modules/ses/main.tf b/infrastructure/terraform/modules/ses/main.tf new file mode 100644 index 00000000..e8f183ae --- /dev/null +++ b/infrastructure/terraform/modules/ses/main.tf @@ -0,0 +1,50 @@ +resource "aws_ses_domain_identity" "this" { + domain = var.domain_name +} + +# DKIM signing +resource "aws_ses_domain_dkim" "this" { + domain = aws_ses_domain_identity.this.domain +} + +# IAM user for SES SMTP +resource "aws_iam_user" "ses_user" { + name = "${var.stage}-ses-user" +} + +resource "aws_iam_user_policy" "ses_send_policy" { + name = "AllowSESSendEmail" + user = aws_iam_user.ses_user.name + 
+ policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = [ + "ses:SendEmail", + "ses:SendRawEmail" + ] + Resource = "*" + } + ] + }) +} + +resource "aws_iam_access_key" "ses_user" { + user = aws_iam_user.ses_user.name +} + +# Store SMTP credentials in AWS Secrets Manager +resource "aws_secretsmanager_secret" "ses_smtp" { + name = "${var.stage}/ses/smtp_credentials" + description = "SMTP credentials for SES (${var.stage})" +} + +resource "aws_secretsmanager_secret_version" "ses_smtp" { + secret_id = aws_secretsmanager_secret.ses_smtp.id + secret_string = jsonencode({ + username = aws_iam_access_key.ses_user.id + password = aws_iam_access_key.ses_user.ses_smtp_password_v4 + }) +} \ No newline at end of file diff --git a/infrastructure/terraform/modules/ses/outputs.tf b/infrastructure/terraform/modules/ses/outputs.tf new file mode 100644 index 00000000..de708983 --- /dev/null +++ b/infrastructure/terraform/modules/ses/outputs.tf @@ -0,0 +1,66 @@ +# These are our DNS records that will need to be added to our Krystal account + +# TXT record +output "verification_record" { + description = "TXT record required to verify the domain with SES" + value = { + name = "_amazonses.${aws_ses_domain_identity.this.domain}" + type = "TXT" + value = aws_ses_domain_identity.this.verification_token + } +} + +# DKIM CNAME records +output "dkim_records" { + description = "CNAME records required to enable DKIM for SES" + value = [ + for dkim in aws_ses_domain_dkim.this.dkim_tokens : { + name = "${dkim}._domainkey.${aws_ses_domain_identity.this.domain}" + type = "CNAME" + value = "${dkim}.dkim.amazonses.com" + } + ] +} + +# SMTP credentials - send them to secrets manager +output "ses_smtp_secret_arn" { + description = "ARN of the SES SMTP credentials stored in Secrets Manager" + value = aws_secretsmanager_secret.ses_smtp.arn +} + +output "smtp_password" { + value = aws_iam_access_key.ses_user.ses_smtp_password_v4 + sensitive = true + description = 
"SMTP password for SES" +} + +output "dns_records" { + description = "All DNS records required for SES verification and recommended deliverability" + value = concat( + [ + { + name = "_amazonses.${aws_ses_domain_identity.this.domain}" + type = "TXT" + value = aws_ses_domain_identity.this.verification_token + }, + { + name = var.domain_name + type = "TXT" + value = "v=spf1 include:amazonses.com -all" + }, + { + name = "_dmarc.${var.domain_name}" + type = "TXT" + value = "v=DMARC1; p=quarantine; rua=mailto:postmaster@${var.domain_name}" + } + ], + [ + for dkim in aws_ses_domain_dkim.this.dkim_tokens : { + name = "${dkim}._domainkey.${aws_ses_domain_identity.this.domain}" + type = "CNAME" + value = "${dkim}.dkim.amazonses.com" + } + ] + ) +} + diff --git a/infrastructure/terraform/modules/ses/variables.tf b/infrastructure/terraform/modules/ses/variables.tf new file mode 100644 index 00000000..d8c97d6d --- /dev/null +++ b/infrastructure/terraform/modules/ses/variables.tf @@ -0,0 +1,9 @@ +variable "domain_name" { + description = "The domain to verify with SES (e.g. domna.homes)" + type = string +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} diff --git a/pytest.ini b/pytest.ini index 84c686b1..1422657b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] pythonpath = . 
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests diff --git a/recommendations/Costs.py b/recommendations/Costs.py index 33d7b061..86062433 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -1,6 +1,7 @@ import numpy as np from recommendations.county_to_region import county_to_region_map from utils.logger import setup_logger +from backend.ml_models.AnnualBillSavings import AnnualBillSavings logger = setup_logger() @@ -21,25 +22,6 @@ regional_labour_variations = [ {"Region": "Northern Ireland", "Adjustment_Factor": 0.76} ] -# This data is based on the MCS database - taken the figures for June 2024 -MCS_SOLAR_PV_COST_DATA = { - "last_updated": "2024-07-10", - "average_cost_per_kwh": 1825, - "average_cost_per_kwh-Outer London": 1950, - "average_cost_per_kwh-Inner London": 1950, - "average_cost_per_kwh-South East England": 1966, - "average_cost_per_kwh-South West England": 1864, - "average_cost_per_kwh-East of England": 1719, - "average_cost_per_kwh-East Midlands": 1730, - "average_cost_per_kwh-West Midlands": 1789, - "average_cost_per_kwh-North East England": 1872, - "average_cost_per_kwh-North West England": 1860, - "average_cost_per_kwh-Yorkshire and the Humber": 1789, - "average_cost_per_kwh-Wales": 1676, - "average_cost_per_kwh-Scotland": 1781, - "average_cost_per_kwh-Northern Ireland": 1347, -} - # Installers are now working with 435 watt panels PANEL_SIZE = 0.435 @@ -61,47 +43,40 @@ INSTALLER_SOLAR_COSTS = [ {'n_panels': 18, 'array_kwp': 18 * PANEL_SIZE, 'cost': 6792.57, 'installer': 'CEG'} ] +# These are costs we received from CRG, for pricing up air source heat pumps +# These are costs that we have been provided from CRG specifically for air 
source heat pumps +ASHP_SMALL_SYSTEM_COST = 8812.92 # 4.8 to 8.5, based on their pricing +ASHP_LARGE_SYSTEM_COST = 11053.25 +ASHP_SECURITY = 455.00 +ASHP_WALL_BRACKET = 574.17 +ASHP_DISTRIBUTION_SYSTEM_COSTS = [ + {"n_radiators": 4, "cost": 3380.00}, + {"n_radiators": 5, "cost": 3607.50}, + {"n_radiators": 6, "cost": 4116.67}, + {"n_radiators": 7, "cost": 4647.50}, + {"n_radiators": 8, "cost": 5200.00}, + {"n_radiators": 9, "cost": 5730.83}, + {"n_radiators": 10, "cost": 6283.33}, + {"n_radiators": 11, "cost": 6857.50}, + {"n_radiators": 12, "cost": 7431.67}, + {"n_radiators": 13, "cost": 8016.67}, + {"n_radiators": 14, "cost": 8612.50}, + {"n_radiators": 15, "cost": 9219.17}, + {"n_radiators": 16, "cost": 9804.17}, + {"n_radiators": 17, "cost": 10389.17}, +] +ASHP_CYLINDER_COSTS = [ + {"capacity_l": 120, "cost": 3318.25}, + {"capacity_l": 180, "cost": 3480.75}, + {"capacity_l": 200, "cost": 3853.42}, + {"capacity_l": 250, "cost": 3961.75}, +] + # CEG uses use Solshare as an inverter to provide solar PV to multiple flats. 
This costs £7500 for the inverter alone # https://midsummerwholesale.co.uk/buy/solshare INSTALLER_SOLAR_PV_INVERTER_COST = 7500 INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST = 500 # Just a rough guess to labour costs -# INSTALLER_SCAFFOLDING_COSTS = [ -# {'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'}, -# {'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'}, -# {'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'} -# ] - -# This data is based on the MCS database, We use the larger figure between the 2023 and 2024 average, -# to be conservative -MCS_AIR_SOURCE_HEAT_PUMP_COST_DATA = { - "Outer London": 13220, - "Inner London": 13220, - "South East England": 13547, - "South West England": 12776, - "East of England": 12585, - "East Midlands": 12239, - "West Midlands": 13182, - "North East England": 11829, - "North West England": 11714, - "Yorkshire and the Humber": 11919, - "Wales": 13701, - "Scotland": 12586, - "Northern Ireland": 12000, # There are hardly any air source heat pump installs going on in Northern Ireland -} - -INSTALLER_ASHP_COSTS = [ - {'capacity_kw': 5.0, 'brand': 'Mitsubishi', 'tank_size_liters': 150, 'cost': 10149.53, 'installer': 'CEG'}, - {'capacity_kw': 6.0, 'brand': 'Mitsubishi', 'tank_size_liters': 170, 'cost': 10823.48, 'installer': 'CEG'}, - {'capacity_kw': 8.5, 'brand': 'Mitsubishi', 'tank_size_liters': 200, 'cost': 11312.43, 'installer': 'CEG'}, - {'capacity_kw': 11.2, 'brand': 'Mitsubishi', 'tank_size_liters': 250, 'cost': 12156.75, 'installer': 'CEG'}, - {'capacity_kw': 14.0, 'brand': 'Mitsubishi', 'tank_size_liters': 300, 'cost': 14405.54, 'installer': 'CEG'}, - {'capacity_kw': 14.0, 'brand': 'Mitsubishi', 'tank_size_liters': 300, 'cost': 14405.54, 'installer': 'CEG'}, - {'capacity_kw': 17.0, 'brand': 'Grant', 'tank_size_liters': 300, 'cost': 14445.00, 'installer': 'CEG'}, - {'capacity_kw': 20.0, 'brand': 'Ecoforest', 'tank_size_liters': 400, 
'cost': 21189.41, 'installer': 'CEG'}, - {'capacity_kw': None, 'brand': '2 x cascaded ASHPs', 'tank_size_liters': 500, 'cost': 22950.00, 'installer': 'CEG'} -] - INSTALLER_SOLAR_BATTERY_COSTS = [ {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 3769.89, 'installer': 'JJC'}, # {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'}, @@ -350,16 +325,31 @@ class Costs: total_cost = material["total_cost"] * insulation_floor_area - labour_hours = material["labour_hours_per_unit"] * insulation_floor_area - # To install suspended floor insulation, a small to medium size project might be conducted by a team of 3 - # people - labour_days = (labour_hours / 8) / 3 + # We assume the average house takes ~7 days to complete at £300/day incl. VAT, as per checkatrade + # which can be seen here: https://www.checkatrade.com/blog/cost-guides/floor-insulation-cost + # Assumptions + base_days = 7 # The quickest it will be completed + base_area = 45 # The area that can be completed in that time (for a typical 90m2 house) + labour_exponent = 0.85 # Non-linear scaling + daily_labour_rate = 300 # Based on checkatrade + + min_days = 3 # Fewest days it will take + labour_days = max( + min_days, + base_days * (insulation_floor_area / base_area) ** labour_exponent + ) + + labour_cost = labour_days * daily_labour_rate + + total_cost = total_cost + labour_cost + + total_cost = round(total_cost) return { "total": total_cost, "contingency": self.CONTINGENCIES["solid_floor_insulation"] * total_cost, "contingency_rate": self.CONTINGENCIES["solid_floor_insulation"], - "labour_hours": labour_hours, + "labour_hours": labour_days * 8, "labour_days": labour_days, } @@ -751,7 +741,9 @@ class Costs: # Adjust total radiator needs based on built form form_factor = { + 'Enclosed Mid-Terrace': 0.9, 'Mid-Terrace': 0.95, + 'Enclosed End-Terrace': 0.95, 'Semi-Detached': 1.05, 'Detached': 1.25, 'End-Terrace': 1.05 @@ -836,32 +828,55 @@ class Costs: "labour_days": 
labour_days, } - def air_source_heat_pump(self, ashp_size): - """ - Based on the region and type of property, this function will produce a cost estimation for an air source heat - pump. This cost will include the boiler upgrade scheme grant - - """ - # This is the average cost of a project, we'll add some additional contingency - - if ashp_size is None: - cost = [x for x in INSTALLER_ASHP_COSTS if x["capacity_kw"] is None][0]["cost"] + @staticmethod + def _select_cylinder_capacity(occupants: float): + if occupants <= 2: + return 120 + elif occupants <= 3: + return 180 + elif occupants <= 4: + return 200 else: - cost = [x for x in INSTALLER_ASHP_COSTS if x][0]["cost"] + return 250 - # The costs from installers exclude VAT - vat = cost * self.VAT_RATE - cost = cost + vat + def air_source_heat_pump(self, ashp_size: float, number_heated_rooms: int, total_floor_area: float) -> dict: + """ + We produce a cost estimation for an air source heat pump, based on costs we have received from installers. 
- # We assume 5 days installation - labour_days = 5 - labour_hours = labour_days * 8 + """ + + system_cost = ( + (ASHP_SMALL_SYSTEM_COST if ashp_size <= 8.5 else ASHP_LARGE_SYSTEM_COST) + ASHP_SECURITY + ASHP_WALL_BRACKET + ) + + available_n_rads = [x["n_radiators"] for x in ASHP_DISTRIBUTION_SYSTEM_COSTS] + if number_heated_rooms < min(available_n_rads): + # We use the smallest value + rads_to_use = min(available_n_rads) + elif number_heated_rooms > max(available_n_rads): + # We use the largest value + rads_to_use = max(available_n_rads) + else: + rads_to_use = int(number_heated_rooms) + + distribution_system_cost = [ + x for x in ASHP_DISTRIBUTION_SYSTEM_COSTS if x["n_radiators"] == rads_to_use + ][0]["cost"] + + # Cylinder cost + est_n_occupants = AnnualBillSavings.calculate_occupants(total_floor_area) + cylinder_capacity = self._select_cylinder_capacity(est_n_occupants) + cylinder_cost = [ + x for x in ASHP_CYLINDER_COSTS if x["capacity_l"] == cylinder_capacity + ][0]["cost"] + + total = system_cost + distribution_system_cost + cylinder_cost return { - "total": cost, - "contingency": cost * self.CONTINGENCIES["air_source_heat_pump"], + "total": total, + "contingency": total * self.CONTINGENCIES["air_source_heat_pump"], "contingency_rate": self.CONTINGENCIES["air_source_heat_pump"], - "vat": vat, - "labour_hours": labour_hours, - "labour_days": labour_days, + "vat": 0, + "labour_hours": 80, + "labour_days": 10, } diff --git a/recommendations/HeatingRecommender.py b/recommendations/HeatingRecommender.py index 73edff53..ea3056ba 100644 --- a/recommendations/HeatingRecommender.py +++ b/recommendations/HeatingRecommender.py @@ -10,6 +10,9 @@ from etl.epc_clean.epc_attributes.MainheatAttributes import MainHeatAttributes from etl.epc_clean.epc_attributes.HotWaterAttributes import HotWaterAttributes from etl.epc_clean.epc_attributes.MainFuelAttributes import MainFuelAttributes from recommendations.HeatingControlRecommender import HeatingControlRecommender +from 
utils.logger import setup_logger + +logger = setup_logger() class HeatingRecommender: @@ -40,7 +43,23 @@ class HeatingRecommender: # type 1 "boiler_upgrade", # type 2 - "high_heat_retention_storage_heater", + "high_heat_retention_storage_heaters", + ] + } + }, + "Boiler and radiators, mains gas, electric underfloor heating": { + "boiler": { + "mainheating_description": "Boiler and radiators, mains gas, electric underfloor heating", + "recommendation_description": "Upgrade the existing boiler to a new, more efficient condensing " + "boiler. ", + "controls_suffix": "Manual charge controls" + }, + # These are the heating types we need to produce a dual heating recommendation + "dual": { + "recommendation_description": "Upgrade the existing boiler to a new condensing boiler", + "types": [ + # type 1 + "boiler_upgrade", ] } }, @@ -103,6 +122,7 @@ class HeatingRecommender: self.property.main_heating["has_electric"] or self.property.main_heating["has_electricaire"] ) self.has_ashp = self.property.main_heating["has_air_source_heat_pump"] + self.has_gshp = self.property.main_heating["has_ground_source_heat_pump"] self.has_room_heaters = ( self.property.main_heating["has_room_heaters"] or self.property.main_heating["has_portable_electric_heaters"] @@ -126,7 +146,7 @@ class HeatingRecommender: n_trues += 1 if n_trues > 2 or n_trues == 0: - raise Exception("Implement me") + raise NotImplementedError("Implement me, zero or more than two heating systemss") if n_trues == 1: return False @@ -147,13 +167,23 @@ class HeatingRecommender: hhr_suitable = no_mains or self.has_electric_heating_description or self.has_room_heaters - hhr_suitable = hhr_suitable and ( - "underfloor heating" not in self.property.main_heating["clean_description"] - ) + # If the property has community heating heaters in place, we don't recommend HHRSH + has_community_heating = self.property.main_fuel["is_community"] + + # If the property currently has electric underfloor heating, we allow this if there is 
electric immersion + # hot water heating + underfloor_not_an_issue = True + if self.property.main_heating["has_electric_underfloor_heating"]: + if self.property.hotwater["heater_type"] != "electric immersion": + underfloor_not_an_issue = False + + hhr_suitable = hhr_suitable and not has_community_heating and underfloor_not_an_issue + + # If the property has a ground source heat pump, or air source heat pump, we don't recommend HHRSH return ( - hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and - ("high_heat_retention_storage_heater" in measures) + hhr_suitable and (not ashp_only_heating_recommendation) and not self.has_ashp and not self.has_gshp and + ("high_heat_retention_storage_heaters" in measures) ) def is_boiler_upgrade_suitable(self, measures, ashp_only_heating_recommendation): @@ -290,16 +320,10 @@ class HeatingRecommender: measures = MEASURE_MAP["heating"] if measures is None else measures - # TODO: We could have a system flush recommendation for an existing boiler, where there is no need to replace - # the boiler, but instead flushing the system will make it run more efficiently.
There is a cost for this - # in the Costs class, stored as SYSTEM_FLUSH_COST - - # TODO: Right now, we don't have recommendations for electric boilers - we should probably have one - # if we have a non-invasive ashp recommendation, we get the configuration directly from the property instance non_invasive_ashp_recommendation = next( (r for r in self.property.non_invasive_recommendations if r["type"] == "air_source_heat_pump"), - {"suitable": True} + {"survey": False} ) # We allow for the non-invasive recommendation to be that ASHP is not suitable @@ -344,8 +368,8 @@ class HeatingRecommender: if ( self.property.is_ashp_valid(measures=measures) and - non_invasive_ashp_recommendation["suitable"] and - not self.has_ashp + len(non_invasive_ashp_recommendation) and + not self.has_ashp and not self.has_gshp ): self.recommend_air_source_heat_pump( phase=phase, @@ -486,6 +510,57 @@ class HeatingRecommender: return heat_pump_size + @staticmethod + def estimate_peak_kw( + floor_area_m2: float, + epc_primary_kwh_per_m2_yr: float | None = None, + # Prefer these if available: + space_heat_kwh_per_m2_yr: float | None = None, # from EPC/SAP if you can + heat_loss_parameter_W_per_m2K: float | None = None, # HLP if available + primary_to_delivered_factor: float = 1.0, + space_heat_fraction_range=(0.5, 0.75), + hdd_base_dd: float = 2100.0, # set per location (base 15.5 °C typical UK) + t_indoor_C: float = 21.0, + t_design_ext_C: float = -3.0, + ): + ΔT = t_indoor_C - t_design_ext_C + + # 1) Best available path: HLP → direct peak + if heat_loss_parameter_W_per_m2K is not None: + peak_kw = heat_loss_parameter_W_per_m2K * floor_area_m2 * ΔT / 1000.0 + return peak_kw, peak_kw # no range needed + + # 2) Second-best: space-heating demand → HDD method + if space_heat_kwh_per_m2_yr is not None: + annual_space_kwh = space_heat_kwh_per_m2_yr * floor_area_m2 + Htot = annual_space_kwh * 1000.0 / (hdd_base_dd * 24.0) # W/K + peak_kw = Htot * ΔT / 1000.0 + return peak_kw, peak_kw + + # 3) Minimal 
inputs: primary energy + assumed fraction → range + assert epc_primary_kwh_per_m2_yr is not None + annual_primary = epc_primary_kwh_per_m2_yr * floor_area_m2 + annual_delivered = annual_primary / primary_to_delivered_factor + + def to_peak(space_fraction): + annual_space = annual_delivered * space_fraction + Htot = annual_space * 1000.0 / (hdd_base_dd * 24.0) + return Htot * ΔT / 1000.0 + + low = to_peak(space_heat_fraction_range[0]) + high = to_peak(space_heat_fraction_range[1]) + return low, high + + @staticmethod + def pick_model(peak_kw_range, models_kw=(5, 6, 8.5, 11.2, 14, 17, 20)): + target = peak_kw_range[1] # cover the upper end + for kw in models_kw: + if kw >= target: + return kw + + # Return the largest + return max(models_kw) + def recommend_air_source_heat_pump(self, phase, has_cavity_or_loft_recommendations, _return=False): """ This method will implement the recommendation for an air source heat pump @@ -501,9 +576,29 @@ class HeatingRecommender: controls_recommender = HeatingControlRecommender(self.property) controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric", phase=phase) - ashp_size = self.size_heat_pump() + # ashp_size = self.size_heat_pump() - ashp_costs = self.costs.air_source_heat_pump(ashp_size) + # New functions to estimate size of ASHP + estimated_load = self.estimate_peak_kw( + floor_area_m2=self.property.floor_area, + epc_primary_kwh_per_m2_yr=self.property.data["energy-consumption-current"], + primary_to_delivered_factor=1.55, # use 1.13 if heating fuel is gas + space_heat_fraction_range=(0.35, 0.60), + hdd_base_dd=2000.0, # set from location + t_indoor_C=21.0, + t_design_ext_C=-1.0 # set from local CIBSE table + ) + ashp_size = self.pick_model(estimated_load) + + number_heated_rooms = self._estimate_n_heated_rooms() + # We now adjust this depending on the floor area to get number of communal rooms (e.g.
hallways) + communal_heated_rooms = self._estimate_n_communal_heated_rooms() + + ashp_costs = self.costs.air_source_heat_pump( + ashp_size, + number_heated_rooms=number_heated_rooms + communal_heated_rooms, + total_floor_area=self.property.floor_area + ) if non_intrusive_recommendation: # Update with non-intrusive recommendation if non_intrusive_recommendation.get("cost"): @@ -824,6 +919,56 @@ class HeatingRecommender: return already_has_hhr and already_has_hhr_contols + def _estimate_n_heated_rooms(self): + # If the property is off-gas and has no heating system in place, the number of heated rooms will actually + # be 0, so we use the number of rooms as the figure + number_heated_rooms = ( + self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0 + else ( + self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else + self.property.number_of_rooms + ) + ) + # To be conservative, we adjust if we still have 1 room + if (number_heated_rooms == 1) and (self.property.number_of_rooms > 2): + number_heated_rooms = self.property.number_of_rooms - 1 + + return number_heated_rooms + + def _estimate_n_communal_heated_rooms(self) -> int: + """ + Estimate number of communal circulation rooms (hallways / landings) that may reasonably contain a heater + """ + + # Base assumptions + base_by_type = { + "Flat": 1, + "Maisonette": 1, + "Bungalow": 1, + "House": 2, + } + + # Fallback if property type unknown + base = base_by_type.get(self.property.data["property-type"], 1) + + # Area-based adjustments + if self.property.data["property-type"] in ("Flat", "Maisonette"): + if self.property.floor_area > 90: + return base + 1 # duplex or very large flat + return base + + if self.property.data["property-type"] == "Bungalow": + if self.property.floor_area > 100: + return base + 1 # secondary corridor + return base + + if self.property.data["property-type"] == "House": + if self.property.floor_area > 140: + return base + 1 # extra landing / 
circulation + return base + + return base + def recommend_hhr_storage_heaters(self, phase, system_change, heating_controls_only, _return=False): """ We will recommend upgrading to a high heat retention storage system, if the current system is not already @@ -853,9 +998,11 @@ class HeatingRecommender: if self.property.main_heating_controls["clean_description"] != self.high_heat_retention_contols_desc: if self.dual_heating: - controls_prefix = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["hhr"]["controls_prefix"] + controls_prefix = self._map_dual_heating_description( + backup_map_to_description="current_controls", + output_type="controls_prefix", + recommendation_type="hhr" + ) if controls_prefix == "current_controls": description_prefix = self.property.main_heating_controls["clean_description"] @@ -881,15 +1028,17 @@ class HeatingRecommender: # We check if there is a high heat retention non-intrusive recommendation non_intrusive_recommendation = next( (r for r in self.property.non_invasive_recommendations if - r["type"] == "high_heat_retention_storage_heater"), + r["type"] == "high_heat_retention_storage_heaters"), {} ) # We check if the property has dual heating in place with a boiler and storage heaters if self.dual_heating: - new_heating_description = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["hhr"]["mainheating_description"] + new_heating_description = self._map_dual_heating_description( + backup_map_to_description="Electric storage heaters", + output_type="mainheating_description", + recommendation_type="hhr" + ) new_hot_water_description = self.property.hotwater["clean_description"] # We keep the hot water system else: new_heating_description = "Electric storage heaters" @@ -923,18 +1072,7 @@ class HeatingRecommender: else: heating_simulation_config["hot_water_energy_eff_ending"] = self.property.data["hot-water-energy-eff"] - # If the property is off-gas and has no heating 
system in place, the number of heated rooms will actually - # be 0, so we use the number of rooms as the figure - number_heated_rooms = ( - self.property.data["number-heated-rooms"] if self.property.data["number-heated-rooms"] > 0 - else ( - self.property.number_of_rooms - 1 if self.property.number_of_rooms > 1 else - self.property.number_of_rooms - ) - ) - # To be conservative, we adjust if we still have 1 room - if (number_heated_rooms == 1) and (self.property.number_of_rooms > 2): - number_heated_rooms = self.property.number_of_rooms - 1 + number_heated_rooms = self._estimate_n_heated_rooms() # We focus on the 700 watt product hhrsh_product = next((x for x in self.hhrsh_products if x["size"] == 700), {}) @@ -946,10 +1084,12 @@ class HeatingRecommender: product=hhrsh_product ) if self.dual_heating: - description = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["hhr"]["recommendation_description"] - + description = self._map_dual_heating_description( + backup_map_to_description="Install high heat retention electric storage heaters with an appropriate " + "off-peak tariff.", + output_type="recommendation_description", + recommendation_type="hhr" + ) else: description = "Install high heat retention electric storage heaters with an appropriate off-peak tariff." 
@@ -969,6 +1109,7 @@ class HeatingRecommender: "hot-water-energy-eff": heating_simulation_config["hot_water_energy_eff_ending"] } + # TODO: Probably don't need to use this for HHRSH - simplify recommendations = self.combine_heating_and_controls( controls_recommendations=controls_recommender.recommendation, heating_simulation_config=heating_simulation_config, @@ -978,10 +1119,16 @@ class HeatingRecommender: phase=phase, heating_controls_only=heating_controls_only, system_change=system_change, - system_type="high_heat_retention_storage_heater", + system_type="high_heat_retention_storage_heaters", non_intrusive_recommendation=non_intrusive_recommendation, heating_product=hhrsh_product ) + + # Check if HHRSH are already installed + already_installed = "high_heat_retention_storage_heaters" in self.property.already_installed + for rec in recommendations: + rec["already_installed"] = already_installed + if _return: return recommendations @@ -1038,6 +1185,61 @@ class HeatingRecommender: return max(num_heated_rooms * 1.5, 6) + def _map_dual_heating_description( + self, backup_map_to_description, output_type, recommendation_type + ): + """ + Utility function to handle dual heating systems + :param backup_map_to_description: + :return: + """ + + if backup_map_to_description not in [ + # Recommendation descriptions - these are the textual descriptions shown in the front end + "Upgrade to a new condensing boiler.", + "Install high heat retention electric storage heaters with an appropriate off-peak tariff.", + # Simulation descriptions - this is the new EPC description we simulate with in the case + # of single heating + "Boiler and radiators, mains gas", + "Electric storage heaters", + # Suffixes allowed + "", + # Controls prefixes + "current_controls" + ]: + raise ValueError(f"Invalid backup_map_to_description, given {backup_map_to_description}") + + if output_type not in [ + "recommendation_description", + "mainheating_description", + "controls_suffix", + "controls_prefix", 
+ ]: + raise ValueError(f"Invalid output_type, given {output_type}") + + if recommendation_type not in [ + "boiler", + "hhr", + ]: + raise ValueError(f"Given invalid recommendation type {recommendation_type}") + + # "Upgrade to a new condensing boiler." + if self.dual_heating: + + # We check if we have a mapped description + if self.property.main_heating["clean_description"] not in self.DUAL_HEATING_DESCRIPTIONS: + logger.warning( + f"We have a dual heating system that hasn't been mapped, defaulting to single " + f"{self.property.main_heating['clean_description']}" + ) + return backup_map_to_description + + return self.DUAL_HEATING_DESCRIPTIONS[ + self.property.main_heating["clean_description"] + ][recommendation_type][output_type] + + return backup_map_to_description + def recommend_boiler_upgrades(self, phase, system_change, exising_room_heaters): """ This boiler recommendation will only recommend a like-for-like upgrade, since changing the system @@ -1073,12 +1275,11 @@ class HeatingRecommender: if has_inefficient_space_heating or has_inefficient_water: - if self.dual_heating: - description = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["boiler"]["recommendation_description"] - else: - description = "Upgrade to a new condensing boiler." 
+ description = self._map_dual_heating_description( + backup_map_to_description="Upgrade to a new condensing boiler.", + output_type="recommendation_description", + recommendation_type="boiler" + ) new_heating_eff = ( "Good" if self.property.data["mainheat-energy-eff"] in ["Very Poor", "Poor", "Average"] @@ -1103,13 +1304,12 @@ class HeatingRecommender: if system_change: # Installation of a boiler improves the hot water system so we need to reflect this in # the outcome of the recommendation - if self.dual_heating: - new_heating_description = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["boiler"]["mainheating_description"] - else: - new_heating_description = "Boiler and radiators, mains gas" + new_heating_description = self._map_dual_heating_description( + backup_map_to_description="Boiler and radiators, mains gas", + output_type="mainheating_description", + recommendation_type="boiler" + ) new_hotwater_description = "From main system" new_fuel_description = "mains gas (not community)" @@ -1148,7 +1348,7 @@ class HeatingRecommender: n_rooms=self.property.number_of_rooms ) - already_installed = "heating" in self.property.already_installed + already_installed = "boiler_upgrade" in self.property.already_installed if already_installed: boiler_costs = override_costs(boiler_costs) description = "Heating system has already been upgraded, no further action needed." 
@@ -1175,9 +1375,11 @@ class HeatingRecommender: # If the property did not previously have a boiler, we combine controls_recommender = HeatingControlRecommender(self.property) if self.dual_heating: - description_suffix = self.DUAL_HEATING_DESCRIPTIONS[ - self.property.main_heating["clean_description"] - ]["boiler"]["controls_suffix"] + description_suffix = self._map_dual_heating_description( + backup_map_to_description="", + output_type="controls_suffix", + recommendation_type="boiler" + ) else: description_suffix = "" controls_recommender.recommend( @@ -1193,9 +1395,11 @@ class HeatingRecommender: # If there is not a system change, we add the boiler recommendation at point. self.heating_recommendations.extend([boiler_recommendation]) - if system_change: + if system_change and len(boiler_recommendation): # We combine the heating and controls recommendations, in the case of a system change - # If this is true, we set SAP points to None and survey to False for the boiler recommendation + # If this is true, we set SAP points to None and survey to False for the boiler recommendation. 
+ # We check if we actually have a boiler recommendation as we may not if the heating and hot water + # are already efficient enough combined_recommendations = [] for controls_recommendation in controls_recommender.recommendation: diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index fa8fe256..ab13134d 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -19,9 +19,12 @@ from backend.ml_models.AnnualBillSavings import AnnualBillSavings from backend.apis.GoogleSolarApi import GoogleSolarApi import backend.app.assumptions as assumptions from backend.app.plan.schemas import SPECIFIC_MEASURES, MEASURE_MAP, NON_INVASIVE_SPECIFIC_MEASURES +from utils.logger import setup_logger STARTING_DUMMY_ID_VALUE = -9999 +logger = setup_logger() + class Recommendations: """ @@ -83,6 +86,18 @@ class Recommendations: inclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.inclusions] exclusions_full = [MEASURE_MAP[x] if x in MEASURE_MAP else x for x in self.exclusions] + + # if we have already installed measures, we need to include them so they get factored into the baseline + # this is something we'll likely need to remove + if self.property_instance.already_installed: + # We make sure that any already installed measures are included + for rec in self.property_instance.already_installed: + if rec not in inclusions_full: + inclusions_full.append(rec) + + # We remove them from the exclusions if they are there + exclusions_full = [e for e in exclusions_full if e not in self.property_instance.already_installed] + # We need to unlist any lists, but we should check if they're lists first inclusions_full = [ item for sublist in inclusions_full for item in (sublist if isinstance(sublist, list) else [sublist]) @@ -269,6 +284,36 @@ class Recommendations: property_recommendations.append(self.solar_recommender.recommendation) phase += 1 + if self.property_instance.already_installed: + # We need to 
re-shuffle our measures + property_recommendations_removed_installed = [] + already_installed_recs = [] + for recs in property_recommendations: + phase_recs = [] + phase_already_installed_recs = [] + for rec in recs: + if rec["already_installed"]: + phase_already_installed_recs.append(rec) + else: + phase_recs.append(rec) + if phase_recs: + property_recommendations_removed_installed.append(phase_recs) + if phase_already_installed_recs: + already_installed_recs.append(phase_already_installed_recs) + + # We re-set the phases + for i, recs in enumerate(property_recommendations_removed_installed): + for rec in recs: + rec["phase"] = i + # already installed recs get negative phasing + already_installed_phase = -len(already_installed_recs) + for recs in already_installed_recs: + for rec in recs: + rec["phase"] = already_installed_phase + already_installed_phase += 1 + + property_recommendations = already_installed_recs + property_recommendations_removed_installed + # We insert temporary ids into the recommendations which is important for the optimiser later property_recommendations = self.insert_temp_recommendation_id(property_recommendations) @@ -483,6 +528,11 @@ class Recommendations: mv_increasing_variables = ["carbon", "heat_demand"] mv_decreasing_variables = ["sap"] + # We allow for negative phase + starting_phase = min( + rec["phase"] for recs in property_recommendations for rec in recs + ) + impact_summary = [] for recommendations_by_type in property_recommendations: for rec in recommendations_by_type: @@ -523,7 +573,7 @@ class Recommendations: # We structure this so that depending on the phase, we capture the previous phase impacts and # then just have one piece of code to calculate the difference - if rec["phase"] == 0: + if rec["phase"] == starting_phase: # These are just the starting values, from the EPC. When we score the ML models, # heating_cost_starting and heating_cost_ending are just the values in the EPC. 
However, with # heating_cost_ending, we expect that the EPC will predict a heating cost based on what would happen @@ -612,7 +662,7 @@ class Recommendations: if metric == "sap": property_phase_impact[metric] = round(property_phase_impact[metric], 2) else: - # We prevent these from being positive + # We prevent mechanical ventilation from being positive property_phase_impact[metric] = ( 0 if property_phase_impact[metric] > 0 else property_phase_impact[metric] ) @@ -629,6 +679,38 @@ class Recommendations: property_phase_impact["carbon"], rec["co2_equivalent_savings"] ) + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] + current_phase_values["carbon"] = previous_phase_values["carbon"] - property_phase_impact["carbon"] + + # We also ensure that mechanical ventilation doesn't have an ovely strong negative SAP impact + if rec["type"] == "mechanical_ventilation": + # ventilation is capped by having no greater and a -4 impact + ventilation_sap_limit = -4 + + def _check_veniltation_out_of_bounds(sap_impact): + return (sap_impact < ventilation_sap_limit) or (sap_impact >= 0) + + def _adjust_ventilation_sap(sap_impact): + if sap_impact >= 0: + return -1 + if sap_impact < ventilation_sap_limit: + return ventilation_sap_limit + + ventilation_out_of_bounds = _check_veniltation_out_of_bounds(property_phase_impact["sap"]) + + if ventilation_out_of_bounds: + previous_modelled_sap = previous_phase_values.get("sap_prediction", 0) + proposed_sap_impact = current_phase_sap - previous_modelled_sap + proposal_out_of_bounds = _check_veniltation_out_of_bounds(proposed_sap_impact) + if proposal_out_of_bounds: + property_phase_impact["sap"] = _adjust_ventilation_sap(proposed_sap_impact) + else: + property_phase_impact["sap"] = proposed_sap_impact + + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] + if rec["type"] == "loft_insulation": # 
When we have a loft insulation recommendation, where there is an extension and the existing # amount of loft insulation is already good, we limit the SAP points @@ -639,6 +721,8 @@ class Recommendations: ) if li_sap_limit is not None: property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit) + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] if rec["type"] == "solar_pv": # We use the SAP points in the recommendation as a minimum @@ -646,6 +730,8 @@ class Recommendations: rec["sap_points"] if property_phase_impact["sap"] < rec["sap_points"] else property_phase_impact["sap"] ) + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] # Insert this information into the recommendation. if not rec.get("survey", False): @@ -666,7 +752,8 @@ class Recommendations: "representative": rec["recommendation_id"] in representative_ids, "recommendation_id": rec["recommendation_id"], "measure_type": rec["measure_type"], - **current_phase_values + **current_phase_values, + "sap_prediction": phase_energy_efficiency_metrics["sap_change"] } ) @@ -678,7 +765,10 @@ class Recommendations: ): # Handle the case of community schemes - if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"): + if (heating_description in ["Community scheme", 'Community scheme, plus solar']) or ( + hotwater_description in ["Community scheme", 'Community scheme, plus solar']) and ( + "not community" not in main_fuel_description + ): if main_fuel_description in ["mains gas (community)", "UNKNOWN"]: return { "heating_fuel_type": "Natural Gas (Community Scheme)", @@ -686,14 +776,57 @@ class Recommendations: "heating_cop": 1, "hotwater_cop": 1 } - raise NotImplementedError("Handle this case") + if main_fuel_description in ['biogas (community)']: + return { + "heating_fuel_type": "Smokeless Fuel", + 
"hotwater_fuel_type": "Smokeless Fuel", + "heating_cop": 0.85, + "hotwater_cop": 0.85 + } + if main_fuel_description in ['coal (community)']: + return { + "heating_fuel_type": "Coal", + "hotwater_fuel_type": "Coal", + "heating_cop": 0.85, + "hotwater_cop": 0.85 + } + + # Handling specific case + if main_fuel_description in ["To be used only when there is no heating/hot-water system"] and ( + "electric heaters" in heating_description.lower() + ): + return { + "heating_fuel_type": "Electricity", + "hotwater_fuel_type": "Electricity", + "heating_cop": 1, + "hotwater_cop": 1 + } + + logger.warning( + "Unhandled community fuel." + f"Fuel: {main_fuel_description}" + f"Heating: {heating_description}" + f"Heating: {hotwater_description}" + ) + return { + "heating_fuel_type": "Unmapped", + "hotwater_fuel_type": "Unmapped", + "heating_cop": 0.9, + "hotwater_cop": 0.9 + } + + mapped = descriptions_to_fuel_types.get(heating_description.strip(), None) + if mapped is None: + # TODO: This is a non-ideal placeholder but we put something in place for a process that falls over + # fairly regularly. 
A task has been added to planner to refactor this + logger.warning("Heating description not mapped: %s", heating_description) + mapped = {"fuel": 'Unmapped', "cop": 0.9} - mapped = descriptions_to_fuel_types[heating_description] heating_fuel = mapped["fuel"] if hotwater_description in [ "From main system", "From main system, no cylinder thermostat", - 'From main system, waste water heat recovery' + 'From main system, waste water heat recovery', ]: return { "heating_fuel_type": heating_fuel, "hotwater_fuel_type": heating_fuel, @@ -709,7 +842,14 @@ class Recommendations: "heating_cop": mapped["cop"], "hotwater_cop": 1 } - mapped_hotwater = descriptions_to_fuel_types[hotwater_description] + mapped_hotwater = descriptions_to_fuel_types.get(hotwater_description.strip()) + if mapped_hotwater is None: + # TODO: This is a non-ideal placeholder but we put something in place for a process that falls over + # fairly regularly. A task has been added to planner to refactor this + # We have observed an edge case where the fuel is described as not being community + # but the hot water is. 
We handle as such + logger.warning("Hot water description not mapped: %s", hotwater_description) + mapped_hotwater = {"fuel": 'Unmapped', "cop": 0.9} return { "heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"], @@ -861,6 +1001,33 @@ class Recommendations: pd.isnull(kwh_impact_table["hotwater_fuel_type"]).sum()): raise Exception("Fuel type is missing") + # As one final adjustment, if we + # 1) have a boiler upgrade recommendation + # 2) Have an average efficiency boiler, we adjust the COP of the existing boiler down to 75% + heating_upgrades = [x for x in property_recommendations if x[0]["type"] == "heating"] + boiler_upgrade = [r for recs in heating_upgrades for r in recs if r["measure_type"] == "boiler_upgrade"] + existing_heating_efficiency = property_instance.data["mainheat-energy-eff"] + + if len(boiler_upgrade) and existing_heating_efficiency in ["Very Poor", "Poor", "Average"]: + efficiency_map = {"Very Poor": 0.6, "Poor": 0.65, "Average": 0.7} + adjusted_cop = efficiency_map[existing_heating_efficiency] + boiler_phase = boiler_upgrade[0]["phase"] + heating_measure_types_to_id = [ + {"recommendation_id": r["recommendation_id"], "measure_type": r["measure_type"]} + for r in heating_upgrades[0] + ] + kwh_impact_table = kwh_impact_table.merge( + pd.DataFrame(heating_measure_types_to_id), how="left", on="recommendation_id" + ) + for col in ["heating_cop", "hotwater_cop"]: + kwh_impact_table[col] = np.where( + (kwh_impact_table["phase"] <= boiler_phase) & + (kwh_impact_table["heating_fuel_type"] == "Natural Gas") & + (kwh_impact_table["measure_type"] != "boiler_upgrade"), + adjusted_cop, kwh_impact_table[col] + ) + kwh_impact_table = kwh_impact_table.drop(columns=["measure_type"]) + # We now calculate the fuel cost for k in ["heating", "hotwater"]: kwh_impact_table[f"{k}_cost"] = kwh_impact_table.apply( diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 0324c9cb..1e5636ff 100644 --- 
a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -150,7 +150,7 @@ class RoofRecommendations: return # If we have a u-value and we don't have a non-invasive recommendation, we can't recommend anything - if u_value and not any( + if (u_value is not None) and not any( x in MEASURE_MAP["roof_insulation"] for x in [r["type"] for r in self.property.non_invasive_recommendations] ): # We don't have enough information to provide a recommendation diff --git a/recommendations/VentilationRecommendations.py b/recommendations/VentilationRecommendations.py index a8aa0ca3..ed2f50e2 100644 --- a/recommendations/VentilationRecommendations.py +++ b/recommendations/VentilationRecommendations.py @@ -39,7 +39,7 @@ class VentilationRecommendations(Definitions): parts = self.mechanical_ventilation_materials.copy() - already_installed = "cavity_wall_insulation" in self.property.already_installed + already_installed = "mechanical_ventilation" in self.property.already_installed # TODO: We now have multiple ventilation options - we default to selecting the cheapest option part = min(parts, key=lambda x: x['total_cost']) diff --git a/recommendations/WallRecommendations.py b/recommendations/WallRecommendations.py index 53a74534..284d1d2a 100644 --- a/recommendations/WallRecommendations.py +++ b/recommendations/WallRecommendations.py @@ -72,6 +72,7 @@ class WallRecommendations(Definitions): 'Timber frame, as built, partial insulation': 'Timber frame, with external insulation', "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with external insulation", "Sandstone, as built, no insulation": "Sandstone, with external insulation", + "Sandstone, as built, partial insulation": "Sandstone, with external insulation", } # These are the ending descriptions we consider for walls with internal insulation @@ -88,6 +89,7 @@ class WallRecommendations(Definitions): 'Timber frame, as built, partial insulation': 'Timber frame, with internal 
insulation', "Sandstone or limestone, as built, no insulation": "Sandstone or limestone, with internal insulation", "Sandstone, as built, no insulation": "Sandstone, with internal insulation", + "Sandstone, as built, partial insulation": "Sandstone, with internal insulation", } def __init__( @@ -163,13 +165,12 @@ class WallRecommendations(Definitions): if ( (insulation_thickness in ["average", "above average"]) or self.property.walls["is_filled_cavity"] - ) and ( - "cavity_extract_and_refill" - not in measures + or self.property.walls["clean_description"] is None + ) and ("cavity_extract_and_refill" not in measures ): return - if u_value: + if u_value is not None: if self.property.walls["thermal_transmittance_unit"] != self.U_VALUE_UNIT: raise NotImplementedError( @@ -640,8 +641,18 @@ class WallRecommendations(Definitions): # we separate the logic for for recommending them, therefore we don't # consider diminishing returns between the two as they are considered to be separate measures + prop_already_installed = self.property.already_installed + # So, we'll end up with problems if e.g. an external wall insulation is already installed and we try and + # recommend internal wall insulation. 
To avoid this, we check if either measure is already installed + # and: + # 1) If EWI is installed, we don't recommend IWI + # 2) If IWI is installed, we don't recommend EWI + # We only produce the recommendation for the moment, for the purpose of re-baselining + ewi_recommendations = [] - if self.ewi_valid() and "external_wall_insulation" in measures: + if self.ewi_valid() and "external_wall_insulation" in measures and ( + "internal_wall_insulation" not in prop_already_installed + ): ewi_recommendations = self._find_insulation( u_value=u_value, insulation_materials=pd.DataFrame( @@ -652,7 +663,7 @@ class WallRecommendations(Definitions): ) iwi_recommendations = [] - if "internal_wall_insulation" in measures: + if "internal_wall_insulation" in measures and "external_wall_insulation" not in prop_already_installed: iwi_recommendations = self._find_insulation( u_value=u_value, insulation_materials=pd.DataFrame(self.internal_wall_insulation_materials), diff --git a/recommendations/WindowsRecommendations.py b/recommendations/WindowsRecommendations.py index bc5e6066..917a1667 100644 --- a/recommendations/WindowsRecommendations.py +++ b/recommendations/WindowsRecommendations.py @@ -52,6 +52,10 @@ class WindowsRecommendations: # We don't make any recommendations in this case. 
The property already has outstanding glazing return + # We handle the rare case of not having any windows data + if self.property.windows["clean_description"] is None: + return + if self.property.windows["has_glazing"] & ( self.property.windows["glazing_coverage"] == "full" ): @@ -68,23 +72,36 @@ class WindowsRecommendations: elif "secondary_glazing" in measures and "double_glazing" not in measures: is_secondary_glazing = True else: - is_secondary_glazing = self.property.restricted_measures or ( - self.property.windows["glazing_type"] == "secondary" + # If the property currently has some secondary glazing but isn't in a conservation area + # + is_secondary_glazing = self.property.restricted_measures and ( + self.property.data["windows-energy-eff"] in ["Poor", "Very Poor"] ) - windows_area = self.property.windows_area + + # We check if the windows are partially insulated but we're recommending double glazing as a complete + # replacement + double_glazing_replacement = ( + not is_secondary_glazing and + # As defined in coverage_map in windows attributes + self.property.windows["glazing_coverage"] in ["partial", "most"] + ) if not number_of_windows: raise ValueError("Number of windows not specified") - if windows_area is not None: - # TODO - we don't have a price for this so we can't recommend it - print("We have windows area, we should use this data for our recommendations!!!") - # We scale the number of windows based on the proportion of existing glazing if self.property.data["multi-glaze-proportion"] != "": - n_windows_scalar = 1 - ( - int(self.property.data["multi-glaze-proportion"]) / 100 - ) + + if (self.property.windows["clean_description"] == "Some double glazing") and ( + self.property.data["windows-energy-eff"] == "Very Poor") and ( + self.property.data["multi-glaze-proportion"] == 100 + ): + # In this case, we assume all of the dinwos need replacing + n_windows_scalar = 1 + else: + n_windows_scalar = 1 - ( + int(self.property.data["multi-glaze-proportion"]) 
/ 100 + ) else: n_windows_scalar = self.COVERAGE_MAP.get( self.property.windows["glazing_coverage"], 1 @@ -93,6 +110,9 @@ class WindowsRecommendations: number_of_windows *= n_windows_scalar number_of_windows = np.ceil(number_of_windows) + # Handle edge case - prevent number of windows 0 + number_of_windows = max(1, number_of_windows) + # We then price the job based on the number of windows that there are cost_result = self.costs.window_glazing( number_of_windows=number_of_windows, @@ -100,7 +120,10 @@ class WindowsRecommendations: is_secondary_glazing=is_secondary_glazing, ) - already_installed = "windows_glazing" in self.property.already_installed + measure_type = "double_glazing" if not is_secondary_glazing else "secondary_glazing" + + already_installed = measure_type in self.property.already_installed + if already_installed: cost_result = override_costs(cost_result) description = "The property already has double glazing installed. No further action is required." @@ -108,7 +131,7 @@ class WindowsRecommendations: glazing_type = ( "secondary glazing" if is_secondary_glazing else "double glazing" ) - if self.property.windows["glazing_coverage"] in ["partial", "most"]: + if self.property.windows["glazing_coverage"] in ["partial", "most"] and not double_glazing_replacement: description = f"Install {glazing_type} to the remaining windows" else: description = f"Install {glazing_type} to all windows" @@ -185,15 +208,16 @@ class WindowsRecommendations: else: glazed_type_ending = "secondary glazing" new_windows_description = "Multiple glazing throughout" + # Windows only end up with an average efficiency + windows_energy_eff = "Average" else: raise ValueError("Invalid glazing type - implement me") if self.property.data["windows-energy-eff"] == "Very Good": - raise ValueError("Very Good energy efficiency is not supported") + windows_energy_eff = "Very Good" # For post 2002 windows, the energy efficiency is "Good" and so for the simulation, we simulate with "Good" - 
windows_ending_config = WindowAttributes(new_windows_description).process() windows_simulation_config = check_simulation_difference( @@ -215,8 +239,6 @@ class WindowsRecommendations: "glazed-type": glazed_type_ending, } - measure_type = "double_glazing" if not is_secondary_glazing else "secondary_glazing" - non_invasive_recommendation = next( (r for r in self.property.non_invasive_recommendations if r["type"] in ["windows_glazing", measure_type]), {} diff --git a/recommendations/optimiser/CostOptimiser.py b/recommendations/optimiser/CostOptimiser.py index 50f4b884..8f030123 100644 --- a/recommendations/optimiser/CostOptimiser.py +++ b/recommendations/optimiser/CostOptimiser.py @@ -34,11 +34,11 @@ class CostOptimiser: if min_gain == 0: return min_gain elif min_gain <= 5: - return min_gain + 0.5 + return min_gain + 0.25 elif min_gain <= 20: - return min_gain + 1.5 + return min_gain + 0.5 else: - return min_gain + 2 + return min_gain + 0.75 def setup(self): # Initialize Model @@ -109,7 +109,8 @@ class CostOptimiser: self.m.optimize() if self.m.status == OptimizationStatus.INFEASIBLE: - logger.info("We have an infeasible model, setting up slack model") + # Turn off logging - too noisy + # logger.info("We have an infeasible model, setting up slack model") self.setup_slack() self.m.optimize() diff --git a/recommendations/optimiser/GainOptimiser.py b/recommendations/optimiser/GainOptimiser.py index 7b2e56d2..6b757bf1 100644 --- a/recommendations/optimiser/GainOptimiser.py +++ b/recommendations/optimiser/GainOptimiser.py @@ -133,7 +133,8 @@ class GainOptimiser: (self.m.status == OptimizationStatus.OPTIMAL) and not len(solution) ): if self.allow_slack: - logger.info("We have an infeasible model, setting up slack model") + # Turn off logging - too noisy + # logger.info("We have an infeasible model, setting up slack model") self.setup_slack() self.m.optimize() solution = [ diff --git a/recommendations/optimiser/funding_optimiser.py 
b/recommendations/optimiser/funding_optimiser.py index 03824ea0..f9e471ce 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -9,6 +9,8 @@ In the future, we will adapt this into a class-based structure to allow for more from copy import deepcopy import pandas as pd +import numpy as np +from itertools import product from backend.app.plan.schemas import ( WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, ECO4_ELIGIBILE_FABRIC_MEASURES @@ -17,6 +19,7 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser from utils.logger import setup_logger from backend.Funding import Funding +from backend.app.BatterySapScorer import BatterySAPScorer logger = setup_logger() @@ -91,7 +94,7 @@ def violates_min_insulation(fixed, optimisation_input_measures): # heating (incl. PV) flags is_heating = has_any([ "air_source_heat_pump", - "high_heat_retention_storage_heater", + "high_heat_retention_storage_heaters", "boiler_upgrade", "electric_boiler", "time_temperature_zone_control", @@ -171,7 +174,7 @@ def _prs_solution_ok(items, p, funding): # renewable set: has_ashp = ("air_source_heat_pump" in types) # ASHP alone is renewable has_solar = ("solar_pv" in types) - has_hhrsh = ("high_heat_retention_storage_heater" in types) # only counts *with* solar + has_hhrsh = ("high_heat_retention_storage_heaters" in types) # only counts *with* solar # solar PV qualifies if paired with eligible existing heating solar_ok_existing = has_solar and funding.check_solar_eligible_heating_system( @@ -198,7 +201,53 @@ def _ensure_unfunded_costs(groups): return groups -def optimise_with_funding_paths(p, input_measures, housing_type, funding: Funding, budget=None, target_gain=None): +def _get_already_installed_gain(selected_measures, needs_pre_eco_hhrsh_upgrade): + """ + Calculate already installed gain, with special case for pre-ECO4 HHRSH upgrade. 
+ :param selected_measures: List of selected measures + :param needs_pre_eco_hhrsh_upgrade: Boolean indicating if pre-ECO4 HHRSH upgrade is needed + :return: + """ + if needs_pre_eco_hhrsh_upgrade: + return sum( + [x["gain"] for x in selected_measures if + x["already_installed"] or x["type"] == "high_heat_retention_storage_heaters"] + ) + + return sum([x["gain"] for x in selected_measures if x["already_installed"]]) + + +def _move_hhrsh_to_unfunded(picked, unfunded_picked, needs_pre_eco_hhrsh_upgrade): + """ + This function handles the case of moving HHRSH to unfunded picks if needed, where we have an ECO4 project + where an unfunded measure needs to be installed first. + :param picked: List of picked measures + :param unfunded_picked: List of unfunded picked measures + :param needs_pre_eco_hhrsh_upgrade: Boolean indicating if pre-ECO4 HHRSH upgrade is needed + :return: + """ + + if not needs_pre_eco_hhrsh_upgrade: + return picked, unfunded_picked + + # We append HHRSH to unfunded items + hhrsh_measure = [x for x in picked if x["type"] == "high_heat_retention_storage_heaters"] + if not hhrsh_measure: + raise ValueError("Expected HHRSH measure to be in total picks") + unfunded_picked += hhrsh_measure + # Remove from total picks + picked = [x for x in picked if x["type"] != "high_heat_retention_storage_heaters"] + + return picked, unfunded_picked + + +def has_battery(items): + return any(x.get("has_battery", False) for x in items) + + +def optimise_with_funding_paths( + p, input_measures, housing_type, funding: Funding, budget=None, target_gain=None, work_package=None +): """ run_optimizer(sub_measures, budget, target_gain) -> (picked_options, sub_cost, sub_gain) """ @@ -222,17 +271,20 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin "path": {"reference": "unfunded:all"}, "scheme": "none", "is_eligible": False, # no funding scheme applied - "unfunded_items": [] + "unfunded_items": [], + "already_installed_gain": sum([x["gain"] for x 
in picked if x["already_installed"]]) }) # This function will filter down on innovation measures if we are social EPC D - funding_paths, optimisation_input_measures = make_funding_paths(p, input_measures, housing_type, funding) + funding_paths, optimisation_input_measures = make_funding_paths( + p, input_measures, housing_type, funding, work_package + ) # We now produce a fabric only path for ECO4 # We add in generic insulation funding paths (where there is no fixed measure) # Heating controls are only eligible if installed as part of a heating upgrade and so we do not include them - # here - if housing_type == "Social": + # here. We don't have an option if the property is a C or above + if housing_type == "Social" and p.data["current-energy-rating"] not in ["C", "B", "A"]: funding_paths = ( [ { @@ -243,6 +295,10 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin ] + funding_paths ) + needs_pre_eco_hhrsh_upgrade = ( + (p.data["current-energy-rating"] == "D") and work_package == "solar_hhrsh_eco4" + ) + for path_spec in funding_paths: # ECO4 fabric only path = special case @@ -264,6 +320,11 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin if not sub_measures: continue + # If the only measure is loft insulation, we skip this because you cannot do a minor measure only (LI) + # under ECO4 + if len(sub_measures) == 1 and sub_measures[0][0]["type"] in ["loft_insulation"]: + continue + picked, sub_cost, sub_gain = run_optimizer( sub_measures, budget=budget, # no fixed items; budget unchanged @@ -275,6 +336,14 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin scheme = _path_scheme([path_spec]) + # We sum of gain, for already installed measures. 
In this, we also include HHRSH, when we have + # an EPC D property that needs HHRSH but HHRSH isn't an eligible measure + already_installed_gain = _get_already_installed_gain( + picked, needs_pre_eco_hhrsh_upgrade + ) + # If we need a pre-eco4 HHRSH upgrade, we move HHRSH to unfunded items + picked, unfunded_picked = _move_hhrsh_to_unfunded(picked, [], needs_pre_eco_hhrsh_upgrade) + solutions.append( { "fixed_ids": [], @@ -283,8 +352,11 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin "total_gain": sub_gain, "path": path_spec, "scheme": scheme, - "is_eligible": _is_eligible_funding_package(scheme, p.data["current-energy-efficiency"], sub_gain), - "unfunded_items": [] + "is_eligible": _is_eligible_funding_package( + scheme, float(p.data["current-energy-efficiency"]), sub_gain + ), + "unfunded_items": unfunded_picked, + "already_installed_gain": already_installed_gain } ) @@ -301,7 +373,6 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin # We log an error and skip this - we should not see any errors but we can probably get a reasonable # outcome for the end user without a complete termination of the process logger.error("Skipping fixed selection due to minimum insulation violation: %s", fixed) - blah continue scheme = _path_scheme(path_spec) @@ -337,9 +408,7 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin # If we have a budget, we need to ensure the subproblem respects it so we remove the fixed cost (which # may already be over budget) and the fixed gain (which may not be achievable) - if fixed_gain > target_gain: - picked, sub_cost, sub_gain = ([], 0.0, 0.0) - elif fixed_gain < target_gain and not sub_measures: + if (fixed_gain > target_gain) or (fixed_gain <= target_gain and not sub_measures): picked, sub_cost, sub_gain = ([], 0.0, 0.0) else: picked, sub_cost, sub_gain = run_optimizer( @@ -348,8 +417,9 @@ def optimise_with_funding_paths(p, input_measures, 
housing_type, funding: Fundin sub_target_gain=target_gain - fixed_gain if target_gain is not None else None ) - if picked is None: - continue + # if picked is None: + # # If we have something in sub_measures, then we have a partial solution, just not enough to + # continue scheme = _path_scheme(path_spec) @@ -410,6 +480,18 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin total_cost += unfunded_cost total_gain += unfunded_gain + # We now grab the "already installed gain" + # We sum of gain, for already installed measures. In this, we also include HHRSH, when we have + # an EPC D property that needs HHRSH but HHRSH isn't an eligible measure + already_installed_gain = _get_already_installed_gain( + total_picks, needs_pre_eco_hhrsh_upgrade + ) + + # If we need a pre-eco4 HHRSH upgrade, we move HHRSH to unfunded items + total_picks, unfunded_picked = _move_hhrsh_to_unfunded( + total_picks, unfunded_picked, needs_pre_eco_hhrsh_upgrade + ) + solutions.append({ "fixed_ids": fixed_ids, "items": total_picks, @@ -417,26 +499,54 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin "total_gain": total_gain, "path": path_spec, "scheme": scheme, - "is_eligible": _is_eligible_funding_package(scheme, p.data["current-energy-efficiency"], total_gain), + "is_eligible": _is_eligible_funding_package( + scheme, int(p.data["current-energy-efficiency"]), total_gain + ), "unfunded_items": unfunded_picked, + "already_installed_gain": already_installed_gain }) solutions = pd.DataFrame(solutions) + if solutions.empty: + # We return a blank dataframe + return solutions + # Given the scheme, we now check if the packages are eligible. If they *are* eligible, but they don't meet the # final upgrade target, we then look to perform a final optimisation pass to meet the target gain. 
solutions["meets_upgrade_target"] = solutions["total_gain"] >= target_gain - 0.1 # If we have packages that are fundable, but do not meet the upgrade target, we can run a final optimisation pass - if not solutions[solutions["is_eligible"] & ~solutions["meets_upgrade_target"]].empty: - logger.info("We have some packages that are fundable but do not meet the target gain") + # Turned off logging - too noisy + # if not solutions[solutions["is_eligible"] & ~solutions["meets_upgrade_target"]].empty: + # logger.info("We have some packages that are fundable but do not meet the target gain") # We now can calculate the project ABS, which subtracts from the cost, but this is only relevant for ECO4 - solutions["starting_sap"] = p.data["current-energy-efficiency"] + solutions["starting_sap"] = int(p.data["current-energy-efficiency"]) solutions["floor_area"] = p.floor_area solutions["ending_sap"] = solutions["starting_sap"] + solutions["total_gain"] - solutions["starting_band"] = solutions["starting_sap"].apply(funding.get_sap_band) - solutions["ending_band"] = solutions["ending_sap"].apply(funding.get_sap_band) + # We flag projects that are including batteries + solutions["has_battery"] = solutions["items"].apply(has_battery) + solutions["array_size"] = solutions["items"].apply( + lambda x: sum(float(y["array_size"]) for y in x if "array_size" in y) + ) + + # For properties that are including batteries, we need to adjust the starting SAP to include the battery SAP uplift + # Note: We score on ending sap, as the battery SAP uplift is based on the ending SAP after fabric/heat/solar + # upgrades of each package is applied + # NB: The battery SAP uplift is used to potentially prioritise packages that include batteries, it does NOT impact + # the eventual SAP score at this point. Once the package is included, we'll re-calculate battery SAP score outside + # of this. 
This is because + solutions["battery_sap_uplift"] = solutions.apply( + lambda x: BatterySAPScorer.score(starting_sap=x["ending_sap"], pv_size=x["array_size"]) + if x["has_battery"] else 0, + axis=1 + ) + + solutions["starting_band"] = (solutions["starting_sap"] + solutions["already_installed_gain"]).apply( + funding.get_sap_band + ) + solutions["ending_band"] = (solutions["ending_sap"] + solutions["battery_sap_uplift"]).apply(funding.get_sap_band) solutions["floor_area_band"] = solutions["floor_area"].apply(funding.get_floor_area_band) solutions["project_score"] = solutions.apply( lambda x: funding._calculate_full_project_abs( @@ -448,6 +558,7 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin ) rate = funding.get_eco4_abs_rate(is_cavity=p.walls["is_cavity_wall"]) + # The full project funding, at this point, does NOT include any uplifts solutions["full_project_funding"] = solutions["project_score"] * rate # if the scheme is not ECO4, we set the funding to 0 with iloc solutions.loc[solutions["scheme"] != "eco4", "full_project_funding"] = 0.0 @@ -458,9 +569,316 @@ def optimise_with_funding_paths(p, input_measures, housing_type, funding: Fundin solutions["total_uplift"] = solutions.apply(lambda x: get_total_uplift(x), axis=1) solutions["total_uplift_score"] = solutions.apply(lambda x: get_total_innovation_score(x), axis=1) + # Given the solutions we select the optimal one + # 1) If the scheme is ECO4, the full project funding and uplift are deducted from the cost + # 2) If the sheme is GBIS, the partial project funding and uplift are deducted from the cost + # 3) Otherwise, no funding is deducted from the cost + solutions["cost_less_full_project_funding"] = np.where( + solutions["scheme"] == "none", + solutions["total_cost"], + np.where( + solutions["scheme"] == "eco4", + solutions["total_cost"] - solutions["full_project_funding"] - solutions["total_uplift"], + solutions["total_cost"] - solutions["partial_project_funding"] - 
solutions["total_uplift"] + ) + ) + + solutions = solutions.sort_values("cost_less_full_project_funding", ascending=True) + return solutions +def build_heat_pump_paths( + remaining_wall_measures, + remaining_roof_measures, +): + """ + Build AND-paths using cartesian products. + + Rules: + - Always include air_source_heat_pump + - Choose 1 wall measure if any exist + - Choose 1 roof measure if any exist + """ + + # If a category is empty, use [None] so product still works + wall_choices = remaining_wall_measures or [None] + roof_choices = remaining_roof_measures or [None] + + paths = [] + + for wall, roof in product(wall_choices, roof_choices): + parts = [] + + if wall is not None: + parts.append(wall) + if roof is not None: + parts.append(roof) + + parts.append("air_source_heat_pump") + + paths.append({"AND": parts}) + + return paths + + +def exclude_measure_types(input_measures, excluded_types): + excluded = set(excluded_types) + filtered = [] + + for group in input_measures: + kept = [ + opt for opt in group + if opt["type"] not in excluded + ] + if kept: + filtered.append(kept) + + return filtered + + +def optimise_with_scenarios( + p, + input_measures, + budget=None, + target_gain=None, + enforce_heat_pump_insulation=True, + enforce_fabric_first=False, + already_installed_sap=0 +): + """ + Scenario-based optimiser (funding-agnostic). + + Currently implemented scenarios: + 1) With air source heat pump AND required insulation + """ + + solutions = [] + paths = [] + # Produce the unique list of measure types + all_measure_types = [] + for inputs in input_measures: + all_measure_types.extend([x["type"] for x in inputs]) + all_measure_types = list(set(all_measure_types)) + + # We modify the solar PV gain, if there is a battery, to include an estimated SAP battery uplift, should + # the property hit the upgrade target, plus 1. 
We add the additional 1 because the higher the starting SAP, + # the lower the battery SAP uplift, so this is a conservative approach since the true SAP score is + # re-calculated later on. + optimisation_measures = deepcopy(input_measures) + for measures in optimisation_measures: + if measures[0]["type"] == "solar_pv": + for x in measures: + if x["has_battery"]: + x["battery_gain"] = BatterySAPScorer.score( + starting_sap=int(p.data["current-energy-efficiency"]) + target_gain + 1, + pv_size=x["array_size"] + ) + x["gain"] += x["battery_gain"] + + if enforce_fabric_first: + # If this is true, it means we only want to consider a fabric first approach. This means that + # - We treat the fabric of the house first + # - Only once the fabric has been upgraded, do we consider heating upgrades + + # This should be wall insulation, roof insulation, floor insulation and windows + fabric_measures = WALL_INSULATION_MEASURES + ROOF_INSULATION_MEASURES + ECO4_ELIGIBILE_FABRIC_MEASURES + [ + "internal_wall_insulation+mechanical_ventilation", "external_wall_insulation+mechanical_ventilation" + ] + + fabric_only_measures = [ + [opt for opt in group if opt["type"] in fabric_measures] for group in optimisation_measures + ] + fabric_only_measures = [g for g in fabric_only_measures if g] + + if not fabric_only_measures: + # If we have no fabric measures, it means the work has already been done and we can proceed + # straight to heating optimisation + picked_fabric, fabric_cost, fabric_gain = [], 0, 0 + else: + picked_fabric, fabric_cost, fabric_gain = run_optimizer( + input_measures=fabric_only_measures, + budget=budget, + sub_target_gain=target_gain, + # If we can achieve the target gain with just insulation measures, we're done + ) + + picked_fabric_types = {m["type"] for m in picked_fabric} + + remaining_measures = [] + for group in optimisation_measures: + kept = [m for m in group if m["type"] not in picked_fabric_types] + if kept: + remaining_measures.append(kept) + + 
picked_extra, extra_cost, extra_gain = run_optimizer( + remaining_measures, + budget=budget - fabric_cost if budget is not None else None, + sub_target_gain=( + target_gain - fabric_gain + if target_gain is not None + else None + ) + ) + + if picked_extra is None: + picked_extra, extra_cost, extra_gain = [], 0, 0 + + solutions.append({ + "scenario": "fabric_first", + "items": picked_fabric + picked_extra, + "fixed_items": picked_fabric, + "total_cost": fabric_cost + extra_cost, + "total_gain": fabric_gain + extra_gain, + "already_installed_gain": sum([x["gain"] for x in picked_fabric + picked_extra if x["already_installed"]]) + }) + + return append_solution_metrics(solutions, target_gain, p) + + # ------------------------------------------------------------------ + # Scenario 1: Air source heat pump with required insulation + # ------------------------------------------------------------------ + if enforce_heat_pump_insulation: + # Wall measures could be IWI or EWI + remaining_wall_measures = [ + x for x in all_measure_types if x in WALL_INSULATION_MEASURES + [ + "internal_wall_insulation+mechanical_ventilation", "external_wall_insulation+mechanical_ventilation" + ] + ] + remaining_roof_measures = [x for x in all_measure_types if x in ROOF_INSULATION_MEASURES] + + # Mandatory structure: + # - must include ASHP + # - must include >=1 wall insulation (if still needed) + # - must include >=1 roof insulation (if still needed) + # We need all of the combinations of remaining wall and remaining roof measures + heat_pump_paths = build_heat_pump_paths(remaining_wall_measures, remaining_roof_measures) + paths.extend(heat_pump_paths) + + fixed_selections = [] + for path in paths: + result = expand_funding_path(input_measures, [path]) + if result: + fixed_selections.extend(result) + + for fixed in fixed_selections: + + # fixed = [(gi, oi, opt), ...] 
+ fixed_items = [opt for (_, _, opt) in fixed] + fixed_groups = {gi for (gi, _, _) in fixed} + + fixed_cost, fixed_gain = sum_cost_gain(fixed_items) + + # Remaining measures (all other groups) + remaining_measures = [ + grp for gi, grp in enumerate(optimisation_measures) + if gi not in fixed_groups + ] + + # Optimise remaining measures + if ( + target_gain is not None + and fixed_gain >= target_gain + ): + picked, sub_cost, sub_gain = [], 0, 0 + else: + picked, sub_cost, sub_gain = run_optimizer( + remaining_measures, + budget=budget - fixed_cost if budget is not None else None, + sub_target_gain=( + target_gain - fixed_gain + if target_gain is not None + else None + ) + ) + + if picked is None: + continue + + total_items = fixed_items + picked + total_cost = fixed_cost + sub_cost + total_gain = fixed_gain + sub_gain + + solutions.append({ + "scenario": "heat_pump_with_insulation", + "items": total_items, + "fixed_items": fixed_items, + "total_cost": total_cost, + "total_gain": total_gain, + "already_installed_gain": sum([x["gain"] for x in total_items if x["already_installed"]]) + }) + + # ------------------------------------------------------------------ + # Scenario 2: Optimise without air source heat pump + # ------------------------------------------------------------------ + # No special path; just exclude ASHP from options and allow us to optimise. 
+ measures_no_heat_pump = exclude_measure_types(optimisation_measures, ["air_source_heat_pump"]) + + picked, total_cost, total_gain = run_optimizer( + measures_no_heat_pump, + budget=budget, + sub_target_gain=target_gain, + ) + + if picked is not None: + solutions.append({ + "scenario": "no_heat_pump", + "items": picked, + "fixed_items": [], + "total_cost": total_cost, + "total_gain": total_gain, + "already_installed_gain": sum([x["gain"] for x in picked if x["already_installed"]]) + }) + + solutions_df = append_solution_metrics(solutions, target_gain, p, already_installed_sap) + + return solutions_df + + +def _get_ending_sap_without_battery(x): + gain = [y["gain"] - y.get("battery_gain", 0) for y in x["items"]] + return float(sum(gain)) + + +def append_solution_metrics(solutions, target_gain, p, already_installed_sap=0): + """ + Given a set of solutions, this function will return a dataframe, with cost metrics appended, to allow + the end user to select the optimal solution. + :param solutions: + :param target_gain: + :param p: + :param already_installed_sap: + :return: + """ + + solutions_df = pd.DataFrame(solutions) + + if solutions_df.empty: + # We return a blank dataframe + return solutions_df + + # Given the scheme, we now check if the packages are eligible. If they *are* eligible, but they don't meet the + # final upgrade target, we then look to perform a final optimisation pass to meet the target gain. 
+ solutions_df["meets_upgrade_target"] = solutions_df["total_gain"] >= target_gain + # We now can calculate the project ABS, which subtracts from the cost, but this is only relevant for ECO4 + # We flag projects that are including batteries + solutions_df["has_battery"] = solutions_df["items"].apply(has_battery) + solutions_df["array_size"] = solutions_df["items"].apply( + lambda x: sum(float(y["array_size"]) for y in x if "array_size" in y) + ) + + # We need the ending SAP, but we'll need to remove the battery SAP uplift first + + solutions_df["ending_sap_without_battery"] = solutions_df.apply( + lambda x: int(p.data["current-energy-efficiency"]) + already_installed_sap + _get_ending_sap_without_battery(x), + axis=1 + ) + + solutions_df = solutions_df.sort_values("total_cost", ascending=True) + + return solutions_df + + # ---- helpers ------------------------------------------------------------- @@ -604,7 +1022,6 @@ def expand_funding_path(input_measures, path_spec): cands = iter_and_candidates(input_measures, elem["AND"]) else: raise ValueError("unknown path element; expected 'OR' or 'AND'") - if not cands: return [] @@ -659,7 +1076,7 @@ def parse_types(t): def includes_heating(opt_types): return any(x in opt_types for x in { "air_source_heat_pump", - "high_heat_retention_storage_heater", + "high_heat_retention_storage_heaters", "time_temperature_zone_control", # controls count as a heating measure in your pipeline "solar_pv" # you treat PV as heating for funding logic }) @@ -680,6 +1097,10 @@ def run_optimizer(input_measures, budget=None, sub_target_gain=None, allow_slack Thin wrapper over your optimisers. 
Returns: list[dict] selected_options """ + + if not input_measures: + return None, 0.0, 0.0 + if budget is not None: opt = GainOptimiser( input_measures, max_cost=budget, max_gain=(sub_target_gain or float("inf")), @@ -709,6 +1130,9 @@ def _find_measure(input_measures, measure_type): def _make_solar_heating_funding_paths( p, input_measures, funding_paths, remaining_insulation_type, housing_type, funding: Funding ): + # If a property is private and EPC D or above, it's not eligible + if housing_type == "Private" and p.data["current-energy-rating"] in ["D", "C", "B", "A"]: + return funding_paths # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Solar PV with existing eligible heating system # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -741,7 +1165,7 @@ def _make_solar_heating_funding_paths( # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # We don't include electric boilers as they are not eligible for ECO4 funding solar_heating_combos = [ - ("high_heat_retention_storage_heater", "solar_pv+hhrsh:eco4"), + ("high_heat_retention_storage_heaters", "solar_pv+hhrsh:eco4"), ("air_source_heat_pump", "solar_pv+ashp:eco4"), ] if _find_measure(input_measures, "solar_pv"): @@ -770,11 +1194,11 @@ def _make_solar_heating_funding_paths( single_heating_measures = ["air_source_heat_pump"] else: single_heating_measures = [ - "boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump" + "boiler_upgrade", "high_heat_retention_storage_heaters", "air_source_heat_pump" ] measure_references = { "boiler_upgrade": "boiler_upgrade", - "high_heat_retention_storage_heater": "hhrsh", + "high_heat_retention_storage_heaters": "hhrsh", "air_source_heat_pump": "ashp" } for heating_upgrade in single_heating_measures: @@ -816,7 +1240,7 @@ def _make_generic_gbis_funding_paths(input_gbis_measures, funding_paths): return funding_paths + gbis_funding_paths -def make_funding_paths(p, input_measures, housing_type, funding: Funding): +def make_funding_paths(p, input_measures, 
housing_type, funding: Funding, work_package=None): """ This function generates funding paths based on the input measures and the tenure of the property. It checks for the presence of specific measures and creates paths that include necessary insulation measures @@ -827,8 +1251,15 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): :param input_measures: :param housing_type: :param funding: The funding object that provides methods to check eligibility and calculate funding. + :param work_package: Optional work package information. We handle the case of an EPC D property needing a heating + upgrade, where the heating upgrade needs to be conducted before the solar PV work :return: """ + + # If the property is currently EPC C, there is no funding availability + if p.data["current-energy-rating"] in ["C", "B", "A"]: + return [], input_measures + # We handle the case of minimum insulation requirements. Whenever we have a heating system recommendation, # we *must* include an additional insulation measure, unless the property already has sufficient insulation. @@ -856,14 +1287,22 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): if housing_type == "Social" and p.data["current-energy-rating"] == "D": # If the property is currently EPC D, we can only include innovation measures or measures to meet the - # minimum insulation requirements + # minimum insulation requirements. 
We make an exception if we have a measure that is + # already installed, specifically a heat pump input_measures_innovation = [] input_gbis_measures_innovation = [] for measures in input_measures: group_of_innovation_measures = [] group_of_gbis_innovation_measures = [] for measure in measures: - if measure["innovation_uplift"] or measure["type"] in remaining_insulation_type: + + if measure["type"] == "high_heat_retention_storage_heaters" and work_package == "solar_hhrsh_eco4": + # With this work type, if the property is EPC D and doesn't have an eligible heating system + # we install HHRSH as a pre-requisite measure, before the ECO4 project if complete. + group_of_innovation_measures.append(measure) + + if measure["innovation_uplift"] or measure["type"] in remaining_insulation_type or measure[ + "already_installed"]: group_of_innovation_measures.append(measure) if measure["innovation_uplift"] and measure["type"] in ( @@ -878,7 +1317,7 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): input_gbis_measures_innovation.extend(group_of_gbis_innovation_measures) funding_paths = _make_solar_heating_funding_paths( - p, input_measures_innovation, funding_paths, remaining_insulation_type, housing_type, funding + p, input_measures_innovation, funding_paths, remaining_insulation_type, housing_type, funding, ) # Can only be innovation GBIS measures @@ -892,7 +1331,7 @@ def make_funding_paths(p, input_measures, housing_type, funding: Funding): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # 1) The package must include EWI or IWI if the property is private rental sector # We check if we have any EWI or IWI measures available - only for EPC E or below - if p.data["current-energy-rating"] not in ["E", "F", "G"]: + if p.data["current-energy-rating"] in ["E", "F", "G"]: ewi_or_iwi = [{"OR": []}] reference_measures = [] # If we have EWI we add it in diff --git a/recommendations/optimiser/optimiser_functions.py 
b/recommendations/optimiser/optimiser_functions.py index 98725138..a4543dbf 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -6,7 +6,10 @@ from backend.app.utils import epc_to_sap_lower_bound from recommendations.optimiser.CostOptimiser import CostOptimiser -def prepare_input_measures(property_recommendations, goal, needs_ventilation, funding=False): +def prepare_input_measures( + property_recommendations, goal, needs_ventilation, funding=False, + property_eco_packages=None +): """ Prepares a nested list of measure options for optimisation. @@ -37,6 +40,9 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation, fu funding: bool, optional If true, the function will include the innovation uplift in the total cost calculation. If false, this is excluded, since innovation uplift cannot be claimed where funding is not available. + property_eco_packages: dict, optional + Eco package data for the property, if available. If a measure has been specified as part of an eco package + (e.g. HHRSH) this function will include that measure in the optimisation, even if it has negative cost savings. 
Returns ------- @@ -59,6 +65,8 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation, fu {} ) + eco_measures = property_eco_packages[0] if property_eco_packages else [] + input_measures = [] for recs in property_recommendations: @@ -67,11 +75,18 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation, fu continue # Filter out solar PV with batteries - if recs[0]["type"] == "solar_pv": - recs = [r for r in recs if ~r["has_battery"]] + # if recs[0]["type"] == "solar_pv": + # recs = [r for r in recs if ~r["has_battery"]] # Only include measures with non-negative cost savings - recs_to_append = [rec for rec in recs if rec["energy_cost_savings"] >= 0] + if eco_measures: + recs_to_append = [ + rec for rec in recs if (rec["energy_cost_savings"] >= 0) or (rec["measure_type"] in eco_measures) + ] + else: + recs_to_append = [ + rec for rec in recs if (rec["energy_cost_savings"] >= 0) + ] if not recs_to_append: continue @@ -108,6 +123,14 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation, fu else rec["measure_type"] ) + array_size = 0 + if rec["measure_type"] == "solar_pv": + # Grab the parts + solar_part = next( + (part for part in rec["parts"] if part["type"] == "solar_pv"), + ) + array_size = solar_part["size"] + # We also include the innovation uplift to_append.append( { @@ -120,6 +143,9 @@ def prepare_input_measures(property_recommendations, goal, needs_ventilation, fu "partial_project_funding": rec["partial_project_funding"], "partial_project_score": rec["partial_project_score"], "uplift_project_score": rec["uplift_project_score"], + "already_installed": rec.get("already_installed", False), + "has_battery": rec.get("has_battery", False), + "array_size": array_size, } ) @@ -176,7 +202,13 @@ def calculate_fixed_gain(property_required_measures, recommendations, p, needs_v return fixed_gain -def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float) -> float | None: +def 
calculate_gain( + body: PlanTriggerRequest, + p: Property, + fixed_gain: float, + eco_packages: None | dict = None, + already_installed_gain: float = 0, +) -> float | None: """ Calculates the target gain value for optimisation based on the goal. @@ -193,6 +225,8 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float) -> Property object with EPC data (must have p.data["current-energy-efficiency"]). fixed_gain : float Total fixed gain from required measures (returned by calculate_fixed_gain). + eco_packages : dict, optional + already_installed_gain: float, optional Returns ------- @@ -200,9 +234,19 @@ def calculate_gain(body: PlanTriggerRequest, p: Property, fixed_gain: float) -> Required SAP gain for EPC, or None for non-EPC goals. """ if body.goal == "Increasing EPC": - current_sap = int(p.data["current-energy-efficiency"]) + current_sap = int(p.data["current-energy-efficiency"]) + already_installed_gain + + target_sap = ( + eco_packages.get(p.id)[1] if eco_packages.get(p.id)[1] is not None + else epc_to_sap_lower_bound(body.goal_value) + ) + + if target_sap <= current_sap: + # We've already met or exceeded the target EPC + return 0 + gain = CostOptimiser.calculate_sap_gain_with_slack( - epc_to_sap_lower_bound(body.goal_value) - current_sap + target_sap - current_sap ) - fixed_gain if body.simulate_sap_10: gain += 3 @@ -307,7 +351,7 @@ def add_best_practice_measures(property_id, solution, recommendations, selected) return selected -def flatten_recommendations_with_defaults(property_id, recommendations, selected): +def flatten_recommendations_with_defaults(property_id, recommendations, selected, battery_sap_score=0): """ Flattens nested recommendation lists for a property and marks which recommendations were selected. @@ -325,6 +369,8 @@ def flatten_recommendations_with_defaults(property_id, recommendations, selected Each value is a list of lists (grouped by measure type). selected : set Set of selected recommendation IDs. 
+ battery_sap_score: int, optional + SAP score uplift from battery storage, if applicable. Returns ------- @@ -332,13 +378,17 @@ def flatten_recommendations_with_defaults(property_id, recommendations, selected A flattened list of recommendation dicts for the given property, each with an added `default` field. """ - final_recommendations = [ - [ - {**rec, "default": rec["recommendation_id"] in selected} - for rec in recommendations_by_type - ] - for recommendations_by_type in recommendations[property_id] - ] + + final_recommendations = [] + for recommendations_by_type in recommendations[property_id]: + recs_by_type = [] + for rec in recommendations_by_type: + rec_copy = {**rec, "default": rec["recommendation_id"] in selected} + if rec_copy.get("has_battery", False): + rec_copy["sap_points"] += battery_sap_score + recs_by_type.append(rec_copy) + + final_recommendations.append(recs_by_type) # Flatten the nested list of lists into a single list return [rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type] diff --git a/recommendations/rdsap_tables.py b/recommendations/rdsap_tables.py index e56faf7c..46e7d083 100644 --- a/recommendations/rdsap_tables.py +++ b/recommendations/rdsap_tables.py @@ -3,6 +3,7 @@ This script contains standard tables which are defined in rdsap. 
The most recent based on the 2012 version, however the government is currently working on releasing a new version, and there we will need to re-visit this """ + import pandas as pd age_band_data = [ @@ -11,84 +12,98 @@ age_band_data = [ "England_Wales": "before 1900", "Scotland": "before 1919", "Northern_Ireland": "before 1919", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "B", "England_Wales": "1900-1929", "Scotland": "1919-1929", "Northern_Ireland": "1919-1929", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "C", "England_Wales": "1930-1949", "Scotland": "1930-1949", "Northern_Ireland": "1930-1949", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "D", "England_Wales": "1950-1966", "Scotland": "1950-1964", "Northern_Ireland": "1950-1973", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "E", "England_Wales": "1967-1975", "Scotland": "1965-1975", "Northern_Ireland": "1974-1977", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "F", "England_Wales": "1976-1982", "Scotland": "1976-1983", "Northern_Ireland": "1978-1985", - "Park_home_UK": "before 1983" + "Park_home_UK": "before 1983", }, { "age_band": "G", "England_Wales": "1983-1990", "Scotland": "1984-1991", "Northern_Ireland": "1986-1991", - "Park_home_UK": "1983-1995" + "Park_home_UK": "1983-1995", }, { "age_band": "H", "England_Wales": "1991-1995", "Scotland": "1992-1998", "Northern_Ireland": "1992-1999", - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "I", "England_Wales": "1996-2002", "Scotland": "1999-2002", "Northern_Ireland": "2000-2006", - "Park_home_UK": "1996-2005" + "Park_home_UK": "1996-2005", }, { "age_band": "J", "England_Wales": "2003-2006", "Scotland": "2003-2007", "Northern_Ireland": None, - "Park_home_UK": None + "Park_home_UK": None, }, { "age_band": "K", "England_Wales": "2007-2011", "Scotland": "2008-2011", "Northern_Ireland": "2007-2013", - "Park_home_UK": "2006 onwards" + "Park_home_UK": "2006 
onwards", }, { "age_band": "L", "England_Wales": "2012 onwards", "Scotland": "2012 onwards", "Northern_Ireland": "2014 onwards", - "Park_home_UK": None + "Park_home_UK": None, + }, + { + "age_band": "L", + "England_Wales": "2012-2021", + "Scotland": "2012-2023", + "Northern_Ireland": "2014-2022", + "Park_home_UK": None, + }, + { + "age_band": "M", + "England_Wales": "2022 onwards", + "Scotland": "2024 onwards", + "Northern_Ireland": "2023 onwards", + "Park_home_UK": None, }, ] @@ -102,32 +117,116 @@ england_wales_age_band_lookup = { ######################################################################################################################## default_wall_thickness = [ { - "type": "stone", "A": 500, "B": 500, "C": 500, "D": 500, "E": 450, "F": 420, "G": 420, "H": 420, - "I": 450, "J": 450, "K": 450, "L": 450 + "type": "stone", + "A": 500, + "B": 500, + "C": 500, + "D": 500, + "E": 450, + "F": 420, + "G": 420, + "H": 420, + "I": 450, + "J": 450, + "K": 450, + "L": 450, + "M": 450, }, { - "type": "solid brick", "A": 220, "B": 220, "C": 220, "D": 220, "E": 240, "F": 250, "G": 270, "H": 270, - "I": 300, "J": 300, "K": 300, "L": 300 + "type": "solid brick", + "A": 220, + "B": 220, + "C": 220, + "D": 220, + "E": 240, + "F": 250, + "G": 270, + "H": 270, + "I": 300, + "J": 300, + "K": 300, + "L": 300, + "M": 300, }, { - "type": "cavity", "A": 250, "B": 250, "C": 250, "D": 250, "E": 250, "F": 260, "G": 270, "H": 270, - "I": 300, "J": 300, "K": 300, "L": 300 + "type": "cavity", + "A": 250, + "B": 250, + "C": 250, + "D": 250, + "E": 250, + "F": 260, + "G": 270, + "H": 270, + "I": 300, + "J": 300, + "K": 300, + "L": 300, + "M": 300, }, { - "type": "timber frame", "A": 150, "B": 150, "C": 150, "D": 250, "E": 270, "F": 270, "G": 270, "H": 270, - "I": 300, "J": 300, "K": 300, "L": 300 + "type": "timber frame", + "A": 150, + "B": 150, + "C": 150, + "D": 250, + "E": 270, + "F": 270, + "G": 270, + "H": 270, + "I": 300, + "J": 300, + "K": 300, + "L": 300, + "M": 300, }, { - 
"type": "cob", "A": 540, "B": 540, "C": 540, "D": 540, "E": 540, "F": 540, "G": 560, "H": 560, "I": 590, - "J": 590, "K": 590, "L": 590 + "type": "cob", + "A": 540, + "B": 540, + "C": 540, + "D": 540, + "E": 540, + "F": 540, + "G": 560, + "H": 560, + "I": 590, + "J": 590, + "K": 590, + "L": 590, + "M": 590, }, { - "type": "system build", "A": 250, "B": 250, "C": 250, "D": 250, "E": 250, "F": 300, "G": 300, "H": 300, - "I": 300, "J": 300, "K": 300, "L": 300 + "type": "system build", + "A": 250, + "B": 250, + "C": 250, + "D": 250, + "E": 250, + "F": 300, + "G": 300, + "H": 300, + "I": 300, + "J": 300, + "K": 300, + "L": 300, + "M": 300, }, { - "type": "park home", "A": None, "B": None, "C": None, "D": None, "E": None, "F": 50, "G": 50, - "H": None, "I": 75, "J": 100, "K": 100, "L": 100 + "type": "park home", + "A": None, + "B": None, + "C": None, + "D": None, + "E": None, + "F": 50, + "G": 50, + "H": None, + "I": 75, + "J": 100, + "K": 100, + "L": 100, + "M": 100, }, ] @@ -168,38 +267,444 @@ wall_types = [ ] u_values = [ - ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["a", "a", "a", "a", "1.7b", "1.0", "0.6", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["1.7", "1.7", "1.7", "1.7", "1.7", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["0.55", "0.55", "0.55", "0.55", "0.55", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], - ["0.32", "0.32", "0.32", "0.32", "0.32", "0.28", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], - ["0.23", "0.23", "0.23", "0.23", "0.23", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], - ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], - ["0.80", "0.80", "0.80", "0.80", "0.80", "0.80", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["0.40", "0.40", "0.40", "0.40", "0.40", "0.40", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], - ["0.26", "0.26", "0.26", "0.26", "0.26", "0.26", "0.24", "0.24", "0.21", "0.19", "0.17", 
"0.16"], - ["0.20", "0.20", "0.20", "0.20", "0.20", "0.20", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], - ["0.16", "0.16", "0.16", "0.16", "0.16", "0.16", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], - ["1.5", "1.5", "1.5", "1.5", "1.5", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["0.53", "0.53", "0.53", "0.53", "0.53", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], - ["0.32", "0.32", "0.32", "0.32", "0.32", "0.30", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], - ["0.23", "0.23", "0.23", "0.23", "0.23", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], - ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], - ["0.7", "0.7", "0.7", "0.7", "0.7", "0.40", "0.35", "0.35", "0.45", "0.35", "0.30", "0.28"], - ["0.37", "0.37", "0.37", "0.37", "0.37", "0.27", "0.25", "0.25", "0.25", "0.25", "0.21", "0.21"], - ["0.25", "0.25", "0.25", "0.25", "0.25", "0.20", "0.19", "0.19", "0.19", "0.19", "0.17", "0.16"], - ["0.19", "0.19", "0.19", "0.19", "0.19", "0.16", "0.15", "0.15", "0.15", "0.15", "0.14", "0.14"], - ["0.16", "0.16", "0.16", "0.16", "0.16", "0.13", "0.13", "0.13", "0.13", "0.13", "0.12", "0.12"], - ["2.5", "1.9", "1.9", "1.0", "0.80", "0.45", "0.40", "0.40", "0.40", "0.35", "0.30", "0.28"], - ["0.60", "0.55", "0.55", "0.40", "0.40", "0.40", "0.40", "0.40", "0.40", "0.35", "0.30", "0.28"], - ["2.0", "2.0", "2.0", "2.0", "1.7", "1.0", "0.60", "0.60", "0.45", "0.35", "0.30", "0.28"], - ["0.60", "0.60", "0.60", "0.60", "0.55", "0.45", "0.35", "0.35", "0.30", "0.25", "0.21", "0.21"], - ["0.35", "0.35", "0.35", "0.35", "0.35", "0.32", "0.24", "0.24", "0.21", "0.19", "0.17", "0.16"], - ["0.25", "0.25", "0.25", "0.25", "0.25", "0.21", "0.18", "0.18", "0.17", "0.15", "0.14", "0.14"], - ["0.18", "0.18", "0.18", "0.18", "0.18", "0.17", "0.15", "0.15", "0.14", "0.13", "0.12", "0.12"], + [ + "a", + "a", + "a", + "a", + "1.7b", + "1.0", + "0.6", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + 
"0.26", + ], + [ + "a", + "a", + "a", + "a", + "1.7b", + "1.0", + "0.6", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + "0.26", + ], + [ + "1.7", + "1.7", + "1.7", + "1.7", + "1.7", + "1.0", + "0.60", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + "0.26", + ], + [ + "0.55", + "0.55", + "0.55", + "0.55", + "0.55", + "0.45", + "0.35", + "0.35", + "0.30", + "0.25", + "0.21", + "0.21", + "0.20", + ], + [ + "0.32", + "0.32", + "0.32", + "0.32", + "0.32", + "0.28", + "0.24", + "0.24", + "0.21", + "0.19", + "0.17", + "0.16", + "0.15", + ], + [ + "0.23", + "0.23", + "0.23", + "0.23", + "0.23", + "0.21", + "0.18", + "0.18", + "0.17", + "0.15", + "0.14", + "0.14", + "0.13", + ], + [ + "0.18", + "0.18", + "0.18", + "0.18", + "0.18", + "0.17", + "0.15", + "0.15", + "0.14", + "0.13", + "0.12", + "0.12", + "0.11", + ], + [ + "0.80", + "0.80", + "0.80", + "0.80", + "0.80", + "0.80", + "0.60", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + "0.26", + ], + [ + "0.40", + "0.40", + "0.40", + "0.40", + "0.40", + "0.40", + "0.35", + "0.35", + "0.30", + "0.25", + "0.21", + "0.21", + "0.20", + ], + [ + "0.26", + "0.26", + "0.26", + "0.26", + "0.26", + "0.26", + "0.24", + "0.24", + "0.21", + "0.19", + "0.17", + "0.16", + "0.15", + ], + [ + "0.20", + "0.20", + "0.20", + "0.20", + "0.20", + "0.20", + "0.18", + "0.18", + "0.17", + "0.15", + "0.14", + "0.14", + "0.13", + ], + [ + "0.16", + "0.16", + "0.16", + "0.16", + "0.16", + "0.16", + "0.15", + "0.15", + "0.14", + "0.13", + "0.12", + "0.12", + "0.11", + ], + [ + "1.5", + "1.5", + "1.5", + "1.5", + "1.5", + "1.0", + "0.60", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + "0.26", + ], + [ + "0.53", + "0.53", + "0.53", + "0.53", + "0.53", + "0.45", + "0.35", + "0.35", + "0.30", + "0.25", + "0.21", + "0.21", + "0.20", + ], + [ + "0.32", + "0.32", + "0.32", + "0.32", + "0.32", + "0.30", + "0.24", + "0.24", + "0.21", + "0.19", + "0.17", + "0.16", + "0.15", + ], + [ + "0.23", + "0.23", + "0.23", + "0.23", + "0.23", + "0.21", + 
"0.18", + "0.18", + "0.17", + "0.15", + "0.14", + "0.14", + "0.13", + ], + [ + "0.18", + "0.18", + "0.18", + "0.18", + "0.18", + "0.17", + "0.15", + "0.15", + "0.14", + "0.13", + "0.12", + "0.12", + "0.11", + ], + [ + "0.7", + "0.7", + "0.7", + "0.7", + "0.7", + "0.40", + "0.35", + "0.35", + "0.45", + "0.35", + "0.30", + "0.28", + "0.26", + ], + [ + "0.37", + "0.37", + "0.37", + "0.37", + "0.37", + "0.27", + "0.25", + "0.25", + "0.25", + "0.25", + "0.21", + "0.21", + "0.20", + ], + [ + "0.25", + "0.25", + "0.25", + "0.25", + "0.25", + "0.20", + "0.19", + "0.19", + "0.19", + "0.19", + "0.17", + "0.16", + "0.15", + ], + [ + "0.19", + "0.19", + "0.19", + "0.19", + "0.19", + "0.16", + "0.15", + "0.15", + "0.15", + "0.15", + "0.14", + "0.14", + "0.13", + ], + [ + "0.16", + "0.16", + "0.16", + "0.16", + "0.16", + "0.13", + "0.13", + "0.13", + "0.13", + "0.13", + "0.12", + "0.12", + "0.11", + ], + [ + "2.5", + "1.9", + "1.9", + "1.0", + "0.80", + "0.45", + "0.40", + "0.40", + "0.40", + "0.35", + "0.30", + "0.28", + "0.26", + ], + [ + "0.60", + "0.55", + "0.55", + "0.40", + "0.40", + "0.40", + "0.40", + "0.40", + "0.40", + "0.35", + "0.30", + "0.28", + "0.26", + ], + [ + "2.0", + "2.0", + "2.0", + "2.0", + "1.7", + "1.0", + "0.60", + "0.60", + "0.45", + "0.35", + "0.30", + "0.28", + "0.26", + ], + [ + "0.60", + "0.60", + "0.60", + "0.60", + "0.55", + "0.45", + "0.35", + "0.35", + "0.30", + "0.25", + "0.21", + "0.21", + "0.20", + ], + [ + "0.35", + "0.35", + "0.35", + "0.35", + "0.35", + "0.32", + "0.24", + "0.24", + "0.21", + "0.19", + "0.17", + "0.16", + "0.15", + ], + [ + "0.25", + "0.25", + "0.25", + "0.25", + "0.25", + "0.21", + "0.18", + "0.18", + "0.17", + "0.15", + "0.14", + "0.14", + "0.13", + ], + [ + "0.18", + "0.18", + "0.18", + "0.18", + "0.18", + "0.17", + "0.15", + "0.15", + "0.14", + "0.13", + "0.12", + "0.12", + "0.11", + ], ] -age_bands = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L"] +age_bands = ["A", "B", "C", "D", "E", "F", "G", "H", "I", 
"J", "K", "L", "M"] wall_uvalues = [] for i, wall_type in enumerate(wall_types): @@ -209,9 +714,24 @@ for i, wall_type in enumerate(wall_types): wall_uvalues.append(row) parkhome_wall_uvalues = [ - {"Wall_type": "Park home as built", "F": "1.7", "G": "1.2", "I": "0.7", "K": "0.6"}, - {"Wall_type": "Park home with additional insulation", "F": "s1.1.2", "G": "s1.1.2", "I": "s1.1.2", - "K": "s1.1.2"} + { + "Wall_type": "Park home as built", + "F": "1.7", + "G": "1.2", + "I": "0.7", + "K": "0.6", + "L": "0.6", + "M": "0.6", + }, + { + "Wall_type": "Park home with additional insulation", + "F": "s1.1.2", + "G": "s1.1.2", + "I": "s1.1.2", + "K": "s1.1.2", + "L": "s1.1.2", + "M": "s1.1.2", + }, ] wall_uvalues.extend(parkhome_wall_uvalues) @@ -229,16 +749,12 @@ epc_wall_description_map = { "Cavity wall, as built, insulated": "Filled cavity", "Cavity wall, with external insulation": "Unfilled cavity with 100 mm external or internal insulation", "Cavity wall, insulated": "Filled cavity", - 'Cavity wall, partial insulation': "Filled cavity", - + "Cavity wall, partial insulation": "Filled cavity", "Cavity wall,": "Cavity as built", # General case of cavity wall without further details - "Cavity wall, filled cavity and external insulation": - "Filled cavity with 100 mm external or internal insulation", - "Cavity wall, filled cavity and internal insulation": - "Filled cavity with 100 mm external or internal insulation", + "Cavity wall, filled cavity and external insulation": "Filled cavity with 100 mm external or internal insulation", + "Cavity wall, filled cavity and internal insulation": "Filled cavity with 100 mm external or internal insulation", "Cavity wall, with internal insulation": "Unfilled cavity with 100 mm external or internal insulation", "Cavity wall, no insulation": "Cavity as built", - ############################ # Solid brick wall mappings ############################ @@ -247,7 +763,6 @@ epc_wall_description_map = { "Solid brick, as built, insulated": 
"Stone/solid brick with 100 mm external or internal insulation", "Solid brick, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", "Solid brick, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", - ############################ # Timber frame wall mappings ############################ @@ -262,33 +777,28 @@ epc_wall_description_map = { # Sandstone/limestones wall mappings ############################ "Sandstone or limestone, as built, no insulation": "Stone: sandstone or limestone as built", - "Sandstone or limestone, with internal insulation": - "Stone/solid brick with 100 mm external or internal insulation", + "Sandstone or limestone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone or limestone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal " - "insulation", + "insulation", "Sandstone, as built, no insulation": "Stone: sandstone or limestone as built", - "Sandstone or limestone, as built, insulated": - "Stone/solid brick with 100 mm external or internal insulation", + "Sandstone or limestone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone or limestone, with external insulation": "Stone/solid brick with 100 mm external or internal " - "insulation", + "insulation", "Sandstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", "Sandstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", - ############################ # Granite/whinstone wall mappings ############################ "Granite or whinstone, as built, no insulation": "Stone: granite or whinstone as 
built", - "Granite or whinstone, with internal insulation": "Stone/solid brick with 100 mm external or internal " - "insulation", - "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal " - "insulation", - "Granite or whinstone, as built, insulated": "Stone/solid brick with 100 mm external or internal " - "insulation", - "Granite or whinstone, with external insulation": "Stone/solid brick with 100 mm external or internal " - "insulation", - + "Granite or whinstone, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whin, with internal insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whinstone, as built, partial insulation": "Stone/solid brick with 50 mm external or internal insulation", + "Granite or whinstone, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whin, as built, insulated": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whinstone, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", + "Granite or whin, with external insulation": "Stone/solid brick with 100 mm external or internal insulation", ############################ # System built wall mappings ############################ @@ -297,15 +807,13 @@ epc_wall_description_map = { "System built, with internal insulation": "System build with 100 mm external or internal insulation", "System built, with external insulation": "System build with 100 mm external or internal insulation", "System built, as built, insulated": "System build with 100 mm external or internal insulation", - ############################ # Cob wall mappings ############################ "Cob, as built": "Cob as built", "Cob, with external insulation": "Cob with 100 mm external or internal insulation", "Cob, with internal insulation": "Cob with 100 mm external or internal 
insulation", - 'Cob,': "Cob as built", - + "Cob,": "Cob as built", ############################ # Park home mappings ############################ @@ -321,20 +829,71 @@ epc_wall_description_map = { ######################################################################################################################## s9_list = [ - {"Insulation_thickness_mm": None, "Slates_or_tiles_U_value_W_m2K": 2.3, "Thatched_roof_U_value_W_m2K": 0.35}, - {"Insulation_thickness_mm": 12, "Slates_or_tiles_U_value_W_m2K": 1.5, "Thatched_roof_U_value_W_m2K": 0.32}, - {"Insulation_thickness_mm": 25, "Slates_or_tiles_U_value_W_m2K": 1.0, "Thatched_roof_U_value_W_m2K": 0.30}, - {"Insulation_thickness_mm": 50, "Slates_or_tiles_U_value_W_m2K": 0.68, "Thatched_roof_U_value_W_m2K": 0.25}, - {"Insulation_thickness_mm": 75, "Slates_or_tiles_U_value_W_m2K": 0.50, "Thatched_roof_U_value_W_m2K": 0.22}, - {"Insulation_thickness_mm": 100, "Slates_or_tiles_U_value_W_m2K": 0.40, "Thatched_roof_U_value_W_m2K": 0.20}, - {"Insulation_thickness_mm": 150, "Slates_or_tiles_U_value_W_m2K": 0.30, "Thatched_roof_U_value_W_m2K": 0.17}, - {"Insulation_thickness_mm": 200, "Slates_or_tiles_U_value_W_m2K": 0.21, "Thatched_roof_U_value_W_m2K": 0.14}, - {"Insulation_thickness_mm": 250, "Slates_or_tiles_U_value_W_m2K": 0.17, "Thatched_roof_U_value_W_m2K": 0.12}, - {"Insulation_thickness_mm": 270, "Slates_or_tiles_U_value_W_m2K": 0.16, "Thatched_roof_U_value_W_m2K": 0.12}, - {"Insulation_thickness_mm": 300, "Slates_or_tiles_U_value_W_m2K": 0.14, "Thatched_roof_U_value_W_m2K": 0.11}, - {"Insulation_thickness_mm": 350, "Slates_or_tiles_U_value_W_m2K": 0.12, "Thatched_roof_U_value_W_m2K": 0.10}, - {"Insulation_thickness_mm": 400, "Slates_or_tiles_U_value_W_m2K": 0.11, - "Thatched_roof_U_value_W_m2K": 0.09}, + { + "Insulation_thickness_mm": None, + "Slates_or_tiles_U_value_W_m2K": 2.3, + "Thatched_roof_U_value_W_m2K": 0.35, + }, + { + "Insulation_thickness_mm": 12, + "Slates_or_tiles_U_value_W_m2K": 1.5, + 
"Thatched_roof_U_value_W_m2K": 0.32, + }, + { + "Insulation_thickness_mm": 25, + "Slates_or_tiles_U_value_W_m2K": 1.0, + "Thatched_roof_U_value_W_m2K": 0.30, + }, + { + "Insulation_thickness_mm": 50, + "Slates_or_tiles_U_value_W_m2K": 0.68, + "Thatched_roof_U_value_W_m2K": 0.25, + }, + { + "Insulation_thickness_mm": 75, + "Slates_or_tiles_U_value_W_m2K": 0.50, + "Thatched_roof_U_value_W_m2K": 0.22, + }, + { + "Insulation_thickness_mm": 100, + "Slates_or_tiles_U_value_W_m2K": 0.40, + "Thatched_roof_U_value_W_m2K": 0.20, + }, + { + "Insulation_thickness_mm": 150, + "Slates_or_tiles_U_value_W_m2K": 0.30, + "Thatched_roof_U_value_W_m2K": 0.17, + }, + { + "Insulation_thickness_mm": 200, + "Slates_or_tiles_U_value_W_m2K": 0.21, + "Thatched_roof_U_value_W_m2K": 0.14, + }, + { + "Insulation_thickness_mm": 250, + "Slates_or_tiles_U_value_W_m2K": 0.17, + "Thatched_roof_U_value_W_m2K": 0.12, + }, + { + "Insulation_thickness_mm": 270, + "Slates_or_tiles_U_value_W_m2K": 0.16, + "Thatched_roof_U_value_W_m2K": 0.12, + }, + { + "Insulation_thickness_mm": 300, + "Slates_or_tiles_U_value_W_m2K": 0.14, + "Thatched_roof_U_value_W_m2K": 0.11, + }, + { + "Insulation_thickness_mm": 350, + "Slates_or_tiles_U_value_W_m2K": 0.12, + "Thatched_roof_U_value_W_m2K": 0.10, + }, + { + "Insulation_thickness_mm": 400, + "Slates_or_tiles_U_value_W_m2K": 0.11, + "Thatched_roof_U_value_W_m2K": 0.09, + }, ] s10_list = [ @@ -347,7 +906,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 2.3, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": None + "Park_home": None, }, { "Age_band": "E", @@ -358,7 +917,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 1.5, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": None + "Park_home": None, }, { "Age_band": "F", @@ -369,7 +928,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.80, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": 1.7 + "Park_home": 1.7, }, { "Age_band": "G", @@ -380,7 +939,7 
@@ s10_list = [ "Room_in_roof_slates_or_tiles": "0.50", "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": 0.6 + "Park_home": 0.6, }, { "Age_band": "H", @@ -391,7 +950,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.35, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": None + "Park_home": None, }, { "Age_band": "I", @@ -402,7 +961,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.35, "Thatched_roof": 0.35, "Thatched_roof_room_in_roof": 0.25, - "Park_home": 0.35 + "Park_home": 0.35, }, { "Age_band": "J", @@ -413,7 +972,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.30, "Thatched_roof": 0.30, "Thatched_roof_room_in_roof": 0.25, - "Park_home": None + "Park_home": None, }, { "Age_band": "K", @@ -424,7 +983,7 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.25, "Thatched_roof": 0.25, "Thatched_roof_room_in_roof": 0.25, - "Park_home": 0.30 + "Park_home": 0.30, }, { "Age_band": "L", @@ -435,8 +994,8 @@ s10_list = [ "Room_in_roof_slates_or_tiles": 0.18, "Thatched_roof": 0.18, "Thatched_roof_room_in_roof": 0.18, - "Park_home": None - } + "Park_home": None, + }, ] table_s9 = pd.DataFrame(s9_list) @@ -452,22 +1011,70 @@ table_s10 = pd.DataFrame(s10_list) ######################################################################################################################## s11_list = [ - {"Age_band": "A, B", "Floor_construction": "suspended timber", "England_Wales": 0, "Scotland": 0, - "Northern_Ireland": 0, "Park_home": 0}, - {"Age_band": "C to F", "Floor_construction": "solid", "England_Wales": 0, "Scotland": 0, - "Northern_Ireland": 0, "Park_home": 0}, - {"Age_band": "G", "Floor_construction": "solid", "England_Wales": 0, "Scotland": 0, - "Northern_Ireland": 0, "Park_home": 25}, - {"Age_band": "H", "Floor_construction": "solid", "England_Wales": 0, "Scotland": 25, - "Northern_Ireland": 25, "Park_home": 0}, - {"Age_band": "I", "Floor_construction": "solid", "England_Wales": 25, "Scotland": 50, - "Northern_Ireland": 
50, "Park_home": 50}, - {"Age_band": "J", "Floor_construction": "solid", "England_Wales": 75, "Scotland": 75, - "Northern_Ireland": 0, "Park_home": 0}, - {"Age_band": "K", "Floor_construction": "solid", "England_Wales": 100, "Scotland": 100, - "Northern_Ireland": 100, "Park_home": 70}, - {"Age_band": "L", "Floor_construction": "solid", "England_Wales": 100, "Scotland": 120, - "Northern_Ireland": 100, "Park_home": 0}, + { + "Age_band": "A, B", + "Floor_construction": "suspended timber", + "England_Wales": 0, + "Scotland": 0, + "Northern_Ireland": 0, + "Park_home": 0, + }, + { + "Age_band": "C to F", + "Floor_construction": "solid", + "England_Wales": 0, + "Scotland": 0, + "Northern_Ireland": 0, + "Park_home": 0, + }, + { + "Age_band": "G", + "Floor_construction": "solid", + "England_Wales": 0, + "Scotland": 0, + "Northern_Ireland": 0, + "Park_home": 25, + }, + { + "Age_band": "H", + "Floor_construction": "solid", + "England_Wales": 0, + "Scotland": 25, + "Northern_Ireland": 25, + "Park_home": 0, + }, + { + "Age_band": "I", + "Floor_construction": "solid", + "England_Wales": 25, + "Scotland": 50, + "Northern_Ireland": 50, + "Park_home": 50, + }, + { + "Age_band": "J", + "Floor_construction": "solid", + "England_Wales": 75, + "Scotland": 75, + "Northern_Ireland": 0, + "Park_home": 0, + }, + { + "Age_band": "K", + "Floor_construction": "solid", + "England_Wales": 100, + "Scotland": 100, + "Northern_Ireland": 100, + "Park_home": 70, + }, + { + "Age_band": "L", + "Floor_construction": "solid", + "England_Wales": 100, + "Scotland": 120, + "Northern_Ireland": 100, + "Park_home": 0, + }, ] table_s11 = pd.DataFrame(s11_list) @@ -481,21 +1088,90 @@ table_s11 = pd.DataFrame(s11_list) ######################################################################################################################## s12_list = [ - {"age_band": "A", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "B", "insulation_0": 1.2, 
"insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "C", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "D", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "E", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "F", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "G", "insulation_0": 1.2, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - - {"age_band": "H", "insulation_0": 0.51, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - {"age_band": "I", "insulation_0": 0.51, "insulation_50": 0.5, "insulation_100": 0.3, "insulation_150": 0.22}, - - {"age_band": "J", "insulation_0": 0.25, "insulation_50": 0.25, "insulation_100": 0.25, "insulation_150": 0.22}, - - {"age_band": "K", "insulation_0": 0.22, "insulation_50": 0.22, "insulation_100": 0.22, "insulation_150": 0.22}, - {"age_band": "L", "insulation_0": 0.22, "insulation_50": 0.22, "insulation_100": 0.22, "insulation_150": 0.22}, + { + "age_band": "A", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "B", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "C", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "D", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "E", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "F", + "insulation_0": 1.2, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "G", + "insulation_0": 1.2, 
+ "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "H", + "insulation_0": 0.51, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "I", + "insulation_0": 0.51, + "insulation_50": 0.5, + "insulation_100": 0.3, + "insulation_150": 0.22, + }, + { + "age_band": "J", + "insulation_0": 0.25, + "insulation_50": 0.25, + "insulation_100": 0.25, + "insulation_150": 0.22, + }, + { + "age_band": "K", + "insulation_0": 0.22, + "insulation_50": 0.22, + "insulation_100": 0.22, + "insulation_150": 0.22, + }, + { + "age_band": "L", + "insulation_0": 0.22, + "insulation_50": 0.22, + "insulation_100": 0.22, + "insulation_150": 0.22, + }, ] table_s12 = pd.DataFrame(s12_list) diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 602684cf..0794013e 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -201,6 +201,11 @@ def get_wall_u_value( ) ) else: + + # Handle rare edge case + if clean_description == "": + return 0 + mapped_description = epc_wall_description_map[clean_description] mapped_value = wall_uvalues_df[ @@ -239,42 +244,52 @@ def get_wall_u_value( return float(mapped_value) +def _try_convert_to_int(value): + try: + return int(value) + except (TypeError, ValueError): + return None + + def extract_thickness(thickness, is_roof_room, is_at_rafters, is_loft, is_flat): + thickness_map = { + "below average": "50", + "average": "100", + "above average": "150", + "none": "0", + } + + # Normalise none value early + if thickness is None: + thickness = "none" + if is_roof_room or is_at_rafters: - # TODO: We get None instead of a string none, this should be fixed - if thickness is None: - thickness = "none" + + int_thickness = _try_convert_to_int(thickness) + if int_thickness is not None: + return int_thickness # We re-map the thickness - thickness_map = { - "below average": "50", - "average": "100", - 
"above average": "150", - "none": "0", - } - thickness = thickness_map[thickness] + + thickness = thickness_map.get(thickness) + if thickness is None: + return None + + return int(thickness) if is_flat: - try: - thickness = int(thickness) - return thickness - except (TypeError, ValueError): - # If thickness is not a valid number (could be a string or None), return None - return None + return _try_convert_to_int(thickness) - if thickness in ["below average", "average", "above average", "none", None] or ( - not is_loft and not is_roof_room and not is_at_rafters + # Thicknes will never be none + if thickness in thickness_map or ( + not (is_loft or is_roof_room or is_at_rafters) ): return None - elif thickness.endswith("+"): - thickness = int(thickness[:-1]) - return thickness - else: - try: - thickness = int(thickness) - return thickness - except ValueError: - # If thickness is not a valid number (could be a string or None), return None - return None + + if isinstance(thickness, str) and str(thickness).endswith("+"): + return _try_convert_to_int(thickness[:-1]) + + # final attempt + return _try_convert_to_int(thickness) def get_u_value_from_s9( @@ -553,7 +568,15 @@ def get_floor_u_value( lambda_ins = 0.035 # thermal conductivity of floor insulation in W/m·K wall_thickness = [ x[age_band] for x in default_wall_thickness if x["type"] == wall_type - ][0] + ] + if not wall_thickness: + # In some cases, we may estimate an EPC and end up with a slightly mixed EPC, with some fields associated + # to a new build and others to an existing. So we might end up with a None wall type here, because of this. 
+ # If this happens, nothing will be in the wall_thickness list so this is the fallback, the defauly thickness + # for many EPC assessment systems like Elmhurst + wall_thickness = 300 + else: + wall_thickness = wall_thickness[0] if wall_thickness is None and wall_type == "park home": # We don't know enough and likely won't make recommendations return 0 diff --git a/recommendations/tests/test_data/heating_recommendations_data.py b/recommendations/tests/test_data/heating_recommendations_data.py index f4b4c0a6..37c854c3 100644 --- a/recommendations/tests/test_data/heating_recommendations_data.py +++ b/recommendations/tests/test_data/heating_recommendations_data.py @@ -86,7 +86,7 @@ testing_examples = [ 'uprn-source': 'Address Matched', }, "heating_measure_types": [ - "high_heat_retention_storage_heater", + "high_heat_retention_storage_heaters", ], "notes": "This property has electric room heaters and is off gas so a boiler recommendation is not appropriate." "We would expect a high heat retention storage recommendation. 
The property is a flat and therefore" @@ -134,7 +134,7 @@ testing_examples = [ 'tenure': 'owner-occupied', 'fixed-lighting-outlets-count': 6.0, 'low-energy-fixed-light-count': 4.0, 'uprn': 100090311351.0, 'uprn-source': 'Address Matched', 'property-type_y': None, 'built-form_y': None, }, - "heating_measure_types": ['high_heat_retention_storage_heater', 'air_source_heat_pump'], + "heating_measure_types": ['high_heat_retention_storage_heaters', 'air_source_heat_pump'], "notes": "This test has electric storage heaters with automatic charge control - we recommend hhr storage" "heaters in this case, but because there are already electic storage heaters in place, we " "note, in the description of the recommendation, that this upgrade may be possible by retrofitting" @@ -275,7 +275,7 @@ testing_examples = [ 'uprn': 43088770.0, 'uprn-source': 'Address Matched', }, "heating_measure_types": [ - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property is a flat so we don't have an ASHP recommendation. It also doesn't have access to the " "mains and so it can't have a gas boiler. We don't expect any controls recommendations" @@ -370,7 +370,7 @@ testing_examples = [ }, "heating_measure_types": [ 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'boiler_upgrade' ], "notes": "This property has assumed electric heating and is mid-terrace house. It has a mains gas connection." 
@@ -416,7 +416,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has an oil boiler and doesn't have a mains gas connection so we can only recommend" "an air source heat pump and HHR (since if the home has a non-gas boiler, we recommend HHR)" @@ -463,7 +463,7 @@ testing_examples = [ }, "heating_measure_types": [ 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'air_source_heat_pump', 'boiler_upgrade' # TTZs ], @@ -512,7 +512,7 @@ testing_examples = [ "heating_measure_types": [ 'boiler_upgrade', 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has assumed electric heaters. Boiler upgrade, HHR are recommended. We don't recommend" "an ASHP off of the bat because it's mid-terrace." @@ -557,7 +557,7 @@ testing_examples = [ }, "heating_measure_types": [ 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'boiler_upgrade' ], "notes": "This has a form of assumed electric heating and has a mains connection so we recommend HHR, boiler" @@ -605,7 +605,7 @@ testing_examples = [ "heating_measure_types": [ 'boiler_upgrade', 'boiler_upgrade', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property already has storage heaters with manual charge control. 
The home is mid terrace so" "the ashp is not suitable" @@ -651,7 +651,7 @@ testing_examples = [ 'uprn-source': 'Address Matched', 'sheating-energy-eff': None, 'sheating-env-eff': None }, "heating_measure_types": [ - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'air_source_heat_pump', ], "notes": "This property has an LFG boiler but it doesn't have a mains gas connection so we can only recommend" @@ -696,7 +696,7 @@ testing_examples = [ 'uprn-source': 'Energy Assessor', 'sheating-energy-eff': None, 'sheating-env-eff': None }, "heating_measure_types": [ - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'air_source_heat_pump', ], "notes": "This property has electric boilers in place, but does not have a mains connection so we don't " @@ -744,7 +744,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater' + 'high_heat_retention_storage_heaters' ], "notes": "This property has a dual fuel boiler and no mains gas connection. We recommend ASHP and HHR, but" "no gas condensing boiler" @@ -788,7 +788,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has a coal boiler and no mains gas connection. We recommend ASHP and HHR, but" "no gas condensing boiler" @@ -835,7 +835,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has a smokeless fuel boiler and no mains gas connection. 
We recommend ASHP and HHR, but" "no gas condensing boiler" @@ -880,7 +880,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', ], "notes": "This property has a wood pellets boiler and no mains gas connection. We recommend ASHP and HHR, but" "no gas condensing boiler" @@ -925,7 +925,7 @@ testing_examples = [ 'uprn-source': 'Address Matched', 'sheating-energy-eff': None, 'sheating-env-eff': None }, "heating_measure_types": [ - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'air_source_heat_pump', ], "notes": "This is an end-terrace house, without mains gas connection, so we recommend is HHR & ASHP" @@ -1010,7 +1010,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater', + 'high_heat_retention_storage_heaters', 'time_temperature_zone_control', ], "notes": "This property has dual heating. A boiler and electric storage heaters. The heating is efficient so" @@ -1056,8 +1056,8 @@ testing_examples = [ "heating_measure_types": [ 'air_source_heat_pump', 'boiler_upgrade', - 'boiler_upgrade+high_heat_retention_storage_heater', - 'high_heat_retention_storage_heater', + 'boiler_upgrade+high_heat_retention_storage_heaters', + 'high_heat_retention_storage_heaters', 'time_temperature_zone_control' ], "notes": "This property is a modified version of the previous dual heating property, where we lower the" @@ -1104,7 +1104,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater' + 'high_heat_retention_storage_heaters' ], "notes": "This property has anthracite heating without mains. 
" "We recommend ASHP and HHR, but no gas condensing boiler" @@ -1151,7 +1151,7 @@ testing_examples = [ "heating_measure_types": [ 'boiler_upgrade', 'boiler_upgrade', - 'high_heat_retention_storage_heater' + 'high_heat_retention_storage_heaters' ], "notes": "This property has room heaters with two different fuel sources, so we recommend HHR, ASHP, and a " "boiler upgrade" @@ -1238,7 +1238,7 @@ testing_examples = [ }, "heating_measure_types": [ 'air_source_heat_pump', - 'high_heat_retention_storage_heater' + 'high_heat_retention_storage_heaters' ], "notes": "The property has warm air electricaire heating, so we recommend ASHP and HHR" }, diff --git a/recommendations/tests/test_heating_recommendations.py b/recommendations/tests/test_heating_recommendations.py index b18839aa..93acdefa 100644 --- a/recommendations/tests/test_heating_recommendations.py +++ b/recommendations/tests/test_heating_recommendations.py @@ -105,3 +105,108 @@ class TestHeatingRecommendations: {x["measure_type"] for x in recommender.heating_recommendations} == set(test_case["heating_measure_types"]) ) + + +@pytest.mark.parametrize( + "floor_area, epc_primary, expected_band, expected_model", + [ + # Case 1 – Typical pre-2000 house, gas heating + ( + 93.75, + 270.19, + (2.5, 4.6), # expected rough band (low, high) + 5, # chosen model + ), + # Case 2 – Efficient new-build (low EPC energy) + ( + 93.75, + 142.28, + (1.4, 2.4), + 3, # assume 3 or 5 kW model covers this + ), + ], +) +def test_estimate_peak_kw_basic(floor_area, epc_primary, expected_band, expected_model): + """ + Ensure the peak load estimate is within a sensible range and + that the model selection logic picks the correct bracket. 
+ """ + + load_band = HeatingRecommender.estimate_peak_kw( + floor_area_m2=floor_area, + epc_primary_kwh_per_m2_yr=epc_primary, + primary_to_delivered_factor=1.55, # electricity + space_heat_fraction_range=(0.35, 0.60), + hdd_base_dd=2000.0, + t_indoor_C=21.0, + t_design_ext_C=-1.0, + ) + + # Assert range sanity + assert expected_band[0] * 0.8 <= load_band[0] <= expected_band[1] * 1.2 + assert expected_band[0] <= load_band[1] <= expected_band[1] * 1.2 + + # Pick model + model = HeatingRecommender.pick_model(load_band, models_kw=(3, 5, 6, 8.5, 11.2)) + assert model == expected_model + + +def test_estimate_peak_kw_with_hlp(): + """ + Test direct HLP input path (best-quality data). + """ + hlp = 1.5 # W/m²K typical for semi-detached + floor_area = 100 + load_band = HeatingRecommender.estimate_peak_kw( + floor_area_m2=floor_area, + heat_loss_parameter_W_per_m2K=hlp, + t_indoor_C=21, + t_design_ext_C=-2, + ) + # Should return identical low/high values since it's direct + assert isinstance(load_band, tuple) + assert abs(load_band[0] - load_band[1]) < 1e-6 + # Expected peak = 1.5 * 100 * 23 / 1000 = 3.45 kW + assert pytest.approx(load_band[0], rel=0.05) == 3.45 + + +def test_estimate_peak_kw_with_space_heat_demand(): + """ + Test the space-heating-demand path. + """ + floor_area = 120 + space_heat_kwh_m2 = 100 + load_band = HeatingRecommender.estimate_peak_kw( + floor_area_m2=floor_area, + space_heat_kwh_per_m2_yr=space_heat_kwh_m2, + hdd_base_dd=2100, + t_indoor_C=21, + t_design_ext_C=-3, + ) + # Rough expected peak ~ (100*120*1000)/(2100*24) * 24 /1000 = 5.4 kW + assert 4.5 < load_band[0] < 6.0 + assert abs(load_band[0] - load_band[1]) < 1e-6 + + +def test_pick_model_boundaries(): + """ + Ensure pick_model correctly selects the smallest model covering the upper band. 
+ """ + assert HeatingRecommender.pick_model((2.0, 4.9), models_kw=(3, 5, 6, 8.5)) == 5 + assert HeatingRecommender.pick_model((5.0, 5.0), models_kw=(3, 5, 6, 8.5)) == 5 + assert HeatingRecommender.pick_model((5.0, 6.1), models_kw=(3, 5, 6, 8.5)) == 6 + assert HeatingRecommender.pick_model((8.6, 9.0), models_kw=(3, 5, 6, 8.5, 11.2)) == 11.2 + assert HeatingRecommender.pick_model((20, 25), models_kw=(3, 5, 6, 8.5, 11.2)) is None + + +def test_parameter_validation_and_defaults(): + """ + Validate that the function handles missing or minimal parameters properly. + """ + # Minimal path using primary energy only + load_band = HeatingRecommender.estimate_peak_kw( + floor_area_m2=80, + epc_primary_kwh_per_m2_yr=250, + ) + assert isinstance(load_band, tuple) + assert load_band[0] < load_band[1] diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index 430acaa8..ea0b5d94 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -8,11 +8,15 @@ from recommendations.optimiser.CostOptimiser import CostOptimiser class TestPrepareInputMeasures: + def test_returns_expected_structure_without_ventilation(self): recs = [ [ # loft insulation measure {"recommendation_id": "loft1", "type": "loft_insulation", "total": 100, "kwh_savings": 200, - "energy_cost_savings": 10, "has_battery": False, "measure_type": "loft_insulation"}, + "energy_cost_savings": 10, "has_battery": False, "measure_type": "loft_insulation", + "partial_project_funding": 0, "partial_project_score": 0, + "uplift_project_score": 0, + }, ], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=False) @@ -27,9 +31,12 @@ class TestPrepareInputMeasures: ["internal_wall_insulation"]) recs = [ [{"recommendation_id": "wall1", "type": "internal_wall_insulation", "total": 500, "kwh_savings": 300, - "energy_cost_savings": 5, "has_battery": False, 
"measure_type": "internal_wall_insulation"}], + "energy_cost_savings": 5, "has_battery": False, "measure_type": "internal_wall_insulation", + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, + }], [{"recommendation_id": "vent1", "type": "mechanical_ventilation", "total": 50, "kwh_savings": 30, - "energy_cost_savings": 5, "has_battery": False, "measure_type": "mechanical_ventilation"}], + "energy_cost_savings": 5, "has_battery": False, "measure_type": "mechanical_ventilation", + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, }], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=True) wall_option = measures[0][0] @@ -40,7 +47,8 @@ class TestPrepareInputMeasures: def test_filters_out_negative_cost_savings(self): recs = [ [{"recommendation_id": "bad1", "type": "loft_insulation", "total": 200, "kwh_savings": 100, - "energy_cost_savings": -5, "has_battery": False}], + "energy_cost_savings": -5, "has_battery": False, + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, }], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=False) assert measures == [] # should skip negative cost saving recs @@ -77,6 +85,22 @@ class TestCalculateGain: gain = optimiser_functions.calculate_gain(body, prop, fixed_gain=0) assert gain is None + def test_returns_zero_for_already_installed_getting_to_target(self): + body = SimpleNamespace(goal="Increasing EPC", goal_value="C") + p = SimpleNamespace(data={"current-energy-efficiency": "67"}, id=1) + fixed_gain = 0 + eco_packages = {1: (None, None, None, [])} + already_installed_sap = 2 + gain = optimiser_functions.calculate_gain( + body=body, + p=p, + fixed_gain=fixed_gain, + eco_packages=eco_packages, + already_installed_gain=already_installed_sap + ) + + assert gain == 0 + def test_calculates_gain_for_epc(self, monkeypatch): # patch cost 
optimiser calculation monkeypatch.setattr(optimiser_functions, "epc_to_sap_lower_bound", lambda goal_value: 69) @@ -149,14 +173,14 @@ class TestIncreasingEpcE2e: @pytest.fixture def setup_case(self): - # ✅ Dummy property object + # Dummy property object p = SimpleNamespace( id="P1", has_ventilation=False, data={"current-energy-efficiency": "52"}, ) - # ✅ Dummy request body + # Dummy request body body = SimpleNamespace( goal="Increasing EPC", goal_value="C", @@ -165,9 +189,6 @@ class TestIncreasingEpcE2e: simulate_sap_10=False, required_measures=[] ) - - # ✅ Use your massive measures_to_optimise list - recommendations = {"P1": measures_to_optimise} return p, body, recommendations @@ -190,6 +211,18 @@ class TestIncreasingEpcE2e: assert needs_ventilation + # Input the various things we need - set all to 0 + for group in measures_to_optimise: + for r in group: + ( + r["partial_project_score"], + r["partial_project_funding"], + r["innovation_uplift"], + r["uplift_project_score"], + ) = ( + 0, 0, 0, 0 + ) + input_measures = optimiser_functions.prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation) assert input_measures, "Expected measures to optimise" diff --git a/recommendations/tests/test_optimisers.py b/recommendations/tests/test_optimisers.py index df5cc2e1..ecc6ea56 100644 --- a/recommendations/tests/test_optimisers.py +++ b/recommendations/tests/test_optimisers.py @@ -1,97 +1,14 @@ -import numpy as np -# import pandas as pd from pandas import Timestamp from numpy import nan import datetime -# import backend.app.assumptions as assumptions -# import recommendations.optimiser.optimiser_functions as optimiser_functions -# -# from backend.Funding import Funding -# -# project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv") -# partial_project_scores_matrix = pd.read_csv("backend/tests/test_data/ECO4_Partial_Project_Scores_Matrix_v6.csv") -# partial_project_scores_matrix.columns = ['Measure 
category', 'Measure_Type', 'Pre_Main_Heating_Source', -# 'Post_Main_Heating_Source', 'Total Floor Area Band', 'Starting Band', -# 'Average Treatable Factor', 'Cost Savings', 'SAP Savings'] -# whlg_eligible_postcodes = pd.DataFrame([{"Postcode": "ab12cd"}]) -# -# funding = Funding( -# project_scores_matrix=project_scores_matrix, -# partial_project_scores_matrix=partial_project_scores_matrix, -# whlg_eligible_postcodes=whlg_eligible_postcodes, -# eco4_social_cavity_abs_rate=13.5, -# eco4_social_solid_abs_rate=17, -# eco4_private_cavity_abs_rate=13.5, -# eco4_private_solid_abs_rate=17, -# gbis_social_cavity_abs_rate=21, -# gbis_social_solid_abs_rate=25, -# gbis_private_cavity_abs_rate=22, -# gbis_private_solid_abs_rate=28, -# tenure="Social" -# ) -# -# # Assume these costs have been adjusted - - -# -# # Insert the funding uplifts -# for recs in property_recommendations: -# for r in recs: -# # Insert randomly -# # Select one of 0, 0.25 or 0.45 -# r["uplift"] = np.random.choice([0, 0.25, 0.45]) -# -# # We calculate the innovation uplift against each measure -# for recs in property_recommendations: -# for r in recs: -# if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating"]: -# r["innovation_uplift"] = 0 -# continue -# r["innovation_uplift"] = funding.get_innovation_uplift( -# measure=r, -# starting_sap=p.data["current-energy-efficiency"], -# floor_area=p.floor_area, -# is_cavity=False, -# current_wall_uvalue=1.7, -# is_partial=False, -# existing_li_thickness=150, -# mainheating=p.main_heating, -# main_fuel=p.main_fuel, -# mainheat_energy_eff=p.data["mainheat-energy-eff"], -# ) -# print(r["innovation_uplift"]) -# -# property_measure_types = {rec["type"] for recs in property_recommendations for rec in recs} -# property_required_measures = [m for m in property_recommendations if m[0]["type"] in []] -# measures_to_optimise = [m for m in property_recommendations if m[0]["type"] not in []] -# -# # If a measure requiring ventilation is selected, 
and the property does not have ventilation, we enfore -# # its inclusion -# needs_ventilation = any( -# x in property_measure_types for x in assumptions.measures_needing_ventilation -# ) and not p.has_ventilation -# -# input_measures = optimiser_functions.prepare_input_measures( -# measures_to_optimise, "Increasing EPC", needs_ventilation, True -# ) -# -# # ---- main wrapper around your optimiser ---------------------------------- -# -# # Run inputs: -# target_gain = 18.5 -# -# # Run the optimiser with these inouts - - -# tests/test_social_fabric_only.py import numpy as np import pandas as pd import pytest from copy import deepcopy from recommendations.optimiser import optimiser_functions -from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths # wherever you defined it +from recommendations.optimiser.funding_optimiser import optimise_with_funding_paths, build_heat_pump_paths from backend.Funding import Funding from backend.app.plan.schemas import WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, ECO4_ELIGIBILE_FABRIC_MEASURES @@ -144,6 +61,15 @@ class DummyProp: self.has_ventilation = False self.floor_area = 70.0 self.main_heating_controls = {"clean_description": "time and temperature zone control"} + self.walls = {'original_description': 'Solid brick, as built, no insulation (assumed)', + 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, + 'is_solid_brick': True, + 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, + 'is_as_built': True, + 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, + 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False} self.main_heating = { 'original_description': 'Boiler and radiators, mains gas', @@ -230,6 +156,7 @@ def property_recommendations(): 'quantity_unit': 'm2', 'total': 19090.810139104888, 'labour_hours': 0.0, 'labour_days': 0.0}], 
'type': 'external_wall_insulation', 'measure_type': 'external_wall_insulation', + "innovation_rate": 0, 'description': 'Install 150mm EWI Pro EPS external wall insulation system with Brick ' 'Slip finish on external walls', 'starting_u_value': 1.7, 'new_u_value': 0.32, 'already_installed': False, @@ -258,6 +185,7 @@ def property_recommendations(): 'quantity_unit': 'm2', 'total': 5694.929118083911, 'labour_hours': 134.37473199973275, 'labour_days': 4.199210374991648}], 'type': 'internal_wall_insulation', 'measure_type': 'internal_wall_insulation', + "innovation_rate": 0, 'description': 'Install 95mm ' 'SWIP EcoBatt & ' 'Plastered ' @@ -314,6 +242,7 @@ def property_recommendations(): 'quantity_unit': 'm2', 'total': 645.0, 'labour_hours': 8, 'labour_days': 1}], 'type': 'loft_insulation', 'measure_type': 'loft_insulation', + "innovation_rate": 0, 'description': 'Install 300mm of Knauf Loft Roll 44 glass fibre roll in your loft', 'starting_u_value': 2.3, 'new_u_value': 2.3, 'sap_points': np.float64(2.4), 'already_installed': False, @@ -338,6 +267,7 @@ def property_recommendations(): 'plant_cost': 0.0, 'total_cost': 350.0, 'notes': None, 'is_installer_quote': True, 'total': 700.0, 'quantity': 2, 'quantity_unit': 'part'}], 'type': 'mechanical_ventilation', 'measure_type': 'mechanical_ventilation', + "innovation_rate": 0, 'description': 'Install 2 ' 'Mechanical ' 'Extract ' @@ -387,6 +317,7 @@ def property_recommendations(): 'labour_hours': 70.08999999999999, 'labour_days': 2.920416666666666}], 'type': 'suspended_floor_insulation', 'measure_type': 'suspended_floor_insulation', + "innovation_rate": 0, 'description': 'Install 75mm Q-bot underfloor insulation insulation in suspended ' 'floor', 'starting_u_value': 0.83, 'new_u_value': 0.22, 'sap_points': 2, 'survey': True, @@ -401,6 +332,7 @@ def property_recommendations(): 'energy_cost_savings': np.float64(76.04936470588231)}], [ {'phase': 4, 'parts': [], 'type': 'low_energy_lighting', 'measure_type': 'low_energy_lighting', + 
"innovation_rate": 0, 'description': 'Install low energy lighting in -886 outlets', 'starting_u_value': None, 'new_u_value': None, 'already_installed': False, 'sap_points': 2, 'kwh_savings': -48508.5, 'energy_cost_savings': -12481.237049999998, @@ -413,6 +345,7 @@ def property_recommendations(): 'recommendation_id': '5_phase=4', 'efficiency': -1705.5500000000002, 'heat_demand': np.float64(5.099999999999994)}], [ {'type': 'heating', 'phase': 5, 'measure_type': 'time_temperature_zone_control', + "innovation_rate": 0, 'parts': [], 'description': 'Upgrade heating controls to Smart Thermostats, room sensors and ' 'smart radiator valves (time & temperature zone control)', @@ -431,6 +364,7 @@ def property_recommendations(): 'energy_cost_savings': np.float64(65.29581176470589)}], [ {'phase': 6, 'parts': [], 'type': 'secondary_heating', 'measure_type': 'secondary_heating', + "innovation_rate": 0, 'description': 'Remove the secondary heating system', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(3.6), 'already_installed': False, 'total': 30.0, 'subtotal': 25.0, 'vat': 5.0, 'labour_hours': 3.0, @@ -443,6 +377,7 @@ def property_recommendations(): 'kwh_savings': np.float64(196.29999999999927), 'energy_cost_savings': np.float64(14.61857647058821)}], [ {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 4.0 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(13.0), 'already_installed': False, 'total': 6013.139999999999, 'subtotal': 5010.95, 'vat': 0, @@ -455,6 +390,7 @@ def property_recommendations(): 'kwh_savings': np.float64(2040.8566307499998), 'energy_cost_savings': np.float64(525.1124110919749)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 4.0 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 
'new_u_value': None, 'sap_points': np.float64(13.0), 'already_installed': False, 'total': 10537.008, 'subtotal': 8780.84, 'vat': 0, @@ -467,6 +403,7 @@ def property_recommendations(): 'kwh_savings': np.float64(2857.1992830499994), 'energy_cost_savings': np.float64(735.1573755287648)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.6 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(12.0), 'already_installed': False, 'total': 5826.491999999999, 'subtotal': 4855.41, 'vat': 0, @@ -478,6 +415,7 @@ def property_recommendations(): 'heat_demand': np.float64(83.69999999999999), 'kwh_savings': np.float64(1846.33397), 'energy_cost_savings': np.float64(475.0617304809999)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.6 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(12.0), 'already_installed': False, 'total': 10350.359999999999, 'subtotal': 8625.3, 'vat': 0, @@ -489,6 +427,7 @@ def property_recommendations(): 'heat_demand': np.float64(83.69999999999999), 'kwh_savings': np.float64(2584.867558), 'energy_cost_savings': np.float64(665.0864226734)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.2 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(11.0), 'already_installed': False, 'total': 5642.604, 'subtotal': 4702.17, 'vat': 0, @@ -500,6 +439,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1650.2708274), 'energy_cost_savings': np.float64(424.61468389001993)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 3.2 kilowatt-peak (kWp) solar panel system, 
with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(11.0), 'already_installed': False, 'total': 10166.472, 'subtotal': 8472.06, 'vat': 0, @@ -511,6 +451,7 @@ def property_recommendations(): 'heat_demand': np.float64(78.3), 'kwh_savings': np.float64(2310.3791583599996), 'energy_cost_savings': np.float64(594.4605574460278)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.8 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(9.0), 'already_installed': False, 'total': 5458.727999999999, 'subtotal': 4548.94, 'vat': 0, @@ -522,6 +463,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1453.5933906), 'energy_cost_savings': np.float64(374.00957940138)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.8 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(9.0), 'already_installed': False, 'total': 9982.596, 'subtotal': 8318.83, 'vat': 0, @@ -533,6 +475,7 @@ def property_recommendations(): 'heat_demand': np.float64(64.0), 'kwh_savings': np.float64(2035.03074684), 'energy_cost_savings': np.float64(523.6134111619319)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.4 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(8.0), 'already_installed': False, 'total': 5274.852, 'subtotal': 4395.71, 'vat': 0, @@ -544,6 +487,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1255.12594), 'energy_cost_savings': np.float64(322.94390436199996)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.4 kilowatt-peak (kWp) solar 
panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(8.0), 'already_installed': False, 'total': 9798.72, 'subtotal': 8165.6, 'vat': 0, @@ -555,6 +499,7 @@ def property_recommendations(): 'heat_demand': np.float64(54.3), 'kwh_savings': np.float64(1757.1763159999998), 'energy_cost_savings': np.float64(452.1214661067999)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.0 kilowatt-peak (kWp) solar panel system.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(7.0), 'already_installed': False, 'total': 5090.976, 'subtotal': 4242.48, 'vat': 0, @@ -566,6 +511,7 @@ def property_recommendations(): 'kwh_savings': np.float64(1048.341318), 'energy_cost_savings': np.float64(269.7382211214)}, {'phase': 7, 'parts': [], 'type': 'solar_pv', 'measure_type': 'solar_pv', + "innovation_rate": 0, 'description': 'Install a 2.0 kilowatt-peak (kWp) solar panel system, with a battery.', 'starting_u_value': None, 'new_u_value': None, 'sap_points': np.float64(7.0), 'already_installed': False, 'total': 9614.844, 'subtotal': 8012.369999999999, 'vat': 0, @@ -586,10 +532,20 @@ def _attach_costs_and_uplifts(recs, funding, p): for group in out: for r in group: if r["type"] in ["mechanical_ventilation", "low_energy_lighting", "secondary_heating"]: - r["innovation_uplift"] = 0 + ( + r["partial_project_score"], + r["partial_project_funding"], + r["innovation_uplift"], + r["uplift_project_score"], + ) = ( + 0, 0, 0, 0 + ) continue - r["uplift"] = 0.0 # fixed for determinism in test - r["innovation_uplift"] = funding.get_innovation_uplift( + + ( + r["partial_project_score"], r["partial_project_funding"], r["innovation_uplift"], + r["uplift_project_score"] + ) = funding.get_innovation_uplift( measure=r, starting_sap=55, floor_area=70.0, @@ -663,3 +619,111 @@ def test_social_fabric_only_returns_only_fabric_types(p, funding, property_recom unfunded_rows 
= solutions[ solutions["path"].apply(lambda x: isinstance(x, dict) and x.get("reference") == "unfunded:all")] assert not unfunded_rows.empty + + +def test_private_solid_wall_no_innovation_epc_d(p, funding, mock_project_scores_matrix, mock_partial_scores_matrix): + """ + We have a specific test for this case which was implemented incorrectly originally. + This is an EPC D property and so shouldn't be eligible for ECO4. Instead, only GBIS should be considered. + """ + + # Overwrite the data - copied from real example + p2 = deepcopy(p) + p2.data = { + "current-energy-rating": "D", + "current-energy-efficiency": 68, + "mainheat-energy-eff": "Good", + } + p2.walls = {'original_description': 'Sandstone or limestone, as built, no insulation (assumed)', + 'clean_description': 'Sandstone or limestone, as built, no insulation', 'thermal_transmittance': None, + 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, + 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, + 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_assumed': True, + 'is_sandstone_or_limestone': True, 'is_park_home': False, 'insulation_thickness': 'none', + 'external_insulation': False, 'internal_insulation': False} + + funding2 = Funding( + tenure="Private", + project_scores_matrix=mock_project_scores_matrix, + partial_project_scores_matrix=mock_partial_scores_matrix, + whlg_eligible_postcodes=pd.DataFrame([{"Postcode": "ab12cd"}]), + eco4_social_cavity_abs_rate=12.5, + eco4_social_solid_abs_rate=17, + eco4_private_cavity_abs_rate=12.5, + eco4_private_solid_abs_rate=17, + gbis_social_cavity_abs_rate=21, + gbis_social_solid_abs_rate=25, + gbis_private_cavity_abs_rate=21, + gbis_private_solid_abs_rate=28, + ) + + input_measures = [ + [{'id': '0_phase=0', 'cost': np.float64(4441.202499013676), 'gain': np.float64(3.4000000000000057), + 'type': 'internal_wall_insulation+mechanical_ventilation', 'innovation_uplift': 
np.float64(0.0), + 'cost_minus_uplift': np.float64(4441.202499013676), 'raw_cost': 3881.2024990136756, + 'partial_project_funding': np.float64(2300.1000000000004), 'partial_project_score': np.float64(135.3), + 'uplift_project_score': np.float64(0.0)}], [ + {'id': '2_phase=2', 'cost': np.float64(2280.0), 'gain': np.float64(0.4), 'type': 'secondary_glazing', + 'innovation_uplift': np.float64(0.0), 'cost_minus_uplift': np.float64(2280.0), + 'raw_cost': np.float64(2280.0), 'partial_project_funding': np.float64(1421.1999999999998), + 'partial_project_score': np.float64(83.6), 'uplift_project_score': np.float64(0.0)}], [ + {'id': '3_phase=3', 'cost': np.float64(604.5840000000001), 'gain': np.float64(1.2), + 'type': 'time_temperature_zone_control', 'innovation_uplift': np.float64(0.0), + 'cost_minus_uplift': np.float64(604.5840000000001), 'raw_cost': 604.5840000000001, + 'partial_project_funding': np.float64(702.0999999999999), 'partial_project_score': np.float64(41.3), + 'uplift_project_score': np.float64(0.0)}], [ + {'id': '4_phase=4', 'cost': 60.0, 'gain': np.float64(0.0), 'type': 'secondary_heating', + 'innovation_uplift': 0, 'cost_minus_uplift': 60.0, 'raw_cost': 60.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0}] + ] + + solutions = optimise_with_funding_paths( + p=p2, + input_measures=input_measures, + housing_type="Private", + budget=None, + target_gain=1.5, + funding=funding2 + ) + + # 3) basic shape assertions + assert isinstance(solutions, pd.DataFrame) + assert not solutions.empty + + # We should have 2 rows + assert solutions.shape[0] == 2 + + # We should only have None or GBIS + assert set(solutions["scheme"].unique()) == {"none", "gbis"} + + meets_upgrade_gbis = solutions[solutions["meets_upgrade_target"] & solutions["is_eligible"]] + assert meets_upgrade_gbis.shape[0] == 1 + + # Check exact result + assert meets_upgrade_gbis.squeeze().to_dict() == { + 'fixed_ids': ['0_phase=0'], 'items': [ + {'id': '0_phase=0', 
'cost': 3881.2024990136756, 'gain': np.float64(3.4000000000000057), + 'type': 'internal_wall_insulation+mechanical_ventilation', 'innovation_uplift': np.float64(0.0), + 'cost_minus_uplift': np.float64(4441.202499013676), 'raw_cost': 3881.2024990136756, + 'partial_project_funding': np.float64(2300.1000000000004), 'partial_project_score': np.float64(135.3), + 'uplift_project_score': np.float64(0.0)}], 'total_cost': 3881.2024990136756, + 'total_gain': 3.4000000000000057, 'path': [{'AND': ['internal_wall_insulation+mechanical_ventilation'], + 'reference': + 'internal_wall_insulation+mechanical_ventilation:gbis'}], + 'scheme': 'gbis', 'is_eligible': True, 'unfunded_items': [], 'meets_upgrade_target': True, 'starting_sap': 68, + 'floor_area': 70.0, 'ending_sap': 71.4, 'starting_band': 'High_D', 'ending_band': 'Low_C', + 'floor_area_band': '0-72', 'project_score': 540.0, 'full_project_funding': 0.0, + 'partial_project_funding': 2300.1000000000004, 'partial_project_score': 135.3, 'total_uplift': 0.0, + 'total_uplift_score': 0.0 + } + + +def test_build_heat_pump_paths(): + eg1 = build_heat_pump_paths([], ["loft_insulation"]) + + assert eg1 == [{'AND': ['loft_insulation', 'air_source_heat_pump']}] + + eg2 = build_heat_pump_paths(["internal_wall_insulation", "external_wall_insulation"], ["loft_insulation"]) + + assert eg2 == [{'AND': ['internal_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}, + {'AND': ['external_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}] diff --git a/serverless.yml b/serverless.yml index c1fc0b09..f3def028 100644 --- a/serverless.yml +++ b/serverless.yml @@ -48,7 +48,7 @@ functions: fastapi-backend: handler: backend.app.main.handler - timeout: 30 + timeout: 600 memorySize: 512 role: FastApiLambdaRole events: @@ -66,7 +66,7 @@ functions: - sqs: arn: arn:aws:sqs:${self:provider.region}:${aws:accountId}:model-engine-queue batchSize: 1 - maximumConcurrency: 2 # Heavily restricts concurrency to avoid overwhelming the ldmbda 
limits + maximumConcurrency: 12 # Heavily restricts concurrency to avoid overwhelming the ldmbda limits resources: diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 79238273..f12eb85d 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -3,76 +3,126 @@ This script prepares the data for the financial model """ import pandas as pd +import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker -from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.connection import db_engine, db_read_session +from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial +from backend.app.db.functions.materials_functions import get_materials +from collections import defaultdict # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 221 -SCENARIOS = [427] +PORTFOLIO_ID = 435 # Peabody +SCENARIOS = [ + 908, + 909, + 910, +] +scenario_names = { + 908: "EPC C - no solid floor, ashp 3.0", + 909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0", + 910: "EPC B - no solid floor, no EWI, ashp 3.0" +} def get_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() session.begin() - # Get properties and their details for a specific portfolio + # -------------------- + # Properties + # -------------------- properties_query = session.query( PropertyModel, PropertyDetailsEpcModel ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID + PropertyModel.portfolio_id == portfolio_id ).all() - # 
Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} - for prop in properties_query + { + **{col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns}, + **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns}, + } + for p in properties_query ] - # Get property IDs from fetched properties + # -------------------- + # Plans + # -------------------- + plans_query = session.query(Plan).filter( + Plan.scenario_id.in_(scenario_ids) + ).all() - # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() - - # Transform plans data to include all fields dynamically plans_data = [ {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} for plan in plans_query ] - # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [p["id"] for p in plans_data] - # Get recommendations through PlanRecommendations for those plans and that are default + # -------------------- + # Recommendations (NO materials yet) + # -------------------- recommendations_query = session.query( Recommendation, Plan.scenario_id ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id + Plan, + Plan.id == PlanRecommendations.plan_id ).filter( PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations + Recommendation.default.is_(True), + 
Recommendation.already_installed.is_(False) ).all() - # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} - for rec in recommendations_query + { + **{col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns}, + "scenario_id": r.scenario_id, + "materials": [] # placeholder + } + for r in recommendations_query ] + recommendation_ids = [r["id"] for r in recommendations_data] + + # -------------------- + # Recommendation materials (SEPARATE QUERY) + # -------------------- + materials_query = session.query( + RecommendationMaterials + ).filter( + RecommendationMaterials.recommendation_id.in_(recommendation_ids) + ).all() + + # Group materials by recommendation_id + materials_by_recommendation = defaultdict(list) + + for m in materials_query: + materials_by_recommendation[m.recommendation_id].append({ + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + }) + + # Attach materials safely (no filtering side effects) + for r in recommendations_data: + r["materials"] = materials_by_recommendation.get(r["id"], []) + session.close() return properties_data, plans_data, recommendations_data @@ -84,66 +134,128 @@ properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) recommendations_df = pd.DataFrame(recommendations_data) -recommended_measures_df = recommendations_df[ - ["property_id", "measure_type", "estimated_cost", "default"] -] -recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] -recommended_measures_df = recommended_measures_df.drop(columns=["default"]) +with db_read_session() as session: + materials = 
get_materials(session) -post_install_sap = recommendations_df[["property_id", "default", "sap_points"]] -post_install_sap = post_install_sap[post_install_sap["default"]] -# Sum up the sap points by property id -post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +materials = pd.DataFrame(materials) -recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' -) -recommendations_measures_pivot = recommendations_measures_pivot.reset_index() - -# Total cost is the row sum, excluding the property_id column -recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop( - columns=["property_id"] -).sum(axis=1) - -df = properties_df[ - [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", - "current_epc_rating", - "current_sap_points", "total_floor_area", "number_of_rooms", - ] -].merge( - recommendations_measures_pivot, how="left", on="property_id" -).merge( - post_install_sap, how="left", on="property_id" +material_lookup = ( + materials + .set_index("id")[["type", "includes_battery"]] + .to_dict("index") ) -df = df.drop(columns=["property_id"]) -df["sap_points"] = df["sap_points"].fillna(0) -df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] -df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round() -df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x)) +def has_solar_with_battery(materials_list): + for m in materials_list or []: + mat = material_lookup.get(m["material_id"]) + if not mat: + continue + if mat["type"] == "solar_pv" and mat["includes_battery"]: + return True + return False + + +recommendations_df["has_solar_with_battery"] = ( + recommendations_df["materials"].apply(has_solar_with_battery) +) + +recommendations_df["measure_type"] = np.where( + 
recommendations_df["has_solar_with_battery"] == True, + recommendations_df["measure_type"] + "_with_battery", + recommendations_df["measure_type"] +) + +# Adjust material type to indicate if there is a battery included -# We merge this back to the main dataframe, which will contain the bathrooms from utils.s3 import read_csv_from_s3, read_excel_from_s3 -# asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath='8/206/asset_list.csv') -asset_list = read_excel_from_s3( - bucket_name="retrofit-plan-inputs-dev", file_key='8/221/20250722T202328736Z/asset_list.xlsx', - header_row=0, sheet_name="320 - edited" -) -asset_list = pd.DataFrame(asset_list) -asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy() -asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"}) -df["uprn"] = df["uprn"].astype(str) -asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str) -asset_list = asset_list.merge( - df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]), - how="left", - on="uprn" -) +# asset_list = read_excel_from_s3( +# bucket_name="retrofit-plan-inputs-dev", file_key="2/404/20251211T163200754Z/asset_list.xlsx", +# header_row=0, sheet_name="Standardised Asset List" +# ) + + +for scenario_id in SCENARIOS: + # Get recs for this scenario + recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ + ["property_id", "measure_type", "estimated_cost", "default"] + ] + recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] + recommended_measures_df = recommended_measures_df.drop(columns=["default"]) + + post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ + ["property_id", "default", "sap_points"]] + post_install_sap = post_install_sap[post_install_sap["default"]] + # Sum up the sap points by property id + post_install_sap = 
post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index() + + # Find dupes by property id and measure type + dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False) + dupe_df = recommended_measures_df[dupes] + + if dupe_df.shape: + # Drop dupes - happened due to a funny bug + recommended_measures_df = recommended_measures_df.drop_duplicates( + subset=["property_id", "measure_type"], keep='first' + ) + + recommendations_measures_pivot = recommended_measures_df.pivot( + index='property_id', + columns='measure_type', + values='estimated_cost' + ) + recommendations_measures_pivot = recommendations_measures_pivot.reset_index() + + # Total cost is the row sum, excluding the property_id column + recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop( + columns=["property_id"] + ).sum(axis=1) + + df = properties_df[ + [ + "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", + "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", + ] + ].merge( + recommendations_measures_pivot, how="left", on="property_id" + ).merge( + post_install_sap, how="left", on="property_id" + ) + + df = df.drop(columns=["property_id"]) + df["sap_points"] = df["sap_points"].fillna(0) + + df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] + df["predicted_post_works_sap"] = df["predicted_post_works_sap"].round() + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x)) + df["uprn"] = df["uprn"].astype(str) + + # Create excel to store to + filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + f"Project/Final SAL/{scenario_names[scenario_id]} - 20250113 final.xlsx") + with pd.ExcelWriter(filename) as writer: + df.to_excel(writer, sheet_name="properties", index=False) + + +# asset_list = 
pd.DataFrame(asset_list) +# asset_list = asset_list.rename( +# columns={ +# "postcode": "domna_postcode" +# } +# ) +# if "domna_full_address": +# # For Peabody +# asset_list["domna_full_address"] = asset_list["domna_address_1"] +# +# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy() +# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"}) +# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str) +# asset_list = asset_list.merge( +# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]), +# how="left", +# on="uprn" +# ) # Get conservation area data from property details spatial. based on the UPRNs @@ -179,11 +291,16 @@ asset_list = asset_list.merge( on="uprn" ) -# For exporting NCHA -asset_list.to_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/320 Portfolio/asset_list_epc_b.xlsx", +# For exporting +df.to_excel( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - " + "with ID.xlsx", index=False ) +# asset_list.to_excel( +# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx", +# index=False +# ) condition_costs = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",