Merge branch 'main' of https://github.com/Hestia-Homes/Model into feature/ara-rebaselining

# Conflicts: # asset_list/app.py
2026-07-27 23:35:01 +00:00 · 2026-03-06 09:55:24 +00:00 · 2026-03-06 09:55:24 +00:00 · 6d3b6beadc
commit 6d3b6beadc
parent 2400ade256 3bae341f69
146 changed files with 10149 additions and 3375 deletions
--- a/.devcontainer/asset_list/devcontainer.json
+++ b/.devcontainer/asset_list/devcontainer.json
@ -22,7 +22,9 @@
        "jgclark.vscode-todo-highlight",
        "corentinartaud.pdfpreview",
        "ms-python.vscode-python-envs",
-        "ms-python.black-formatter"
+        "ms-python.black-formatter",
+        "GrapeCity.gc-excelviewer",
+        "jakobhoeg.vscode-pokemon"
      ],
      "settings": {
        "files.defaultWorkspace": "/workspaces/model",
--- a/.devcontainer/backend/Dockerfile
+++ b/.devcontainer/backend/Dockerfile
@ -43,4 +43,24 @@ WORKDIR /workspaces/model

 # 6) Make Python find your package
 # Add project root to PYTHONPATH for all processes
-ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}
+ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}
+
+
+# Install terraform
+RUN apt-get update && sudo apt-get install -y gnupg software-properties-common
+RUN wget -O- https://apt.releases.hashicorp.com/gpg | \
+gpg --dearmor | \
+sudo tee /usr/share/keyrings/hashicorp-archive-keyring.gpg > /dev/null
+RUN echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] \
+https://apt.releases.hashicorp.com $(lsb_release -cs) main" | \
+tee /etc/apt/sources.list.d/hashicorp.list
+RUN apt update
+RUN apt-get install terraform
+RUN terraform -install-autocomplete
+
+# Install postgres
+RUN apt install -y wget gnupg2 lsb-release
+RUN echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
+RUN wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add -
+RUN apt update
+RUN apt install -y postgresql-14
--- a/.devcontainer/backend/devcontainer.json
+++ b/.devcontainer/backend/devcontainer.json
@ -6,7 +6,8 @@
  "workspaceFolder": "/workspaces/model",
  "postStartCommand": "bash .devcontainer/backend/post-install.sh",
  "mounts": [
-    "source=${localEnv:HOME},target=/workspaces/home,type=bind"
+    // "source=${localEnv:HOME},target=/home/vscode,type=bind",
+    "source=${localEnv:HOME}/.aws,target=/home/vscode/.aws,type=bind,consistency=cached"
  ],
  "customizations": {
    "vscode": {
@ -22,7 +23,11 @@
        "corentinartaud.pdfpreview",
        "ms-python.vscode-python-envs",
        "ms-python.black-formatter",
-        "waderyan.gitblame"
+        "waderyan.gitblame",
+        "GrapeCity.gc-excelviewer",
+        "jakobhoeg.vscode-pokemon",
+        "github.vscode-github-actions",
+        "me-dutour-mathieu.vscode-github-actions"
      ],
      "settings": {
        "files.defaultWorkspace": "/workspaces/model",
@ -38,3 +43,4 @@
    "PYTHONFLAGS": "-Xfrozen_modules=off"
  }
 }
+ 
--- a/.devcontainer/backend/requirements.txt
+++ b/.devcontainer/backend/requirements.txt
@ -9,7 +9,7 @@ mangum==0.19.0
 # AWS
 boto3==1.35.44
 # Data
-openpyxl==3.1.2
+openpyxl==3.1.5
 # Basic
 pytz
 uvicorn[standard]
@ -18,5 +18,9 @@ sqlmodel
 pytest==9.0.2
 pytest-cov==7.0.0
 ipykernel>=6.25,<7
+dotenv
+psycopg[binary]
+pytest-postgresql
 # Formatting
-black==26.1.0
+black==26.1.0
+boto3-stubs
--- a/.github/workflows/_build_image.yml
+++ b/.github/workflows/_build_image.yml
@ -38,6 +38,8 @@ on:
        required: false
      DEV_DB_NAME:
        required: false
+      EPC_AUTH_TOKEN:
+        required: false

 jobs:
  build:
@ -47,6 +49,7 @@ jobs:
      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+      EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }}

    outputs:
      image_digest: ${{ steps.digest.outputs.image_digest }}
@ -87,14 +90,17 @@ jobs:
            temp=$(eval echo "$line")
            BUILD_ARGS="$BUILD_ARGS --build-arg $temp"
          done <<< "${{ inputs.build_args }}"
-          
-          docker build \
+
+          docker buildx build \
+            --no-cache \
+            --platform linux/amd64 \
+            --provenance=false \
+            --sbom=false \
+            --push \
            -f ${{ inputs.dockerfile_path }} \
            $BUILD_ARGS \
            -t $IMAGE_URI \
            ${{ inputs.build_context }}
-      
-          docker push $IMAGE_URI

      - name: Resolve image digest
        id: digest
--- a/.github/workflows/_deploy_lambda.yml
+++ b/.github/workflows/_deploy_lambda.yml
@ -42,6 +42,22 @@ on:
        required: true
      AWS_REGION:
        required: true
+      TF_VAR_db_host:
+        required: false
+      TF_VAR_db_name:
+        required: false
+      TF_VAR_db_port:
+        required: false
+      TF_VAR_api_key:
+        required: false
+      TF_VAR_secret_key:
+        required: false
+      TF_VAR_domain_name:
+        required: false
+      TF_VAR_epc_auth_token:
+        required: false
+      TF_VAR_google_solar_api_key:
+        required: false

 jobs:
  deploy:
@ -90,6 +106,15 @@ jobs:

      - name: Terraform Plan
        working-directory: ${{ inputs.lambda_path }}
+        env:
+          TF_VAR_db_host: ${{ secrets.TF_VAR_db_host }}
+          TF_VAR_db_name: ${{ secrets.TF_VAR_db_name }}
+          TF_VAR_db_port: ${{ secrets.TF_VAR_db_port }}
+          TF_VAR_api_key: ${{ secrets.TF_VAR_api_key }}
+          TF_VAR_secret_key: ${{ secrets.TF_VAR_secret_key }}
+          TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }}
+          TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }}
+          TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }}
        run: |
          terraform plan \
            -var="stage=${{ inputs.stage }}" \
@ -106,4 +131,18 @@ jobs:
      - name: Terraform Destroy
        if: inputs.terraform_destroy == 'true' && inputs.terraform_apply != 'true'
        working-directory: ${{ inputs.lambda_path }}
-        run: terraform destroy -auto-approve
+        env:
+          TF_VAR_db_host: ${{ secrets.TF_VAR_db_host }}
+          TF_VAR_db_name: ${{ secrets.TF_VAR_db_name }}
+          TF_VAR_db_port: ${{ secrets.TF_VAR_db_port }}
+          TF_VAR_api_key: ${{ secrets.TF_VAR_api_key }}
+          TF_VAR_secret_key: ${{ secrets.TF_VAR_secret_key }}
+          TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }}
+          TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }}
+          TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }}
+        run: |
+          terraform destroy -auto-approve \
+            -var="stage=${{ inputs.stage }}" \
+            -var="lambda_name=${{ inputs.lambda_name }}" \
+            -var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
+            -var="image_digest=${{ inputs.image_digest }}"
--- a/.github/workflows/deploy_fastapi_backend.yml
+++ b/.github/workflows/deploy_fastapi_backend.yml
@ -87,7 +87,13 @@ jobs:

      - name: Build Docker Image For Engine
        run: |
-          docker build -t fastapi-lambda-image:${{ github.sha }} -f backend/docker/engine.Dockerfile . --load
+          docker buildx build \
+            --platform linux/amd64 \
+            --provenance=false \
+            --output=type=docker \
+            -t fastapi-lambda-image:${{ github.sha }} \
+            -f backend/docker/engine.Dockerfile \
+            .

      - name: Login to ECR
        run: |
@ -135,3 +141,4 @@ jobs:

          # Deploy to AWS Lambda via Serverless
          sls deploy --stage ${{ github.ref_name }} --verbose
+
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@ -3,12 +3,9 @@ name: Deploy infrastructure
 on:
  push:
    branches:
-      - "**"
-    paths:
-      - 'infrastructure/terraform/**'
-      - '.github/workflows/deploy_terraform.yml'
-      - '.github/workflows/_build_image.yml'
-      - '.github/workflows/_deploy_lambda.yml'
+      - "dev"
+      - "prod"
+  workflow_dispatch:

 jobs:
  determine_stage:
@ -51,6 +48,7 @@ jobs:
    runs-on: ubuntu-latest
    env:
      STAGE: ${{ needs.determine_stage.outputs.stage }}
+      TERRAFORM_APPLY: ${{ needs.determine_stage.outputs.terraform_apply }}

    steps:
      - uses: actions/checkout@v4
@ -76,10 +74,10 @@ jobs:
        run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan

      - name: Terraform Apply
-        if: env.STAGE == 'prod'
+        if: env.TERRAFORM_APPLY == 'true'
        working-directory: infrastructure/terraform/shared
        run: terraform apply -auto-approve tfplan
-
+ 
  # ============================================================
  # 2️⃣ Build Address 2 UPRN image and Push
  # ============================================================
@ -90,10 +88,19 @@ jobs:
      ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
      dockerfile_path: backend/address2UPRN/handler/Dockerfile
      build_context: .
+      build_args: |
+        DEV_DB_HOST=$DEV_DB_HOST
+        DEV_DB_PORT=$DEV_DB_PORT
+        DEV_DB_NAME=$DEV_DB_NAME
+        EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN
    secrets:
      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
+      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
+      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+      EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}

  # ============================================================
  # 3️⃣ Deploy Address 2 UPRN Lambda
@ -140,7 +147,7 @@ jobs:
  # 3️⃣ Deploy Postcode Splitter Lambda
  # ============================================================
  postcodeSplitter_lambda:
-    needs: [postcodeSplitter_image, determine_stage]
+    needs: [postcodeSplitter_image, determine_stage, address2uprn_lambda]
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: postcodeSplitter
@ -192,4 +199,85 @@ jobs:
    secrets:
      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
-      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+
+  # ============================================================
+  # Categorisation image and Push
+  # ============================================================
+  categorisation_image:
+    needs: [determine_stage, shared_terraform]
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }}
+      dockerfile_path: backend/categorisation/handler/Dockerfile
+      build_context: .
+      build_args: |
+        DEV_DB_HOST=$DEV_DB_HOST
+        DEV_DB_PORT=$DEV_DB_PORT
+        DEV_DB_NAME=$DEV_DB_NAME
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
+      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
+      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+
+  # ============================================================
+  # Deploy Categorisation Lambda
+  # ============================================================
+  categorisation_lambda:
+    needs: [categorisation_image, determine_stage]
+    uses: ./.github/workflows/_deploy_lambda.yml
+    with:
+      lambda_name: categorisation
+      lambda_path: infrastructure/terraform/lambda/categorisation
+      stage: ${{ needs.determine_stage.outputs.stage }}
+      ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }}
+      image_digest: ${{ needs.categorisation_image.outputs.image_digest }}
+      terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+
+  # ============================================================
+  # Ara Engine image and Push
+  # ============================================================
+  ara_engine_image:
+    needs: [determine_stage, shared_terraform]
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
+      dockerfile_path: backend/docker/engine.Dockerfile
+      build_context: .
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+
+  # ============================================================
+  # Deploy Ara Engine Lambda
+  # ============================================================
+  ara_engine_lambda:
+    needs: [ara_engine_image, determine_stage]
+    uses: ./.github/workflows/_deploy_lambda.yml
+    with:
+      lambda_name: ara_engine
+      lambda_path: infrastructure/terraform/lambda/engine
+      stage: ${{ needs.determine_stage.outputs.stage }}
+      ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
+      image_digest: ${{ needs.ara_engine_image.outputs.image_digest }}
+      terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }}
+      TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }}
+      TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }}
+      TF_VAR_api_key: ${{ secrets.DEV_API_KEY }}
+      TF_VAR_secret_key: ${{ secrets.DEV_SECRET_KEY }}
+      TF_VAR_domain_name: ${{ secrets.DEV_DOMAIN_NAME }}
+      TF_VAR_epc_auth_token: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
+      TF_VAR_google_solar_api_key: ${{ secrets.DEV_GOOGLE_SOLAR_API_KEY }}
--- a/.gitignore
+++ b/.gitignore
@ -279,4 +279,7 @@ cache/
 *.png
 *.pptx

-local_data*
+local_data*
+
+# pyright local config
+pyrightconfig.json
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@ -10,4 +10,7 @@
    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="py.test" />
+  </component>
 </module>
--- a/.idea/watcherTasks.xml
+++ b/.idea/watcherTasks.xml
@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectTasksOptions">
+    <TaskOptions isEnabled="false">
+      <option name="arguments" value="$FilePath$" />
+      <option name="checkSyntaxErrors" value="true" />
+      <option name="description" />
+      <option name="exitCodeBehavior" value="ERROR" />
+      <option name="fileExtension" value="py" />
+      <option name="immediateSync" value="true" />
+      <option name="name" value="Pyright" />
+      <option name="output" value="" />
+      <option name="outputFilters">
+        <array />
+      </option>
+      <option name="outputFromStdout" value="false" />
+      <option name="program" value="$USER_HOME$/.nvm/versions/node/v18.15.0/bin/pyright" />
+      <option name="runOnExternalChanges" value="true" />
+      <option name="scopeName" value="Project Files" />
+      <option name="trackOnlyRoot" value="false" />
+      <option name="workingDir" value="$ProjectFileDir$" />
+      <envs />
+    </TaskOptions>
+  </component>
+</project>
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -9,12 +9,14 @@
            "path": "/bin/bash"
        }
    },
-<<<<<<< HEAD
-=======
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true,
-    "python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
->>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
+    "python.testing.pytestArgs": ["-s", "-q", "--no-cov"],
+
+    "python.languageServer": "Pylance",
+    "python.analysis.typeCheckingMode": "strict",
+    "python.analysis.autoSearchPaths": true,
+    "python.analysis.extraPaths": ["./src"]

    // Hot reload setting that needs to be in user settings
    // "jupyter.runStartupCommands": [
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -13,11 +13,15 @@ from asset_list.utils import get_data
 from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc

-load_dotenv(dotenv_path="backend/.env")
+load_dotenv(dotenv_path="../backend/.env")
 EPC_AUTH_TOKEN = os.getenv(
    "EPC_AUTH_TOKEN",
 )

+OPENAI_API_KEY = os.getenv(
+    "OPENAI_API_KEY",
+)
+

 def extract_address1(
    asset_list, full_address_col, postcode_col, method="first_two_words"
@ -69,18 +73,19 @@ def app():
    Property UPRN
    """

-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals"
-    data_filename = "For Modelling.xlsx"
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed"
+    # data_filename = "For Modelling - Final - reviewed.xlsx"
+    data_filename = "Missed Properties - with address.xlsx"
    sheet_name = "Sheet1"
    postcode_column = "Postcode"
    address1_column = "address1"
    address1_method = None
-    fulladdress_column = "full_address"
+    fulladdress_column = "address1"
    address_cols_to_concat = []
    missing_postcodes_method = None
    landlord_year_built = None
    landlord_os_uprn = "UPRN"
-    landlord_property_type = None
+    landlord_property_type = "Type"
    landlord_built_form = None
    landlord_wall_construction = None
    landlord_roof_construction = None
@ -102,43 +107,6 @@ def app():
    asset_list_header = 0
    landlord_block_reference = None

-    # Peabody data for cleaning
-    data_folder = (
-        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-        "Project/data_validation"
-    )
-    data_filename = "to_standardise_uprns.xlsx"
-    sheet_name = "Sheet1"
-    postcode_column = "Postcode"
-    address1_column = None
-    address1_method = "house_number_extraction"
-    fulladdress_column = "Address"
-    address_cols_to_concat = None
-    missing_postcodes_method = None
-    landlord_year_built = None
-    landlord_os_uprn = "UPRN"
-    landlord_property_type = None
-    landlord_built_form = None
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "LLUPRN"
-    landlord_sap = None
-    outcomes_filename = None
-    outcomes_sheetname = None
-    outcomes_postcode = None
-    outcomes_houseno = None
-    outcomes_id = None
-    outcomes_address = None
-    master_filepaths = []
-    master_id_colnames = []
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
-    asset_list_header = 0
-    landlord_block_reference = None
-
    # Maps addresses to uprn in problematic cases
    manual_uprn_map = {}

@ -441,10 +409,6 @@ def app():
    )

    asset_list.merge_data(epc_df)
-    # asset_list.standardised_asset_list = asset_list.standardised_asset_list[
-    #     asset_list.standardised_asset_list["domna_full_address"]
-    #     != "120 Airdrie Crescent, Burnley, Lancashire"
-    # ]
    asset_list.extract_attributes()
    asset_list.identify_worktypes()

@ -458,27 +422,6 @@ def app():
        os.path.join(data_folder, ".".join(data_filename.split(".")[:-1]))
        + " - Standardised.xlsx"
    )
-    # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
-
-    # Determine inspections priority
-    # solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][
-    #     "domna_postcode"].unique()
-    # asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin(
-    #     solar_jobs
-    # )
-    # # Same for cav
-    # cavity_jobs = asset_list.standardised_asset_list[
-    #     ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"])
-    # ]["domna_postcode"].unique()
-    # asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin(
-    #     cavity_jobs
-    # )
-    # # We prioritise properties that are in solar areas and cavity areas
-    # import numpy as np
-    # asset_list.standardised_asset_list["inspection_priority"] = np.where(
-    #     asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"],
-    #     1, 2
-    # )

    with pd.ExcelWriter(filename) as writer:
        asset_list.standardised_asset_list.to_excel(
--- a/asset_list/mappings/built_form.py
+++ b/asset_list/mappings/built_form.py
@ -528,6 +528,107 @@ BUILT_FORM_MAPPINGS = {
    'House: Semi Detached: Top Floor': 'semi-detached',
    'House: End Terrace: Ground Floor': 'end-terrace',
    'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace',
-    'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace'
+    'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace',

+    '2 BED MID TERRACED HOUSE': 'mid-terrace',
+    '4 BED SEMI DETACHED-PARLOURED': 'semi-detached',
+    '2 BED END TERRACED HOUSE': 'end-terrace',
+    '3 BED MID TERRACED HOUSE': 'mid-terrace',
+    '3 BED SEMI DETACHED HOUSE': 'semi-detached',
+    '3 BED MID TERRACE - PARLOURED': 'mid-terrace',
+    '3 BED END TERRACE - PARLOURED': 'end-terrace',
+    '4 BED+ END TERRACED HOUSE': 'end-terrace',
+    '3 BED END TERRACED HOUSE': 'end-terrace',
+    '3 BED SEMI DETACHED-PARLOURED': 'semi-detached',
+    '4 BED+ END TERRACE - PARLOURED': 'end-terrace',
+    '2 BED SEMI DETACHED HOUSE': 'semi-detached',
+    '3 BED DETACHED HOUSE': 'detached',
+    '2 BED GRD FLR COTT FLT-CNT STR': 'ground floor',
+    '2 BED 1ST FLOOR WALKUP FLAT': 'mid-floor',
+    '1 BED GRD FL COTT FLAT-OWN ENT': 'ground floor',
+    '1 BED 1ST FL WALK UP DECK ACC': 'mid-floor',
+    '2 BED MAISONETTE UPPER COM ENT': 'mid-floor',
+    '2 BED GRD FLR COTT FLT OWN ENT': 'ground floor',
+    '1 BED BUNGALOW': 'unknown',
+    '2 BED GRD FL COTT FLT-OWN ENTR': 'ground floor',
+    '1 BED 1ST FL COTT FLT-CNT STR': 'mid-floor',
+    '1 BED GRD FL WALK UP OWN ENT': 'ground floor',
+    '1 BED GRD FLOOR WALKUP FLAT': 'ground floor',
+    '2 BED GRD FLOOR WALKUP FLAT': 'ground floor',
+    '2 BED 1ST FLR FLT-SHELTERED': 'mid-floor',
+    '2 BED BUNGALOW': 'unknown',
+    '2 BED GRD FLR COTT FLT(P)-1950': 'ground floor',
+
+    'Ground Floor Front Left': 'ground floor',
+    'End-Terrace House': 'end-terrace',
+    'Ground floor': 'ground floor',
+    'Ground Floor Front Right': 'ground floor',
+    'End Terrace (GII List)': 'end-terrace',
+    'Semi  Detached House': 'semi-detached',
+    'Ground Floor Right': 'ground floor',
+    'PB Ground Floor Flat': 'ground floor',
+    'Basement and Ground Floor': 'ground floor',
+    'Semi-detached bungalow': 'detached',
+    'Detached Cottage': 'detached',
+    'Lower & Ground Floor': 'ground floor',
+    'Ground FLoor Flat': 'ground floor',
+    'ground floor': 'ground floor',
+    'Ground Floor Left': 'ground floor',
+    'Semi-detached House': 'detached',
+    'Basement & Lower Ground': 'basement',
+    'Semi-Detached  House': 'detached',
+    'Ground floor flat -': 'ground floor',
+    'Basement Flat': 'basement',
+    'semi-detached bungalow': 'semi-detached',
+    'Lower Ground Floor Flat': 'ground floor',
+    'Ground floor Flat': 'ground floor',
+    'Ground Floor flat': 'ground floor',
+    'Ground': 'ground floor',
+    'Semi detached Bungalow': 'semi-detached',
+    'ground floor flat': 'ground floor',
+    'Mid terrace House': 'mid-terrace',
+    'Raised Ground Floor': 'ground floor',
+    'Basement Floor': 'basement',
+    'Second floor flat': 'mid-floor',
+    'Fourth Floor Flat': 'mid-floor',
+    'First/Second Maisonette': 'mid-floor',
+    'Ground/First': 'ground floor',
+    'First and Second Floor': 'mid-floor',
+    'Terrace House': 'mid-terrace',
+    '1st/2nd Floor Maisonette': 'mid-floor',
+    'Semi-det House': 'semi-detached',
+    'First': 'mid-floor',
+    'Ground & First Floor': 'ground floor',
+    'End of Terrace House': 'end-terrace',
+    '2nd Floor Purpose Built': 'mid-floor',
+    'First/Second Floor Maison': 'mid-floor',
+    'GFF purpose built': 'ground floor',
+    'Second': 'mid-floor',
+    'Semi-det House (GII List)': 'semi-detached',
+    '3rd and 4th Floor': 'mid-floor',
+    'First Floor flat': 'mid-floor',
+    'Mid-Terrace House': 'mid-terrace',
+    '1st & 2nd Floors': 'mid-floor',
+    'Ground/first floor': 'ground floor',
+    'FFF purpose built': 'mid-floor',
+    'Second floor': 'mid-floor',
+    'Second/Third floor': 'mid-floor',
+    'First floor Flat': 'mid-floor',
+    'First floor': 'mid-floor',
+    'Lower Ground Flat': 'basement',
+    'First Floor Rear Flat': 'mid-floor',
+    'First & Second Floor': 'mid-floor',
+    'Ground & Lower Ground': 'basement',
+    'First Floor Rear': 'mid-floor',
+    'First & Second': 'mid-floor',
+    'First Floor Front': 'mid-floor',
+    'First & Second Floors': 'mid-floor',
+    'First/Second Floor': 'mid-floor',
+    'Sem-detach house': 'semi-detached',
+    'Second Floor Flat (Top)': 'top-floor',
+    '3 FloorTerrace House': 'mid-terrace',
+    'First floor flat': 'mid-floor',
+    'First & Second Floor Flat': 'mid-floor',
+    'First Floor Purpose Built': 'mid-floor',
+    'Purpose built First Floor': 'mid-floor',
 }
--- a/asset_list/mappings/heating_systems.py
+++ b/asset_list/mappings/heating_systems.py
@ -498,6 +498,23 @@ HEATING_MAPPINGS = {

    'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler',
    'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators',
-    'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler'
+    'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler',
+
+    'IDEAL ISAR HE30': 'gas combi boiler',
+    'WORCESTER GREENSTAR 25 SI': 'gas combi boiler',
+    'POTTERTON PROMAX COMBI 28 HE PLUS': 'gas combi boiler',
+    'WORCESTER GREENSTAR 28I JUNIOR': 'gas combi boiler',
+    'BAXI ASSURE 25 COMBI': 'gas combi boiler',
+    'POTTERTON PROMAX COMBI 28 HE PLUS A': 'gas combi boiler',
+    'WORCESTER GREENSTAR 30 SI': 'gas combi boiler',
+    'POTTERTON SUPRIMA 40L': 'gas boiler, radiators',
+    'POTTERTON ASSURE 30 COMBI': 'gas combi boiler',
+    'POTTERTON PROMAX 28 COMBI ERP': 'gas combi boiler',
+    'BAXI ASSURE 30 COMBI': 'gas combi boiler',
+    'POTTERTON PROMAX 18 SYSTEM ERP': 'gas boiler, radiators',
+    'POTTERTON PROMAX COMBI 33 HE PLUS A': 'gas combi boiler',
+    'POTTERTON SUPRIMA 40 HE': 'gas boiler, radiators',
+    'FERROLI MODENA 102': 'gas boiler, radiators',
+    'POTTERTON PROMAX COMBI 24 HE PLUS A': 'gas combi boiler'

 }
--- a/asset_list/mappings/property_type.py
+++ b/asset_list/mappings/property_type.py
@ -444,6 +444,9 @@ PROPERTY_MAPPING = {
    'Warden Bungalow': 'bungalow',
    'Warden Flat': 'flat',
    'Upper Floor Flat': 'flat',
-    'Extracare Scheme': 'other'
+    'Extracare Scheme': 'other',
+
+    'SHELTERED': 'unknown',
+    'PARLOUR': 'unknown',

 }
--- a/asset_list/mappings/roof.py
+++ b/asset_list/mappings/roof.py
@ -320,6 +320,8 @@ ROOF_CONSTRUCTION_MAPPINGS = {
    'Pitched (slates or tiles)  access to loft, 100mm': 'pitched insulated',
    'Pitched (slates or tiles)  no loft access, 200mm': 'pitched insulated',
    'Pitched (slates or tiles)  access to loft, 200mm': 'pitched insulated',
-    'Pitched (slates or tiles)  access to loft, 50mm': 'pitched less than 100mm insulation'
+    'Pitched (slates or tiles)  access to loft, 50mm': 'pitched less than 100mm insulation',
+
+    'Pitched roofs': 'pitched unknown insulation',

 }
--- a/asset_list/mappings/walls.py
+++ b/asset_list/mappings/walls.py
@ -369,6 +369,9 @@ WALL_CONSTRUCTION_MAPPINGS = {
    'Solid Brick, As built': 'solid brick unknown insulation',
    'System built, As built': 'system built unknown insulation',
    'Timber frame, As built': 'timber frame unknown insulation',
-    'Cavity, As built': 'cavity unknown insulation'
+    'Cavity, As built': 'cavity unknown insulation',

+    'FILLED CAVITY': 'filled cavity',
+    'EXTERNAL': 'insulated solid brick',
+    'AS BUILT': 'other'
 }
--- a/asset_list/requirements.txt
+++ b/asset_list/requirements.txt
@ -5,7 +5,7 @@ epc-api-python==1.0.2
 thefuzz
 boto3
 openpyxl
-openai>=1.3.5
+openai==1.93.0
 tiktoken
 msgpack
 beautifulsoup4
--- a/backend/.env.test
+++ b/backend/.env.test
@ -19,4 +19,5 @@ PLAN_TRIGGER_BUCKET=test
 DATA_BUCKET=test
 EPC_AUTH_TOKEN=test
 ENGINE_SQS_URL=test
-ENERGY_ASSESSMENTS_BUCKET=test
+CATEGORISATION_SQS_URL=test
+ENERGY_ASSESSMENTS_BUCKET=test
--- a/backend/Outputs.py
+++ b/backend/Outputs.py
@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3
 from backend.app.utils import sap_to_epc
 from backend.app.db.connection import db_engine
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
-from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    PlanModel,
+    PlanRecommendations,
+)


 class Outputs:
@ -42,7 +46,7 @@ class Outputs:
        "flat_roof_insulation": "Flat roof (Out of scope - prov sum only)",
        "room_in_roof_insulation": "RIR (POA - Prov sum only)",
        "ev_charging": "EV Charging",
-        "battery": "Battery"
+        "battery": "Battery",
    }

    def __init__(self, format, portfolio_id):
@ -67,28 +71,38 @@ class Outputs:
        # Download cleaned data
        self.cleaned_epc_lookup = read_from_s3(
            s3_file_name="cleaned_epc_data/cleaned.bson",
-            bucket_name="retrofit-data-dev"
+            bucket_name="retrofit-data-dev",
        )

        self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False)

    def get_properties_from_db(self):
        # Get properties and their details for a specific portfolio
-        properties_query = self.session.query(
-            PropertyModel,
-            PropertyDetailsEpcModel
-        ).join(
-            PropertyDetailsEpcModel,
-            PropertyModel.id == PropertyDetailsEpcModel.property_id
-        ).filter(
-            PropertyModel.portfolio_id == self.portfolio_id  # Filter by portfolio ID
-        ).all()
+        properties_query = (
+            self.session.query(PropertyModel, PropertyDetailsEpcModel)
+            .join(
+                PropertyDetailsEpcModel,
+                PropertyModel.id == PropertyDetailsEpcModel.property_id,
+            )
+            .filter(
+                PropertyModel.portfolio_id
+                == self.portfolio_id  # Filter by portfolio ID
+            )
+            .all()
+        )

        # Transform properties data to include all fields dynamically
        properties_data = [
-            {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
-             **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
-                PropertyDetailsEpcModel.__table__.columns}}
+            {
+                **{
+                    col.name: getattr(prop.PropertyModel, col.name)
+                    for col in PropertyModel.__table__.columns
+                },
+                **{
+                    col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
+                    for col in PropertyDetailsEpcModel.__table__.columns
+                },
+            }
            for prop in properties_query
        ]

@ -96,10 +110,14 @@ class Outputs:

    def get_plans_from_db(self):

-        plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all()
+        plans_query = (
+            self.session.query(PlanModel)
+            .filter(PlanModel.portfolio_id == self.portfolio_id)
+            .all()
+        )
        # Transform plans data to include all fields dynamically
        plans_data = [
-            {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+            {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
            for plan in plans_query
        ]

@ -107,28 +125,38 @@ class Outputs:

    def get_recommendations_from_db(self, plan_ids):
        # Get recommendations through PlanRecommendations for those plans and that are default
-        recommendations_query = self.session.query(
-            Recommendation,
-            Plan.scenario_id
-        ).join(
-            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
-        ).join(
-            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
-        ).filter(
-            PlanRecommendations.plan_id.in_(plan_ids),
-            Recommendation.default == True  # Filtering for default recommendations
-        ).all()
+        recommendations_query = (
+            self.session.query(Recommendation, PlanModel.scenario_id)
+            .join(
+                PlanRecommendations,
+                Recommendation.id == PlanRecommendations.recommendation_id,
+            )
+            .join(
+                PlanModel,
+                PlanModel.id
+                == PlanRecommendations.plan_id,  # Join with Plan to access scenario_id
+            )
+            .filter(
+                PlanRecommendations.plan_id.in_(plan_ids),
+                Recommendation.default == True,  # Filtering for default recommendations
+            )
+            .all()
+        )

        # Transform recommendations data to include all fields dynamically and include scenario_id
        recommendations_data = [
            {
                **{
-                    col.name: getattr(rec.Recommendation, col.name) if
-                    hasattr(rec, 'Recommendation') else getattr(rec, col.name)
+                    col.name: (
+                        getattr(rec.Recommendation, col.name)
+                        if hasattr(rec, "Recommendation")
+                        else getattr(rec, col.name)
+                    )
                    for col in Recommendation.__table__.columns
                },
-                "Scenario ID": rec.scenario_id
-            } for rec in recommendations_query
+                "Scenario ID": rec.scenario_id,
+            }
+            for rec in recommendations_query
        ]

        return recommendations_data
@ -148,7 +176,9 @@ class Outputs:
            measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None)

            # If the property_id already exists in the collected rows, update it
-            existing_row = next((item for item in rows if item["property_id"] == property_id), None)
+            existing_row = next(
+                (item for item in rows if item["property_id"] == property_id), None
+            )
            if existing_row is None:
                # Create a new row if the property_id doesn't exist
                new_row = {measure: None for measure in all_measures}
@ -196,7 +226,7 @@ class Outputs:
        properties_data = self.get_properties_from_db()

        plans_data = self.get_plans_from_db()
-        plan_ids = [plan['id'] for plan in plans_data]
+        plan_ids = [plan["id"] for plan in plans_data]

        recommendations_data = self.get_recommendations_from_db(plan_ids)
        self.session.close()
@ -209,50 +239,54 @@ class Outputs:
        scenario_ids = plans_df["scenario_id"].unique()

        # We start to create the MDS sheet
-        mds = properties_df[
-            [
-                "property_id",
-                "address",
-                "postcode",
-                "uprn",
-                "current_epc_rating",
-                "current_sap_points",
-                "primary_energy_consumption",
-                "property_type",
-                "built_form",
-                "total_floor_area",
-                "walls",
-                "tenure",
-                "mainfuel",
-                # The bills columns are split out - we include them and aggregate, without appliances
-                "heating_cost_current",
-                "hot_water_cost_current",
-                "lighting_cost_current",
-                "gas_standing_charge",
-                "electricity_standing_charge"
+        mds = (
+            properties_df[
+                [
+                    "property_id",
+                    "address",
+                    "postcode",
+                    "uprn",
+                    "current_epc_rating",
+                    "current_sap_points",
+                    "primary_energy_consumption",
+                    "property_type",
+                    "built_form",
+                    "total_floor_area",
+                    "walls",
+                    "tenure",
+                    "mainfuel",
+                    # The bills columns are split out - we include them and aggregate, without appliances
+                    "heating_cost_current",
+                    "hot_water_cost_current",
+                    "lighting_cost_current",
+                    "gas_standing_charge",
+                    "electricity_standing_charge",
+                ]
            ]
-        ].copy().rename(
-            columns={
-                "address": "Address",
-                "postcode": "Postcode",
-                "uprn": "UPRN",
-                "current_epc_rating": "Pre EPC",
-                "current_sap_points": "EPC Source",
-                "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
-                "property_type": "Property Type",
-                "built_form": "Built Form",
-                "total_floor_area": "Floor area m2 (If known)",
-                "walls": "Wall Type (Mandatory field)",
-                "tenure": "Tenure",
-            }
+            .copy()
+            .rename(
+                columns={
+                    "address": "Address",
+                    "postcode": "Postcode",
+                    "uprn": "UPRN",
+                    "current_epc_rating": "Pre EPC",
+                    "current_sap_points": "EPC Source",
+                    "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
+                    "property_type": "Property Type",
+                    "built_form": "Built Form",
+                    "total_floor_area": "Floor area m2 (If known)",
+                    "walls": "Wall Type (Mandatory field)",
+                    "tenure": "Tenure",
+                }
+            )
        )

        mds["Estimated bill (£ per year)"] = (
-            mds["heating_cost_current"] +
-            mds["hot_water_cost_current"] +
-            mds["lighting_cost_current"] +
-            mds["gas_standing_charge"] +
-            mds["electricity_standing_charge"]
+            mds["heating_cost_current"]
+            + mds["hot_water_cost_current"]
+            + mds["lighting_cost_current"]
+            + mds["gas_standing_charge"]
+            + mds["electricity_standing_charge"]
        )

        mds = mds.drop(
@ -261,65 +295,84 @@ class Outputs:
                "hot_water_cost_current",
                "lighting_cost_current",
                "gas_standing_charge",
-                "electricity_standing_charge"
+                "electricity_standing_charge",
            ]
        )

        # Formatting - Pre EPC is an enum
        mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values]
-        mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0]
+        mds["Wall Type (Mandatory field)"] = (
+            mds["Wall Type (Mandatory field)"].str.split(",").str[0]
+        )
        # Remove average thermal transmittance field
        mds["Wall Type (Mandatory field)"] = np.where(
-            mds["Wall Type (Mandatory field)"].str.contains("Average thermal transmittance"),
+            mds["Wall Type (Mandatory field)"].str.contains(
+                "Average thermal transmittance"
+            ),
            "",
-            mds["Wall Type (Mandatory field)"]
+            mds["Wall Type (Mandatory field)"],
        )

        mds = mds.merge(
-            pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]],
+            pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[
+                ["clean_description", "fuel_type"]
+            ],
            left_on="mainfuel",
            right_on="clean_description",
-            how="left"
+            how="left",
+        )
+        mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(
+            columns=["clean_description", "mainfuel"]
        )
-        mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"])

        mds["Existing Fuel Type"].value_counts()

        mds_output_by_scenario = {}
        for scenario_id in scenario_ids:
-            scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id]
+            scenario_recommendations = recommendations_df[
+                recommendations_df["Scenario ID"] == scenario_id
+            ]

            # For each measure, we create the measure matrix
-            scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations)
+            scenario_measure_matrix = self.make_mds_measure_matrix(
+                scenario_recommendations
+            )

            # Calculate the predicted impact on: SAP, heat demand, bills, kwh
-            recommendation_impacts = scenario_recommendations.groupby("property_id")[
-                ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
-            ].sum().reset_index()
+            recommendation_impacts = (
+                scenario_recommendations.groupby("property_id")[
+                    ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
+                ]
+                .sum()
+                .reset_index()
+            )

            scenario_mds = mds.merge(
                scenario_measure_matrix, how="left", on="property_id"
-            ).merge(
-                recommendation_impacts, how="left", on="property_id"
-            )
+            ).merge(recommendation_impacts, how="left", on="property_id")
            # If we have no recommendations, sap_points, kwh_savings, heat_demand will be NaN
            to_clean = [c for c in recommendation_impacts.columns if c != "property_id"]
            for col in to_clean:
                scenario_mds[col].fillna(0, inplace=True)
            scenario_mds.fillna(0, inplace=True)
-            scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"]
+            scenario_mds["Post SAP"] = (
+                scenario_mds["EPC Source"] + scenario_mds["sap_points"]
+            )
            # Round Post SAP down to the nearest integer
            scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x))
-            scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x))
+            scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(
+                lambda x: sap_to_epc(x)
+            )
            scenario_mds["Heating Demand Kwh/m2/y"] = (
-                scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"]
+                scenario_mds["Existing Heating Demand Kwh/m2/y"]
+                - scenario_mds["heat_demand"]
            )

            scenario_mds = scenario_mds.rename(
                columns={
                    "sap_points": "Predicted SAP Points",
                    "kwh_savings": "Energy Saving (Kwh)",
-                    "energy_cost_savings": "Bill Reduction (£ per yr)"
+                    "energy_cost_savings": "Bill Reduction (£ per yr)",
                }
            )

@ -330,7 +383,7 @@ class Outputs:
            save_excel_to_s3(
                df=scenario_mds,
                file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx",
-                bucket_name="retrofit-data-dev"
+                bucket_name="retrofit-data-dev",
            )

    def export(self):
--- a/backend/Property.py
+++ b/backend/Property.py
@ -490,7 +490,7 @@ class Property:
        for rec_id in rec_ids:
            sim_epc = self.simulation_epcs[rec_id].copy()
            rec_impact = [x for x in impact_summary if x["recommendation_id"] == rec_id][0]
-            # We update all of the features that should have an impact on the kwh model
+            # We update all features that should have an impact on the kwh model

            sim_epc.update(
                {
@ -660,8 +660,6 @@ class Property:
        self.set_floor_type()
        self.set_floor_level()
        self.set_windows_count()
-        self.set_energy_source()
-        self.find_energy_sources()
        self.set_current_energy(kwh_client, kwh_predictions)

    def set_solar_panel_configuration(self, solar_panel_configuration):
@ -1168,202 +1166,6 @@ class Property:
            if condition_data.get("windows_area") is not None \
            else None

-    def set_energy_source(self):
-        """
-        This method sets the energy source of the property, based on the mains gas flag and energy tariff.
-        """
-        # Default to "electricity_and_gas" to cover most scenarios including when mains_gas_flag is True
-        energy_source = "electricity_and_gas"
-
-        # If the tariff explicitly indicates electricity use without a dual indication and mains_gas_flag is not True
-        # We check for the common electricity tariffs
-        if not self.data["mains-gas-flag"] and self.data["energy-tariff"] in [
-            "Single",
-            "off-peak 7 hour",
-            "off-peak 10 hour",
-            "off-peak 18 hour",
-            "standard tariff",
-            "24 hour",
-        ]:
-            energy_source = "electricity"
-
-        # Set the energy source based on the conditions above
-        self.energy_source = energy_source
-
-    def find_energy_sources(self):
-        # Based on the heating and the hot water
-        heating_fuel_mapping = {
-            'has_mains_gas': 'Natural Gas',
-            'has_electric': 'Electricity',
-            'has_oil': 'Oil',
-            'has_wood_logs': 'Wood Logs',
-            'has_coal': 'Coal',
-            'has_anthracite': 'Anthracite',
-            'has_smokeless_fuel': 'Smokeless Fuel',
-            'has_lpg': 'LPG',
-            'has_b30k': 'B30K Biofuel',
-            'has_air_source_heat_pump': 'Electricity',
-            'has_ground_source_heat_pump': 'Electricity',
-            'has_water_source_heat_pump': 'Electricity',
-            'has_electric_heat_pump': 'Electricity',
-            'has_solar_assisted_heat_pump': 'Electricity',
-            'has_exhaust_source_heat_pump': 'Electricity',
-            'has_community_heat_pump': 'Electricity',
-            'has_wood_pellets': 'Wood Pellets',
-            'has_community_scheme': 'Varied (Community Scheme)',
-            "has_dual_fuel_mineral_and_wood": 'Wood Logs',
-            "has_electricaire": 'Electricity',
-            "has_wood_chips": 'Wood Logs'
-        }
-
-        # Hot water
-        heater_type_to_fuel = {
-            'gas instantaneous': 'Natural Gas',
-            'electric heat pump': 'Electricity',
-            'electric immersion': 'Electricity',
-            'gas boiler': 'Natural Gas',
-            'oil boiler': 'Oil',
-            'electric instantaneous': 'Electricity',
-            'gas multipoint': 'Natural Gas',
-            'heat pump': 'Electricity',
-            'solid fuel boiler': 'Solid Fuel',
-            'solid fuel range cooker': 'Solid Fuel',
-            'room heaters': 'Varied',  # Could be any fuel, further specifics needed based on context
-            "single-point gas": "Natural Gas"
-        }
-
-        # Define a mapping from system types to general categories or modifications of fuel types
-        system_type_modification = {
-            'from main system': 'Main System',
-            'from secondary system': 'Secondary System',
-            'from second main heating system': 'Secondary System',
-            'community scheme': 'Community Scheme'
-        }
-
-        hotwater_appliance_to_fuel = {
-            'gas range cooker': 'Natural Gas',
-            'oil range cooker': 'Oil'
-        }
-
-        fuel_map = {
-            None: "Natural Gas (Community Scheme)",
-            "mains gas": "Natural Gas (Community Scheme)",
-            "biomass": "Smokeless Fuel",
-            "electricity": "Electricity",
-            "biogas": "Smokeless Fuel",
-            "heat network": "Natural Gas (Community Scheme)",
-            "lpg": 'LPG',
-            "biodiesel": "Smokeless Fuel",
-            "b30d": "B30K Biofuel",
-            "coal": "Coal",
-            "oil": "Oil",
-            "unknown": None  # Handle - anything post 2020 is electricity else gas
-        }
-
-        self.heating_energy_source = list({
-            fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False)
-        })
-
-        if set(self.heating_energy_source) == {'Electricity', 'Natural Gas'}:
-            # It means they have mixed heating so we take the primary one, based on main fuel
-            # This will probably happen in the case of an extension
-            if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
-                self.heating_energy_source = ['Natural Gas']
-            else:
-                self.heating_energy_source = ['Electricity']
-
-        if set(self.heating_energy_source) == {'Electricity', 'LPG'}:
-            if self.main_fuel["clean_description"] in ["Lpg not community", "Lpg community"]:
-                self.heating_energy_source = ['LPG']
-            else:
-                self.heating_energy_source = ['Electricity']
-
-        if set(self.heating_energy_source) == {'Natural Gas', 'Wood Logs'}:
-            # It means they have mixed heating so we take the primary one, based on main fuel
-            # This will probably happen in the case of an extension
-            if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]:
-                self.heating_energy_source = ['Natural Gas']
-            else:
-                self.heating_energy_source = ['Wood Logs']
-
-        if len(self.heating_energy_source) > 1 and "Varied (Community Scheme)" not in self.heating_energy_source:
-            # We might have something like heating energy source equal to ['Natural Gas', 'Varied (Community Scheme)']
-            # so we treat this as community heating
-            raise Exception("Investigate me")
-
-        if len(self.heating_energy_source) == 0:
-            heating_flags = {
-                v for k, v in self.main_heating.items() if k not in ["original_description", "clean_description"]
-            }
-            hotwater_flags = {
-                v for k, v in self.hotwater.items() if k not in ["original_description", "clean_description"]
-            }
-
-            # If all flags are zero, we have a no data example
-            if (heating_flags == {False} or hotwater_flags == {None}) and (
-                hotwater_flags == {False} or hotwater_flags == {None}):
-                # We have nodata so we try and rely on main fuel
-                if self.main_fuel["fuel_type"] in fuel_map:  # We assume when None as it's unknown
-                    mapped_fuel = fuel_map[self.main_fuel["fuel_type"]]
-                    self.heating_energy_source = mapped_fuel
-                    self.hot_water_energy_source = mapped_fuel
-                    return
-                else:
-                    raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")
-
-            # We handle edge case where no heating system is indicated
-            if self.main_fuel["fuel_type"] in fuel_map:
-                mapped_fuel = fuel_map[self.main_fuel["fuel_type"]]
-                self.heating_energy_source = mapped_fuel
-                self.hot_water_energy_source = mapped_fuel
-                return
-
-        if len(self.heating_energy_source) > 1:
-            # We treat this as a community scheme
-            self.heating_energy_source = ["Varied (Community Scheme)"]
-
-        self.heating_energy_source = self.heating_energy_source[0]
-
-        if self.heating_energy_source == "Varied (Community Scheme)":
-
-            if self.main_fuel["fuel_type"] in fuel_map:  # We assume when None as it's unknown
-                mapped_to = fuel_map[self.main_fuel["fuel_type"]]
-                if mapped_to is None and self.main_fuel["fuel_type"] == "unknown":
-                    # Handle logic based on age band
-                    if self.year_built >= 2020:
-                        self.heating_energy_source = "Electricity"
-                    else:
-                        self.heating_energy_source = "Natural Gas (Community Scheme)"
-
-                else:
-                    self.heating_energy_source = mapped_to
-            else:
-                raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")
-
-        if self.hotwater["heater_type"] is not None:
-            self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]]
-
-            if self.hotwater["extra_features"] == "plus solar":
-                self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
-                return
-        elif self.hotwater["system_type"] is not None:
-            fuel = system_type_modification[self.hotwater["system_type"]]
-
-            if self.hotwater["extra_features"] == "plus solar":
-                self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal"
-                return
-
-            if fuel in ['Main System', "Community Scheme"]:
-                self.hot_water_energy_source = self.heating_energy_source
-            elif fuel in ['Secondary System']:
-                # Check the secondary heating system
-                secondary_heating = self.data["secondheat-description"]
-                self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"]
-            else:
-                raise NotImplementedError(f"Investiage me - unhandled hot water fuel {fuel}")
-        else:
-            self.hot_water_energy_source = hotwater_appliance_to_fuel[self.hotwater["appliance"]]
-
    def is_ashp_valid(self, measures):

        if "air_source_heat_pump" in self.non_invasive_recommendations:
--- a/backend/README.md
+++ b/backend/README.md
@ -45,12 +45,14 @@ cp .env.example .env

 ## Running the Application

-from within the application you can run with the following command:
+From `model/backend/`, you can run the application with the following command:

 ```commandline
 uvicorn app.main:app --reload
 ```

+Or run `sh run_local.sh`, which runs that same uvicorn command.
+
 Your application will be available at the designated URL

 ## API Documentation
@ -172,7 +174,7 @@ For instance, if your server is running locally on port 8000, you can use curl
 to get a dummy token:

 ```commandline
-curl http://localhost:8000/dummy-token
+curl http://localhost:8000/local/dummy-token
 ```

 You will receive a response containing the dummy JWT
--- a/backend/address2UPRN/handler/Dockerfile
+++ b/backend/address2UPRN/handler/Dockerfile
@ -1,4 +1,17 @@
 FROM public.ecr.aws/lambda/python:3.10
+# FROM python:3.11.10-bullseye
+
+
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+ARG EPC_AUTH_TOKEN
+
+ENV DB_HOST=${DEV_DB_HOST}
+ENV DB_PORT=${DEV_DB_PORT}
+ENV DB_NAME=${DEV_DB_NAME}
+ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}
+

 # Set working directory (Lambda task root)
 WORKDIR /var/task
@ -8,13 +21,17 @@ WORKDIR /var/task
 # -----------------------------
 COPY backend/address2UPRN/handler/requirements.txt .

+
 # Install dependencies into Lambda runtime
 RUN pip install --no-cache-dir -r requirements.txt

-# -----------------------------
-# Copy application code
-# -----------------------------
+
+# Copy necessary files for database and utility imports
 COPY utils/ utils/
+COPY backend/ backend/
+COPY datatypes/ datatypes/
+
+# Copy the handler
 COPY backend/address2UPRN/main.py .

 # -----------------------------
--- a/backend/address2UPRN/handler/requirements.txt
+++ b/backend/address2UPRN/handler/requirements.txt
@ -1,3 +1,11 @@
-epc-api-python==1.0.2
+pandas==2.2.2
+numpy<2.0
+requests
 tqdm
-pandas
+openpyxl
+epc-api-python==1.0.2
+boto3==1.35.44
+sqlmodel
+sqlalchemy==2.0.36
+psycopg2-binary==2.9.10
+pydantic-settings==2.6.0
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@ -3,12 +3,23 @@ import os
 from urllib.parse import urlencode
 import pandas as pd
 from difflib import SequenceMatcher
-from tqdm import tqdm
 from utils.logger import setup_logger
+import re
+from typing import Set
+import json
+import requests
+from uuid import UUID
+import uuid
+from backend.app.db.functions.tasks.Tasks import SubTaskInterface
+from utils.s3 import (
+    save_csv_to_s3,
+    read_csv_from_s3 as read_csv_from_s3_dict,
+    parse_s3_uri,
+)
+from datetime import datetime

 logger = setup_logger()

-import re

 EPC_AUTH_TOKEN = os.getenv(
    "EPC_AUTH_TOKEN",
@ -17,9 +28,28 @@ EPC_AUTH_TOKEN = os.getenv(
 if EPC_AUTH_TOKEN is None:
    raise RuntimeError("EPC_AUTH_TOKEN not defined in env")

-import re
-from difflib import SequenceMatcher
-from typing import Set
+
+def is_valid_postcode(postcode_clean: str) -> bool:
+    """
+    Validate postcode using postcodes.io.
+
+    Expects a sanitised postcode (e.g. E84SQ).
+    Returns True if valid, False otherwise.
+    """
+    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
+    if not postcode_clean:
+        return False
+
+    try:
+        resp = requests.get(
+            POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
+            timeout=5,
+        )
+        resp.raise_for_status()
+        return resp.json().get("result", False)
+    except requests.RequestException:
+        # Network issues, rate limits, etc.
+        return False


 def levenshtein(a: str, b: str) -> float:
@ -300,27 +330,29 @@ def get_uprn_candidates(
    )


-def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
+def get_uprn_with_epc_df(
+    user_inputed_address: str,
+    epc_df: pd.DataFrame,
+    verbose: bool = False,
+):
    """
-    Return uprn (str)
-    Return False if failed to find a sensible matching epc
-    Return Nons when epc found but no UPRN
+    Return uprn (str) using a pre-fetched EPC dataframe.
+    This avoids calling the API multiple times for the same postcode.
    """
-    df = get_epc_data_with_postcode(postcode=postcode)
-
-    if df.empty:
+    if epc_df.empty:
        return None

    scored_df = get_uprn_candidates(
-        df,
+        epc_df,
        user_address=user_inputed_address,
    )

    # Best score
    best_score = scored_df.iloc[0]["lexiscore"]

-    if best_score <= 0:
-        return None
+    # # Return None if score is below threshold
+    # if best_score < 0.7:
+    #     return None

    # All rank-1 rows (possible draw)
    top_rank_df = scored_df[scored_df["lexirank"] == 1]
@ -330,18 +362,41 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
        return None

    address = top_rank_df["address"].values[0]
-    lexiscore = float(top_rank_df["lexiscore"].values[0])
+    score = float(top_rank_df["lexiscore"].values[0])

-    logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
+    logger.info(f"Address found to be: {address}, with lexiscore {score}")
    # Safe to return the agreed UPRN
    found_uprn = top_rank_df.iloc[0]["uprn"]

    if found_uprn == "":
        return None

-    if return_address:
-        return found_uprn, address
-    return found_uprn
+    if verbose:
+        return (found_uprn, address, score)
+    else:
+        return found_uprn
+
+
+def get_uprn(
+    user_inputed_address: str,
+    postcode: str,
+    verbose: bool = False,
+):
+    """
+    Return uprn (str), or a (uprn, address, score) tuple when verbose is True
+    Return None if failed to find a sensible matching epc
+    Return None when epc found but no UPRN
+
+    This function fetches EPC data via API for a single postcode.
+    For processing multiple addresses in the same postcode, use get_uprn_with_epc_df instead.
+    """
+    df = get_epc_data_with_postcode(postcode=postcode)
+
+    return get_uprn_with_epc_df(
+        user_inputed_address=user_inputed_address,
+        epc_df=df,
+        verbose=verbose,
+    )


 def resolve_uprns_for_postcode_group(
@ -424,148 +479,302 @@ def resolve_uprns_for_postcode_group(
    )


-def test(a, b):
-    assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
+def save_results_to_s3(
+    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
+) -> bool:
+    """
+    Save results DataFrame to S3 as CSV.
+
+    The object key is
+    ``ara_raw_outputs/{task_id}/{sub_task_id}/{timestamp}_{random suffix}.csv``,
+    so each call writes to its own key.
+
+    :param results_df: The DataFrame containing results
+    :param task_id: The task ID (first folder of the object key)
+    :param sub_task_id: The sub-task ID (second folder of the object key)
+    :param bucket_name: The S3 bucket name (defaults to the S3_BUCKET_NAME env variable)
+    :return: True if successful, False otherwise (failures are logged, not raised)
+    """
+    # Fall back to the environment only when the caller passed nothing.
+    if bucket_name is None:
+        bucket_name = os.getenv("S3_BUCKET_NAME")
+
+    if not bucket_name:
+        logger.error(
+            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
+        )
+        return False
+
+    # The file name is a timestamp plus a short random suffix; the task and
+    # sub-task IDs only appear as folders in the key.
+    file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}"
+    file_key = f"ara_raw_outputs/{task_id}/{sub_task_id}/{file_name}.csv"
+
+    try:
+        success = save_csv_to_s3(results_df, bucket_name, file_key)
+    except Exception as e:
+        # Keep the traceback: the message alone rarely identifies the failing call.
+        logger.error(f"Error saving results to S3: {str(e)}", exc_info=True)
+        return False
+
+    if not success:
+        logger.error(f"Failed to save results to s3://{bucket_name}/{file_key}")
+        return False
+
+    logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
+    return True


-def run_all_test():
-    # Basic usage with different post codes styles
-    test(get_epc_data_with_postcode("b93 8sy").shape[0], 63)
-    test(get_epc_data_with_postcode("B938sy").shape[0], 63)
-    test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
-    test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
+def handler(event, context, local=False):
+    """
+    Lambda entry point: resolve UPRNs for the addresses listed in a CSV on S3.
+
+    Each record's body must carry `task_id`, `sub_task_id` and `s3_uri`. For
+    every valid record the subtask is set to 'in progress', the CSV is read,
+    rows are grouped by `postcode_clean`, EPC data is fetched once per postcode,
+    each address is matched with get_uprn_with_epc_df, and the combined rows
+    are written back to S3 before the subtask is marked 'completed'.
+
+    :param event: Either a batch event with a "Records" list or a single record
+    :param context: Lambda context (function_name and aws_request_id are printed)
+    :param local: When True, `event` is replaced by a hard-coded sample record
+    :return: Dict with "statusCode" and a JSON "body". 500 when there are errors
+        and no record finished; otherwise 200 with one summary per finished
+        record under "processed" and the collected errors (or None)
+    """
+    print("=== Address2UPRN Lambda Handler ===")
+    print(f"Function: {context.function_name}")
+    print(f"Request ID: {context.aws_request_id}")

-    test(get_uprn("68", "b93 8sy"), "100070989938")
-    test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938")
-    test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633")
-    test(get_uprn("28 A", "se6 4tf"), "100023278633")
-    test(get_uprn("28A", "se6 4tf"), "100023278633")
-    test(get_uprn("6 Aitken Close", "E8 4SQ"), False)
+    # Handle local testing
+    if local is True:
+        event = {
+            "Records": [
+                {
+                    "body": json.dumps(
+                        {
+                            "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
+                            "sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d",
+                            "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-16T12:00:20.257856_7b520c0e.csv",
+                        }
+                    )
+                }
+            ]
+        }

-    # unique case
-    test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198")
-    test(get_uprn("5 ,  1 Semley Gate", "e9 5nh"), "10008238198")
-    test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198")
-    test(get_uprn("1, 5 Semley Gate", "e9 5nh"), False)
-    test(
-        get_uprn("1 Semley Gate", "e9 5nh"), "10008238188"
-    )  # this one return "flat 1, in 1 semley gate"
-    test(
-        get_uprn("48 Oswald Street", "E5 0BT"), False
-    )  # this one return "flat 1, in 1 semley gate"
-    test(
-        get_uprn("42 Oswald Street", "E5 0BT"), False
-    )  # this one return "flat 1, in 1 semley gate"
-    test(
-        get_uprn("46 Oswald Street", "E5 0BT"), False
-    )  # this one return "flat 1, in 1 semley gate"
-    get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
-    get_uprn_candidates(
-        get_epc_data_with_postcode("Cr2 7dl"),
-        "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
-    )
+    print(f"Event: {json.dumps(event, indent=2, default=str)}")
+    print("===================================")

+    # Handle both single event and batch events (SQS, etc.)
+    records = event.get("Records", [event])
+    # One compact summary per record that finished; feeds the response body
+    results = []
+    errors = []
+    subtask_interface = SubTaskInterface()

-if __name__ == "__main__":
-    INPUT_FILE = "hackney.xlsx"
-
-    ADDRESS_COL = "Address 1"
-    POSTCODE_COL = "Postcode"
-    UPRN_COL = "UPRN"
-
-    df = pd.read_excel(INPUT_FILE)
-
-    failures = []
-
-    for _, row in tqdm(
-        df.iterrows(),
-        total=len(df),
-        desc="Auditing UPRNs",
-    ):
-        input_address = str(row[ADDRESS_COL]).strip()
-        postcode = str(row[POSTCODE_COL]).strip()
-
-        expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL]))
-
+    for record in records:
+        task_id = None
+        subtask_id = None
        try:
-            epc_df = get_epc_data_with_postcode(postcode)
+            # Parse body (inputs)
+            if isinstance(record.get("body"), str):
+                body = json.loads(record["body"])
+            else:
+                body = record.get("body", {})

-            if epc_df.empty:
-                failures.append(
-                    {
-                        **row.to_dict(),
-                        "found_uprn": None,
-                        "best_match_uprn": None,
-                        "best_match_address": None,
-                        "best_match_lexiscore": None,
-                        "status": "no_epc_results",
-                    }
+            # Validate required fields
+            task_id = body.get("task_id")
+            subtask_id = body.get("sub_task_id")
+            s3_uri = body.get("s3_uri")
+
+            if not task_id:
+                errors.append({"error": "Missing required field: task_id"})
+                continue
+
+            if not subtask_id:
+                errors.append({"error": "Missing required field: sub_task_id"})
+                continue
+
+            if not s3_uri:
+                errors.append({"error": "Missing required field: s3_uri"})
+                continue
+
+            # Convert task_id to UUID
+            try:
+                task_id = UUID(task_id) if isinstance(task_id, str) else task_id
+            except ValueError as e:
+                errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"})
+                continue
+
+            # Convert sub_task_id to UUID
+            try:
+                subtask_id = (
+                    UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id
+                )
+            except ValueError as e:
+                errors.append(
+                    {"error": f"Invalid UUID format for sub_task_id: {str(e)}"}
                )
                continue

-            scored_df = get_uprn_candidates(
-                epc_df,
-                user_address=input_address,
-            )
+            # Update existing subtask to 'in progress'
+            subtask_interface.update_subtask_status(subtask_id, "in progress")
+            logger.info(f"Processing subtask {subtask_id} for task {task_id}")

-            best_row = scored_df.iloc[0]
+            # Parse S3 URI and read CSV from S3
+            logger.info(f"Reading data from S3: {s3_uri}")
+            try:
+                bucket, key = parse_s3_uri(s3_uri)
+                csv_data = read_csv_from_s3_dict(bucket, key)
+                df = pd.DataFrame(csv_data)
+                logger.info(f"Loaded {len(df)} rows from S3")
+            except Exception as s3_error:
+                logger.error(f"Failed to read data from S3: {s3_error}")
+                errors.append(
+                    {"error": "Failed to read data from S3", "details": str(s3_error)}
+                )
+                try:
+                    subtask_interface.update_subtask_status(
+                        subtask_id, "failed", outputs={"error": str(s3_error)}
+                    )
+                except Exception as db_error:
+                    logger.error(f"Failed to update subtask status: {db_error}")
+                continue

-            best_match_uprn = str(best_row["uprn"])
-            best_match_address = best_row["address"]
-            best_match_lexiscore = round(float(best_row["lexiscore"]), 4)
+            # Process the rows
+            logger.info(f"Processing {len(df)} rows for task {task_id}")

-            found_uprn = get_uprn(input_address, postcode)
+            # Create user_input column by concatenating Address columns if not already present
+            if "user_input" not in df.columns:
+                df["user_input"] = (
+                    df["Address 1"].fillna("")
+                    + " "
+                    + df["Address 2"].fillna("")
+                    + " "
+                    + df["Address 3"].fillna("")
+                ).str.strip()
+                logger.info(
+                    "Created user_input column from Address 1, Address 2 and Address 3"
+                )
+            else:
+                logger.info(f"user_input column already present in data")
+
+            clean_df = df.dropna(subset=["postcode_clean"])
+
+            postcode_to_addresses = {
+                postcode: group.to_dict(orient="records")
+                for postcode, group in clean_df.groupby("postcode_clean", sort=False)
+            }
+
+            logger.info(f"Total postcodes: {len(postcode_to_addresses)}")
+
+            # Process each postcode group
+
+            results_data = []
+
+            for postcode, postcode_rows in postcode_to_addresses.items():
+                logger.info(
+                    f"Processing postcode: {postcode} with {len(postcode_rows)} rows"
+                )
+
+                # Validate postcode before processing
+                if not is_valid_postcode(postcode):
+                    logger.warning(f"Postcode {postcode} is invalid, skipping")
+                    continue
+
+                # Fetch EPC data once per postcode
+                try:
+                    epc_df = get_epc_data_with_postcode(postcode=postcode)
+                    logger.info(
+                        f"Fetched {len(epc_df)} EPC records for postcode {postcode}"
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Failed to fetch EPC data for postcode {postcode}: {e}"
+                    )
+                    continue
+
+                # Process each address in this postcode with the same EPC data
+                for row in postcode_rows:
+                    try:
+                        user_input = row.get("user_input", "")
+                        if not user_input:
+                            logger.warning(
+                                f"Skipping row with missing user_input for postcode {postcode}"
+                            )
+                            continue
+
+                        # Get UPRN using the pre-fetched EPC data with all return options
+                        result = get_uprn_with_epc_df(
+                            user_inputed_address=user_input, epc_df=epc_df, verbose=True
+                        )
+
+                        # Parse result tuple if successful
+                        if result:
+                            uprn, found_address, score = result
+                            logger.info(
+                                f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})"
+                            )
+
+                            results_data.append(
+                                {
+                                    **row,  # Include all original data
+                                    "uprn": uprn,
+                                    "domna_found_address": found_address,
+                                    "domna_lexiscore": score,
+                                }
+                            )
+                        else:
+                            logger.warning(
+                                f"No UPRN found for {user_input} in {postcode}"
+                            )
+                            results_data.append(
+                                {
+                                    **row,  # Include all original data
+                                    "uprn": None,
+                                    "domna_found_address": None,
+                                    "domna_lexiscore": None,
+                                }
+                            )
+
+                    except Exception as e:
+                        logger.error(
+                            f"Error processing address {row.get('user_input', 'unknown')}: {e}"
+                        )
+                        # Still add the row with error markers
+                        results_data.append(
+                            {
+                                **row,
+                                "uprn": None,
+                                "domna_found_address": None,
+                                "domna_lexiscore": None,
+                                "error": str(e),
+                            }
+                        )
+                        continue
+
+            # Create results DataFrame
+            result_df = pd.DataFrame(results_data)
+
+            # Save results to S3. save_results_to_s3 signals failure by returning
+            # False instead of raising, so the flag must be checked; raising here
+            # hands over to the except branch of this try, which marks the
+            # subtask as failed rather than completed.
+            if not save_results_to_s3(result_df, str(task_id), str(subtask_id)):
+                raise RuntimeError("Failed to save results to S3")
+
+            # Mark subtask as completed
+            try:
+                subtask_interface.update_subtask_status(
+                    subtask_id,
+                    "completed",
+                    outputs={"rows_processed": len(result_df)},
+                )
+                logger.info(f"Marked subtask {subtask_id} as completed")
+            except Exception as db_error:
+                logger.error(f"Failed to mark subtask as completed: {db_error}")
+
+            # Record a compact summary only; the row data itself lives in S3
+            results.append(
+                {
+                    "task_id": str(task_id),
+                    "sub_task_id": str(subtask_id),
+                    "rows_processed": len(result_df),
+                }
+            )

        except Exception as e:
-            failures.append(
-                {
-                    **row.to_dict(),
-                    "found_uprn": None,
-                    "best_match_uprn": None,
-                    "best_match_address": None,
-                    "best_match_lexiscore": None,
-                    "status": "exception",
-                    "error": str(e),
-                }
-            )
-            continue
+            logger.error(f"Unexpected error processing record: {e}", exc_info=True)
+            errors.append({"error": "Unexpected error", "details": str(e)})
+            # Mark subtask as failed if we have one
+            if subtask_id:
+                try:
+                    subtask_interface.update_subtask_status(
+                        subtask_id, "failed", outputs={"error": str(e)}
+                    )
+                except Exception as db_error:
+                    logger.error(f"Failed to update subtask status: {db_error}")

-        found_uprn_norm = None if not found_uprn else str(found_uprn)
+    # Return error if all records failed
+    # (results_data is local to one loop iteration and may be unbound here, so
+    # only the per-record summaries are logged)
+    logger.info(f"Processed {len(results)} record(s), {len(errors)} error(s)")
+    if errors and not results:
+        return {"statusCode": 500, "body": json.dumps({"errors": errors})}

-        if found_uprn_norm != expected_uprn:
-            failures.append(
-                {
-                    **row.to_dict(),
-                    "found_uprn": found_uprn_norm,
-                    "best_match_uprn": best_match_uprn,
-                    "best_match_address": best_match_address,
-                    "best_match_lexiscore": best_match_lexiscore,
-                    "status": ("no_match" if found_uprn_norm is None else "mismatch"),
-                }
-            )
-
-    failures_df = pd.DataFrame(failures)
-
-    print("===================================")
-    print(f"Total rows : {len(df)}")
-    print(f"Failures   : {len(failures_df)}")
-    print("===================================")
-
-    failures_df.to_excel(
-        "hackney_uprn_failures.xlsx",
-        index=False,
-    )
+    return {
+        "statusCode": 200,
+        "body": json.dumps(
+            {"processed": results, "errors": errors if errors else None}
+        ),
+    }


-def handler(event, context):
-    print("hello world")
-    return {"statusCode": 200, "body": "hello world"}
-
-
-# TO do function dispatcher,
-
-# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)
-# fix that
-# Look again at flat 1
-# pandas reader the seperate postcode_splitter
-# dump into s3
+# TODO:
+# Don't add results to the return message as it's too verbose
+# Capture the exception (as e) into S3, so the logs can be found in S3
+# Upload results to S3 as well as CSV
--- a/backend/address2UPRN/script.py
+++ b/backend/address2UPRN/script.py
@ -1,3 +1,5 @@
+# one time script for a customer forhousing
+
 import pandas as pd
 from tqdm import tqdm
 from backend.address2UPRN.main import get_uprn
@ -5,20 +7,35 @@ from backend.address2UPRN.main import get_uprn
 # Enable tqdm for pandas
 tqdm.pandas()

-df = pd.read_excel("address2.xlsx")
+file_name = "forhousing.xlsx"
+
+df = pd.read_excel(file_name)


 def extract_uprn(row):
-    print(row["User Input"], row["Postcode"])
-    result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
+    """
+    Look up the UPRN for one spreadsheet row.
+
+    Returns a 4-item Series (uprn, found address, epc, score) so it lines up
+    with the four output columns this script assigns. get_uprn does not
+    return the EPC record, so that slot is always None.
+    """
+    user_input = "Address"
+    postcode = "Postcode"
+    # get_uprn takes a single `verbose` flag; passing return_address /
+    # return_EPC / return_score raises TypeError. With verbose=True a match
+    # comes back as (uprn, address, score).
+    result = get_uprn(
+        row[user_input],
+        row[postcode],
+        verbose=True,
+    )

-    if result is None:
-        return pd.Series([None, None])
+    # Treat any falsy result (None or False) as "no match"
+    if not result:
+        return pd.Series([None, None, None, None])

-    uprn, found_address = result
-    return pd.Series([uprn, found_address])
+    uprn, found_address, score = result
+    return pd.Series([uprn, found_address, None, score])


-df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
+df[["juntes uprn", "junte found address", "junte found epc", "junte score"]] = (
+    df.progress_apply(extract_uprn, axis=1)
+)

-df.to_excel("outputs2.xlsx", index=False)
+df.to_excel(f"{file_name}_outputs.xlsx", index=False)
+
+# TODO: add lexiscore
+# TODO: run it
+# TODO: give it to danny
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -1,54 +1,67 @@
 import os
 from functools import lru_cache
+from pathlib import Path
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from typing import Optional

+from utils.logger import setup_logger
+
+logger = setup_logger()
+

 def resolve_env_file() -> Optional[str]:
+    """
+    Pick the dotenv file matching the ENVIRONMENT variable (default "local").
+
+    Paths are built from this file's own location (the directory one level
+    above the one containing this module), so the result does not depend on
+    the current working directory.
+
+    :return: Path to ".env" for "local", ".env.test" for "test", and None for
+        any other environment (no env file is used)
+    """
    env = os.getenv("ENVIRONMENT", "local")

+    backend_dir = Path(__file__).resolve().parents[1]
+
    if env == "local":
-        return "backend/.env"
+        env_file = backend_dir / ".env"
+        print("USING ENV FILE:", env_file)
+        # logging formats with `msg % args`, so the message needs a %s
+        # placeholder for env_file; without one the record fails to format.
+        logger.debug("USING ENV FILE: %s", env_file)
+        return str(env_file)

    if env == "test":
-        return "backend/.env.test"
+        env_file = backend_dir / ".env.test"
+        logger.debug("USING ENV FILE: %s", env_file)
+        return str(env_file)

    # prod = no env file
    return None


 class Settings(BaseSettings):
-    API_KEY: str
+    API_KEY: str = "changeme"
    API_KEY_NAME: str = "X-API-KEY"
-    SECRET_KEY: str
-    ENVIRONMENT: str
-    DATA_BUCKET: str
+    SECRET_KEY: str = "changeme"
+    ENVIRONMENT: str = "changeme"
+    DATA_BUCKET: str = "changeme"
    PLAN_TRIGGER_BUCKET: str
-    ENGINE_SQS_URL: str
+    ENGINE_SQS_URL: str = "changeme"
+    CATEGORISATION_SQS_URL: str = "changeme"

    # Third parties
-    EPC_AUTH_TOKEN: str
-    GOOGLE_SOLAR_API_KEY: str
+    EPC_AUTH_TOKEN: str = "changeme"
+    GOOGLE_SOLAR_API_KEY: str = "changeme"

    # Database settings
-    DB_HOST: str
-    DB_PASSWORD: str
-    DB_USERNAME: str
-    DB_PORT: str
-    DB_NAME: str
+    DB_HOST: str = "changeme"
+    DB_PASSWORD: str = "changeme"
+    DB_USERNAME: str = "changeme"
+    DB_PORT: str = "changeme"
+    DB_NAME: str = "changeme"

    # Prediction buckets
-    SAP_PREDICTIONS_BUCKET: str
-    CARBON_PREDICTIONS_BUCKET: str
-    HEAT_PREDICTIONS_BUCKET: str
+    SAP_PREDICTIONS_BUCKET: str = "changeme"
+    CARBON_PREDICTIONS_BUCKET: str = "changeme"
+    HEAT_PREDICTIONS_BUCKET: str = "changeme"
    # LIGHTING_COST_PREDICTIONS_BUCKET: str
    # HEATING_COST_PREDICTIONS_BUCKET: str
    # HOT_WATER_COST_PREDICTIONS_BUCKET: str
-    HEATING_KWH_PREDICTIONS_BUCKET: str
-    HOTWATER_KWH_PREDICTIONS_BUCKET: str
+    HEATING_KWH_PREDICTIONS_BUCKET: str = "changeme"
+    HOTWATER_KWH_PREDICTIONS_BUCKET: str = "changeme"

    # Other S3 buckts
-    ENERGY_ASSESSMENTS_BUCKET: str
+    ENERGY_ASSESSMENTS_BUCKET: str = "changeme"

    # Optional AWS creds (only required in local)
    AWS_ACCESS_KEY_ID: Optional[str] = None
--- a/backend/app/db/base.py
+++ b/backend/app/db/base.py
@ -0,0 +1,5 @@
+from sqlalchemy.orm import DeclarativeBase
+
+
+class Base(DeclarativeBase):
+    """
+    SQLAlchemy declarative base defined once for the package.
+
+    Presumably the ORM models subclass this so they share one metadata
+    registry - confirm against the models modules.
+    """
+
+    pass
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@ -1,5 +1,10 @@
 from sqlalchemy import func
-from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario
+from backend.app.db.models.recommendations import (
+    PlanModel,
+    PlanRecommendations,
+    Recommendation,
+    ScenarioModel,
+)


 def aggregate_portfolio_recommendations(
@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations(
    scenario_id: int,
    total_valuation_increase: float,
    labour_days: float,
-    aggregated_data: dict
+    aggregated_data: dict,
 ):
    # Aggregate multiple fields
    aggregates = (
@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations(
            func.sum(Recommendation.estimated_cost).label("cost"),
            func.sum(Recommendation.total_work_hours).label("total_work_hours"),
            func.sum(Recommendation.kwh_savings).label("energy_savings"),
-            func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
+            func.sum(Recommendation.co2_equivalent_savings).label(
+                "co2_equivalent_savings"
+            ),
            func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
        )
-        .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
-        .join(Plan, Plan.id == PlanRecommendations.plan_id)
+        .join(
+            PlanRecommendations,
+            PlanRecommendations.recommendation_id == Recommendation.id,
+        )
+        .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
        .filter(
-            Plan.portfolio_id == portfolio_id,
-            Plan.scenario_id == scenario_id,
-            Recommendation.default == True
+            PlanModel.portfolio_id == portfolio_id,
+            PlanModel.scenario_id == scenario_id,
+            Recommendation.default == True,
        )
        .one()
    )
@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations(
        "energy_savings": aggregates.energy_savings or 0,
        "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
        "energy_cost_savings": aggregates.energy_cost_savings or 0,
-        **aggregated_data
+        **aggregated_data,
    }

    # Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
-    portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
+    portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one()

    # Update the data
    for key, value in aggregates_dict.items():
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@ -1,17 +1,42 @@
-from sqlalchemy import text
-from sqlalchemy import insert, delete
-from sqlalchemy.orm import Session
+from typing import Any, Dict, List, Optional
+from sqlalchemy import (
+    ColumnElement,
+    and_,
+    func,
+    inspect,
+    text,
+    insert,
+    delete,
+    select,
+)
+from sqlalchemy.orm import Session, Mapper
 from sqlalchemy.exc import SQLAlchemyError
+from sqlmodel import Session
+
 from backend.app.db.models.recommendations import (
-    Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
+    PlanModel,
+    Recommendation,
+    RecommendationMaterials,
+    PlanRecommendations,
+    ScenarioModel,
 )
 from backend.app.db.models.portfolio import PropertyModel
 from backend.app.db.connection import db_session, db_read_session


 def prepare_plan_data(
-    p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations,
-    rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0,
+    p,
+    body,
+    scenario_id,
+    eco_packages,
+    valuations,
+    new_sap_points,
+    new_epc,
+    default_recommendations,
+    rebaselining_carbon=0,
+    rebaselining_heat_demand=0,
+    rebaselining_kwh=0,
+    rebaselining_bills=0,
 ):
    """
    Utility function to prepare the data that goes into the production of a plan. Is a fairly rough and unstructured
@ -32,21 +57,37 @@ def prepare_plan_data(
    """
    # Plan carbon savings
    co2_savings = sum(
-        [r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)]
+        [
+            r["co2_equivalent_savings"]
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
    )
    post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings

    # Plan bill savings
    energy_bill_savings = sum(
-        [r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)]
+        [
+            r["energy_cost_savings"]
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
+    )
+    post_energy_bill = (
+        sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings
    )
-    post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings

    # energy consumption
    energy_consumption_savings = sum(
-        [r["kwh_savings"] for r in default_recommendations if not r.get("already_installed", False)]
+        [
+            r["kwh_savings"]
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
+    )
+    post_energy_consumption = (
+        p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings
    )
-    post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings

    valuation_post_retrofit, valuation_increase = None, None
    if valuations["current_value"]:
@ -54,9 +95,19 @@ def prepare_plan_data(
        valuation_post_retrofit = valuations["average_increased_value"]

    # plan costing data
-    cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)])
+    cost_of_works = sum(
+        [
+            r["total"]
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
+    )
    contingency_cost = sum(
-        [r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)]
+        [
+            r.get("contingency", 0)
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
    )

    return {
@ -86,7 +137,7 @@ def prepare_plan_data(
        "valuation_increase": valuation_increase,
        "cost_of_works": float(cost_of_works),
        "contingency_cost": float(contingency_cost),
-        "plan_type": eco_packages.get(p.id, (None, None, None))[2]
+        "plan_type": eco_packages.get(p.id, (None, None, None))[2],
    }


@ -97,7 +148,7 @@ def create_plan(session: Session, plan):
    :param plan: dictionary of data representing a plan to be created
    """
    try:
-        new_plan = Plan(**plan)
+        new_plan = PlanModel(**plan)
        session.add(new_plan)
        session.flush()
        session.commit()
@ -120,9 +171,7 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
    ]

    stmt = (
-        insert(Plan)
-        .values(payload)
-        .returning(Plan.id, Plan.property_id)
+        insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id)
    )

    result = session.execute(stmt).all()
@ -133,14 +182,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int

 def create_scenario(session: Session, scenario: dict) -> int:
    existing_scenario = (
-        session.query(Scenario)
+        session.query(ScenarioModel)
        .filter_by(portfolio_id=scenario["portfolio_id"])
        .first()
    )

    scenario["is_default"] = not bool(existing_scenario)

-    new_scenario = Scenario(**scenario)
+    new_scenario = ScenarioModel(**scenario)
    session.add(new_scenario)
    session.flush()  # ensures ID is populated

@ -167,7 +216,9 @@ def create_recommendation(session: Session, recommendation):
        raise e


-def create_recommendation_material(session: Session, recommendation_id, material_id, depth):
+def create_recommendation_material(
+    session: Session, recommendation_id, material_id, depth
+):
    """
    This function will create a record for the recommendation_material in the database if it does not exist.
    :param session: The databse session
@ -177,9 +228,7 @@ def create_recommendation_material(session: Session, recommendation_id, material
    """

    new_recommendation_material = RecommendationMaterials(
-        recommendation_id=recommendation_id,
-        material_id=material_id,
-        depth=depth
+        recommendation_id=recommendation_id, material_id=material_id, depth=depth
    )
    session.add(new_recommendation_material)
    session.flush()
@ -196,13 +245,17 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids):
    """

    # Prepare a list of dictionaries for bulk insert
-    data = [{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids]
+    data = [
+        {"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids
+    ]

    # Bulk insert using SQLAlchemy's core API
    session.execute(insert(PlanRecommendations).values(data))


-def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id):
+def upload_recommendations(
+    session: Session, recommendations_to_upload, property_id, new_plan_id
+):
    try:
        # Prepare data for bulk insert for Recommendation
        recommendations_data = [
@ -213,8 +266,14 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
                "description": rec["description"],
                "estimated_cost": float(rec["total"]),
                "default": rec["default"],
-                "starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None,
-                "new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None,
+                "starting_u_value": (
+                    float(rec.get("starting_u_value"))
+                    if rec.get("starting_u_value")
+                    else None
+                ),
+                "new_u_value": (
+                    float(rec.get("new_u_value")) if rec.get("new_u_value") else None
+                ),
                "sap_points": float(rec["sap_points"]),
                "energy_savings": float(rec["heat_demand"]),
                "kwh_savings": float(rec["kwh_savings"]),
@ -223,13 +282,17 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
                "energy_cost_savings": float(rec["energy_cost_savings"]),
                "labour_days": float(rec["labour_days"]),
                "already_installed": rec["already_installed"],
-                "heat_demand": float(rec["heat_demand"])
+                "heat_demand": float(rec["heat_demand"]),
            }
            for rec in recommendations_to_upload
        ]

        # Insert the recommendations, get back the IDs
-        stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data)
+        stmt = (
+            insert(Recommendation)
+            .returning(Recommendation.id)
+            .values(recommendations_data)
+        )
        result = session.execute(stmt)
        uploaded_recommendation_ids = [row[0] for row in result]

@ -243,11 +306,15 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
                "quantity_unit": part.get("quantity_unit", None),
                "estimated_cost": float(part.get("total", part.get("total_cost"))),
            }
-            for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
+            for rec, recommendation_id in zip(
+                recommendations_to_upload, uploaded_recommendation_ids
+            )
            for part in rec["parts"]
        ]

-        session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
+        session.bulk_insert_mappings(
+            RecommendationMaterials, recommendation_materials_data
+        )

        # flush the changes to get the newly created IDs
        session.flush()
@ -283,25 +350,27 @@ def bulk_upload_recommendations_and_materials(
    plan_ids_by_index = []

    for rec in recommendation_payload:
-        recommendation_rows.append({
-            "property_id": rec["property_id"],
-            "type": rec["type"],
-            "measure_type": rec["measure_type"],
-            "description": rec["description"],
-            "estimated_cost": rec["estimated_cost"],
-            "default": rec["default"],
-            "starting_u_value": rec["starting_u_value"],
-            "new_u_value": rec["new_u_value"],
-            "sap_points": rec["sap_points"],
-            "heat_demand": rec["heat_demand"],
-            "kwh_savings": rec["kwh_savings"],
-            "co2_equivalent_savings": rec["co2_equivalent_savings"],
-            "energy_savings": rec["energy_savings"],
-            "energy_cost_savings": rec["energy_cost_savings"],
-            "total_work_hours": rec["total_work_hours"],
-            "labour_days": rec["labour_days"],
-            "already_installed": rec["already_installed"],
-        })
+        recommendation_rows.append(
+            {
+                "property_id": rec["property_id"],
+                "type": rec["type"],
+                "measure_type": rec["measure_type"],
+                "description": rec["description"],
+                "estimated_cost": rec["estimated_cost"],
+                "default": rec["default"],
+                "starting_u_value": rec["starting_u_value"],
+                "new_u_value": rec["new_u_value"],
+                "sap_points": rec["sap_points"],
+                "heat_demand": rec["heat_demand"],
+                "kwh_savings": rec["kwh_savings"],
+                "co2_equivalent_savings": rec["co2_equivalent_savings"],
+                "energy_savings": rec["energy_savings"],
+                "energy_cost_savings": rec["energy_cost_savings"],
+                "total_work_hours": rec["total_work_hours"],
+                "labour_days": rec["labour_days"],
+                "already_installed": rec["already_installed"],
+            }
+        )

        parts_by_index.append(rec["parts"])
        plan_ids_by_index.append(rec["plan_id"])
@ -310,9 +379,7 @@ def bulk_upload_recommendations_and_materials(
    # 2. Insert recommendations and get IDs
    # ---------------------------------------------------------
    result = session.execute(
-        insert(Recommendation)
-        .values(recommendation_rows)
-        .returning(Recommendation.id)
+        insert(Recommendation).values(recommendation_rows).returning(Recommendation.id)
    )

    recommendation_ids = [row[0] for row in result]
@ -324,19 +391,19 @@ def bulk_upload_recommendations_and_materials(

    for recommendation_id, parts in zip(recommendation_ids, parts_by_index):
        for part in parts:
-            materials_rows.append({
-                "recommendation_id": recommendation_id,
-                "material_id": part["material_id"],
-                "depth": part["depth"],
-                "quantity": part["quantity"],
-                "quantity_unit": part["quantity_unit"],
-                "estimated_cost": part["estimated_cost"],
-            })
+            materials_rows.append(
+                {
+                    "recommendation_id": recommendation_id,
+                    "material_id": part["material_id"],
+                    "depth": part["depth"],
+                    "quantity": part["quantity"],
+                    "quantity_unit": part["quantity_unit"],
+                    "estimated_cost": part["estimated_cost"],
+                }
+            )

    if materials_rows:
-        session.execute(
-            insert(RecommendationMaterials).values(materials_rows)
-        )
+        session.execute(insert(RecommendationMaterials).values(materials_rows))

    # ---------------------------------------------------------
    # 4. Insert plan ↔ recommendation links
@ -346,26 +413,22 @@ def bulk_upload_recommendations_and_materials(
            "plan_id": plan_id,
            "recommendation_id": recommendation_id,
        }
-        for plan_id, recommendation_id in zip(
-            plan_ids_by_index, recommendation_ids
-        )
+        for plan_id, recommendation_id in zip(plan_ids_by_index, recommendation_ids)
    ]

-    session.execute(
-        insert(PlanRecommendations).values(plan_recommendation_rows)
-    )
+    session.execute(insert(PlanRecommendations).values(plan_recommendation_rows))


 def chunked(iterable, size=100):
    for i in range(0, len(iterable), size):
-        yield iterable[i:i + size]
+        yield iterable[i : i + size]


 def get_property_ids(portfolio_id: int) -> list[int]:
    with db_read_session() as session:
        return [
-            pid for (pid,) in
-            session.query(PropertyModel.id)
+            pid
+            for (pid,) in session.query(PropertyModel.id)
            .filter(PropertyModel.portfolio_id == portfolio_id)
            .all()
        ]
@ -381,12 +444,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # recommendation_materials (via recommendation)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation_materials rm
            USING recommendation r
            WHERE rm.recommendation_id = r.id
              AND r.property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -394,12 +459,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # plan_recommendations (via plan)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan_recommendations pr
            USING plan p
            WHERE pr.plan_id = p.id
              AND p.property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -407,13 +474,15 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # funding_package_measures
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM funding_package_measures fpm
            USING funding_package fp, plan p
            WHERE fpm.funding_package_id = fp.id
              AND fp.plan_id = p.id
              AND p.property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -421,10 +490,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # inspections (direct)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM inspections
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -432,12 +503,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # funding_package
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM funding_package fp
            USING plan p
            WHERE fp.plan_id = p.id
              AND p.property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -445,10 +518,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # recommendation (direct — CRITICAL FIX)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -456,10 +531,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # plan (direct)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -467,18 +544,22 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # property-scoped tables
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM property_details_epc
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM property_targets
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -486,10 +567,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # properties LAST
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM property
            WHERE id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -510,8 +593,7 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int):

    with db_session() as session:
        session.execute(
-            delete(Scenario)
-            .where(Scenario.portfolio_id == portfolio_id)
+            delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id)
        )

    print("Deleted scenarios for empty portfolio")
@ -530,6 +612,7 @@ def clear_portfolio_in_batches(

    total = (len(property_ids) + property_batch_size - 1) // property_batch_size
    import time
+
    for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1):
        print(f"Deleting batch {i}/{total} ({len(batch)} properties)")
        start_time = time.time()
@ -542,3 +625,163 @@ def clear_portfolio_in_batches(
    delete_portfolio_scenarios_if_empty(portfolio_id)

    print("Portfolio cleared in batches.")
+
+
+def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]:
+    """Fetch every plan whose ``scenario_id`` is one of *ids*.
+
+    :param ids: scenario IDs to match.
+    :return: all matching ``PlanModel`` rows; no ordering is applied.
+    """
+    belongs_to_scenarios = PlanModel.scenario_id.in_(ids)
+    query = select(PlanModel).where(belongs_to_scenarios)
+    with db_read_session() as session:
+        untyped_session: Any = session  # Typehint as Any to satisfy Pylance...
+        result = untyped_session.exec(query)
+        return result.scalars().all()
+
+
+def get_most_recent_plans_by_portfolio_id(
+    portfolio_id: int,
+    min_property_id: Optional[int] = None,
+    max_property_id: Optional[int] = None,
+) -> List[PlanModel]:
+    """Return the newest plan for each (property, scenario) pair in a portfolio.
+
+    "Newest" is the row with the latest ``created_at``; ties are broken by the
+    highest ``id``.
+
+    :param portfolio_id: portfolio whose plans are searched.
+    :param min_property_id: optional inclusive lower bound on ``property_id``.
+    :param max_property_id: optional inclusive upper bound on ``property_id``.
+    :return: one ``PlanModel`` per distinct (property_id, scenario_id).
+    """
+    conditions: List[ColumnElement[bool]] = [
+        PlanModel.portfolio_id == portfolio_id,
+    ]
+    if min_property_id is not None:
+        conditions.append(PlanModel.property_id >= min_property_id)
+    if max_property_id is not None:
+        conditions.append(PlanModel.property_id <= max_property_id)
+
+    # one plan per property per scenario
+    group_columns = (PlanModel.property_id, PlanModel.scenario_id)
+    # Inside each group the most recent row sorts first, so it is the one kept.
+    newest_first = (PlanModel.created_at.desc(), PlanModel.id.desc())
+
+    # NOTE: This statement works for Postgres only, because of the Distinct
+    query = select(PlanModel).where(and_(*conditions))
+    query = query.distinct(*group_columns)
+    query = query.order_by(*group_columns, *newest_first)
+
+    with db_read_session() as session:
+        untyped_session: Any = session  # Typehint as Any to satisfy Pylance...
+        result = untyped_session.exec(query)
+        return result.scalars().all()
+
+
+def get_most_recent_plans_by_scenario_ids(
+    scenario_ids: List[int],
+    min_property_id: Optional[int] = None,
+    max_property_id: Optional[int] = None,
+) -> List[PlanModel]:
+    """Return the newest plan for each (property, scenario) pair in *scenario_ids*.
+
+    "Newest" is the row with the latest ``created_at``; ties are broken by the
+    highest ``id``.
+
+    :param scenario_ids: scenarios to search; an empty list short-circuits to
+        ``[]`` without touching the database.
+    :param min_property_id: optional inclusive lower bound on ``property_id``.
+    :param max_property_id: optional inclusive upper bound on ``property_id``.
+    :return: one ``PlanModel`` per distinct (property_id, scenario_id).
+    """
+    if not scenario_ids:
+        return []
+
+    # Mandatory condition first, optional property-ID range afterwards.
+    conditions: List[ColumnElement[bool]] = [
+        PlanModel.scenario_id.in_(scenario_ids),
+    ]
+    if min_property_id is not None:
+        conditions.append(PlanModel.property_id >= min_property_id)
+    if max_property_id is not None:
+        conditions.append(PlanModel.property_id <= max_property_id)
+
+    # one plan per property per scenario
+    group_columns = (PlanModel.property_id, PlanModel.scenario_id)
+    # Inside each group the most recent row sorts first, so it is the one kept.
+    newest_first = (PlanModel.created_at.desc(), PlanModel.id.desc())
+
+    # NOTE: This statement works for Postgres only, because of the Distinct
+    query = select(PlanModel).where(and_(*conditions))
+    query = query.distinct(*group_columns)
+    query = query.order_by(*group_columns, *newest_first)
+
+    with db_read_session() as session:
+        untyped_session: Any = session  # Typehint as Any to satisfy Pylance
+        result = untyped_session.exec(query)
+        return result.scalars().all()
+
+
+def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]:
+    """Fetch every scenario attached to a portfolio.
+
+    :param portfolio_id: portfolio whose scenarios are wanted.
+    :return: all matching ``ScenarioModel`` rows; no ordering is applied.
+    """
+    belongs_to_portfolio = ScenarioModel.portfolio_id == portfolio_id
+    query = select(ScenarioModel).where(belongs_to_portfolio)
+    with db_read_session() as session:
+        untyped_session: Any = session  # Typehint as Any to satisfy Pylance...
+        result = untyped_session.exec(query)
+        return result.scalars().all()
+
+
+def get_scenarios_count_by_portfolio_id(portfolio_id: int) -> int:
+    """Count the scenarios attached to a portfolio.
+
+    :param portfolio_id: portfolio whose scenarios are counted.
+    :return: number of ``ScenarioModel`` rows with that ``portfolio_id``.
+    """
+    belongs_to_portfolio = ScenarioModel.portfolio_id == portfolio_id
+    count_query = select(func.count()).select_from(ScenarioModel)
+    count_query = count_query.where(belongs_to_portfolio)
+    with db_read_session() as session:
+        untyped_session: Any = session  # Typehint as Any to satisfy Pylance...
+        result = untyped_session.exec(count_query)
+        return result.scalar_one()
+
+
+def get_default_plans(
+    portfolio_id: int,
+    min_property_id: Optional[int] = None,
+    max_property_id: Optional[int] = None,
+) -> List[PlanModel]:
+    """Fetch the plans flagged ``is_default`` within a portfolio.
+
+    :param portfolio_id: portfolio whose default plans are wanted.
+    :param min_property_id: optional inclusive lower bound on ``property_id``.
+    :param max_property_id: optional inclusive upper bound on ``property_id``.
+    :return: all matching ``PlanModel`` rows; no ordering is applied.
+    """
+    conditions: List[ColumnElement[bool]] = [
+        PlanModel.portfolio_id == portfolio_id,
+        PlanModel.is_default.is_(True),
+    ]
+    if min_property_id is not None:
+        conditions.append(PlanModel.property_id >= min_property_id)
+    if max_property_id is not None:
+        conditions.append(PlanModel.property_id <= max_property_id)
+
+    query = select(PlanModel).where(and_(*conditions))
+
+    with db_read_session() as session:
+        untyped_session: Any = session  # Typehint as Any to satisfy Pylance...
+        default_plans: List[PlanModel] = untyped_session.exec(query).scalars().all()
+    return default_plans
+
+
+def _column_values(model: Any, exclude: frozenset = frozenset()) -> Dict[str, Any]:
+    """Snapshot a model's table columns as a ``{column name: value}`` mapping.
+
+    Columns named in *exclude* are left out; ``id`` is always present because
+    ``bulk_update_mappings`` needs the primary key to address the row.
+    """
+    values: Dict[str, Any] = {
+        column.name: getattr(model, column.name)
+        for column in model.__table__.columns
+        if column.name != "id" and column.name not in exclude
+    }
+    values["id"] = model.id
+    return values
+
+
+def bulk_update_plans(
+    plan_models: List[PlanModel],
+    scenario_models: List[ScenarioModel],
+) -> int:
+    """Persist the current column values of the given plans and scenarios.
+
+    Every table column of each plan is written back. For scenarios,
+    ``portfolio_id`` is left untouched. Both updates are committed together.
+
+    :param plan_models: plans to update. When empty, nothing is written at all
+        (scenarios included) and ``0`` is returned.
+    :param scenario_models: scenarios to update alongside the plans.
+    :return: the number of plans passed in.
+    """
+    if not plan_models:
+        return 0
+
+    plan_mappings: List[Dict[str, Any]] = [
+        _column_values(plan) for plan in plan_models
+    ]
+    scenario_mappings: List[Dict[str, Any]] = [
+        _column_values(scenario, exclude=frozenset({"portfolio_id"}))
+        for scenario in scenario_models
+    ]
+
+    plan_mapper: Mapper[Any] = inspect(PlanModel)
+    scenario_mapper: Mapper[Any] = inspect(ScenarioModel)
+
+    # This function issues UPDATEs and commits, so it uses the same session
+    # helper as the other write in this module (the scenario delete) instead
+    # of the read session.
+    # NOTE(review): assumes db_session() yields a writable session; confirm.
+    with db_session() as session:
+        session.bulk_update_mappings(plan_mapper, plan_mappings)
+        session.bulk_update_mappings(scenario_mapper, scenario_mappings)
+        session.commit()
+
+    return len(plan_models)
--- a/backend/app/db/functions/tasks/Tasks.py
+++ b/backend/app/db/functions/tasks/Tasks.py
@ -11,7 +11,7 @@ from sqlmodel import Session, select
 from backend.app.db.connection import get_db_session

 # ---- Models ----
-from backend.app.db.models.tasks import Task, SubTask
+from backend.app.db.models.tasks import SourceEnum, Task, SubTask


 # ============================================================
@ -25,7 +25,12 @@ class SubTaskInterface:
    # --------------------------------------------------------
    # CREATE SUBTASK
    # --------------------------------------------------------
-    def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, status=None):
+    def create_subtask(
+        self,
+        task_id: UUID,
+        inputs: Optional[Dict[str, Any]] = None,
+        status: Optional[str] = None,
+    ):

        now = datetime.now(timezone.utc)
        with get_db_session() as session:
@ -56,8 +61,12 @@ class SubTaskInterface:
    # UPDATE STATUS (in progress, complete, failed)
    # --------------------------------------------------------
    def update_subtask_status(
-        self, subtask_id: UUID, status: str, outputs=None, cloud_logs_url=None
-    ):
+        self,
+        subtask_id: UUID,
+        status: str,
+        outputs: Optional[Dict[str, str]] = None,
+        cloud_logs_url: Optional[str] = None,
+    ) -> SubTask:
        """
        Update the status of a subtask, and recalculate the parent task progress.
        :param subtask_id: UUID of the subtask to update
@ -177,9 +186,7 @@ class SubTaskInterface:
        if not task:
            return

-        subtasks = session.exec(
-            select(SubTask).where(SubTask.task_id == task_id)
-        ).all()
+        subtasks = session.exec(select(SubTask).where(SubTask.task_id == task_id)).all()

        statuses = [s.status.lower() for s in subtasks]
        now = datetime.now(timezone.utc)
@ -211,7 +218,7 @@ class SubTaskInterface:
        subtask_id: UUID,
        status: str,
        outputs: Optional[Dict[str, Any]],
-        cloud_logs_url: Optional[str]
+        cloud_logs_url: Optional[str],
    ):
        now = datetime.now(timezone.utc)

@ -261,6 +268,8 @@ class TasksInterface:
        service: Optional[str] = None,
        inputs: Optional[Dict[str, Any]] = None,
        task_only: bool = False,
+        source: Optional[SourceEnum] = None,
+        source_id: Optional[str] = None,
    ):
        """
        Create a new Task record, and an initial SubTask in waiting state. Can also be used to create just
@ -279,6 +288,8 @@ class TasksInterface:
                status="waiting",
                job_started=now,
                job_completed=None,
+                source=source,
+                source_id=source_id,
            )

            session.add(task)
--- a/backend/app/db/functions/tasks/init.py
+++ b/backend/app/db/functions/tasks/init.py
--- a/backend/app/db/models/addresses.py
+++ b/backend/app/db/models/addresses.py
@ -7,9 +7,7 @@ from sqlalchemy import (
    func,
    UniqueConstraint,
 )
-from sqlalchemy.orm import declarative_base
-
-Base = declarative_base()
+from backend.app.db.base import Base


 class PostcodeSearch(Base):
--- a/backend/app/db/models/condition.py
+++ b/backend/app/db/models/condition.py
@ -7,12 +7,12 @@ from sqlalchemy import (
    String,
    Enum as SqlEnum,
 )
-from sqlalchemy.orm import declarative_base, relationship
+from sqlalchemy.orm import relationship

 from backend.condition.domain.aspect_type import AspectType
 from backend.condition.domain.element_type import ElementType

-Base = declarative_base()
+from backend.app.db.base import Base

 ElementTypeDb = SqlEnum(
    ElementType,
--- a/backend/app/db/models/energy_assessments.py
+++ b/backend/app/db/models/energy_assessments.py
@ -1,10 +1,8 @@
-from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.dialects.postgresql import ENUM as PgEnum
 import enum
 from datetime import datetime
-
-Base = declarative_base()
+from backend.app.db.base import Base
+from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey
+from sqlalchemy.dialects.postgresql import ENUM as PgEnum


 class EnergyAssessment(Base):
@ -190,7 +188,7 @@ class EnergyAssessmentDocuments(Base):
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    uprn = Column(BigInteger, nullable=False)
    energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False)
-    document_type = Column(PgEnum(DocumentTypeEnum, name="document_type", create_type=False), nullable=False)
+    document_type = Column(PgEnum(DocumentTypeEnum, name="document_type"), nullable=False)
    document_location = Column(Text, nullable=False)
    uploaded_at = Column(DateTime(timezone=True), nullable=False, default=datetime.utcnow)
    scenario_id = Column(BigInteger, ForeignKey('energy_assessment_scenarios.id'), nullable=True)
--- a/backend/app/db/models/epc.py
+++ b/backend/app/db/models/epc.py
@ -4,11 +4,8 @@ from sqlalchemy import (
    String,
    JSON,
    TIMESTAMP,
-    UniqueConstraint,
 )
-from sqlalchemy.orm import declarative_base
-
-Base = declarative_base()
+from backend.app.db.base import Base


 class EpcStore(Base):
--- a/backend/app/db/models/funding.py
+++ b/backend/app/db/models/funding.py
@ -1,13 +1,19 @@
 import enum

-from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey
-from sqlalchemy.orm import declarative_base
+from sqlalchemy import (
+    Column,
+    Integer,
+    Float,
+    Enum,
+    TIMESTAMP,
+    BigInteger,
+    ForeignKey,
+)
 from sqlalchemy.sql import func
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.base import Base
+from backend.app.db.models.recommendations import PlanModel
 from backend.app.db.models.materials import MaterialType, Material

-Base = declarative_base()
-

 class SchemeEnum(enum.Enum):
    eco4 = "eco4"
@ -17,13 +23,17 @@ class SchemeEnum(enum.Enum):


 class FundingPackage(Base):
-    __tablename__ = 'funding_package'
+    __tablename__ = "funding_package"

    id = Column(Integer, primary_key=True, autoincrement=True)
-    plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False)
+    plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False)
    scheme = Column(
-        Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False),
-        nullable=False
+        Enum(
+            SchemeEnum,
+            values_callable=lambda x: [e.value for e in x],
+            create_constraint=False,
+        ),
+        nullable=False,
    )
    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
    project_funding = Column(Float)
@ -34,15 +44,23 @@ class FundingPackage(Base):


 class FundingPackageMeasures(Base):
-    __tablename__ = 'funding_package_measures'
+    __tablename__ = "funding_package_measures"

    id = Column(Integer, primary_key=True, autoincrement=True)
-    funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False)
-    measure = Column(
-        Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
-        nullable=False
+    funding_package_id = Column(
+        BigInteger, ForeignKey(FundingPackage.id), nullable=False
    )
-    material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False)  # Assuming material table exists
+    measure = Column(
+        Enum(
+            MaterialType,
+            values_callable=lambda x: [e.value for e in x],
+            create_constraint=False,
+        ),
+        nullable=False,
+    )
+    material_id = Column(
+        BigInteger, ForeignKey(Material.id), nullable=False
+    )  # Assuming material table exists
    innovation_uplift = Column(Float)
    partial_project_score = Column(Float)
    uplift_project_score = Column(Float)
--- a/backend/app/db/models/inspections.py
+++ b/backend/app/db/models/inspections.py
@ -9,11 +9,9 @@ from sqlalchemy import (
    Enum,
    ForeignKey,
 )
-from sqlalchemy.ext.declarative import declarative_base
+from backend.app.db.base import Base
 from backend.app.db.models.portfolio import PropertyModel

-Base = declarative_base()
-

 # -------------------------------------------------------------------
 # ENUM DEFINITIONS (equivalent to drizzle pgEnum calls)
--- a/backend/app/db/models/materials.py
+++ b/backend/app/db/models/materials.py
@ -1,10 +1,9 @@
 import enum

 from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, Boolean
-from sqlalchemy.orm import declarative_base
 from sqlalchemy.sql import func

-Base = declarative_base()
+from backend.app.db.base import Base


 class MaterialType(enum.Enum):
--- a/backend/app/db/models/non_intrusive_surveys.py
+++ b/backend/app/db/models/non_intrusive_surveys.py
@ -1,7 +1,5 @@
 from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer
-from sqlalchemy.orm import declarative_base
-
-Base = declarative_base()
+from backend.app.db.base import Base


 class NonIntrusiveSurvey(Base):
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@ -1,13 +1,22 @@
 import enum
 import pytz
 import datetime
-from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
-from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy import (
+    Column,
+    Integer,
+    BigInteger,
+    Text,
+    Boolean,
+    Float,
+    DateTime,
+    Enum,
+    ForeignKey,
+    CheckConstraint,
+)
+from backend.app.db.base import Base
 from backend.app.db.models.users import UserModel  # noqa
 from backend.app.db.models.materials import MaterialType

-Base = declarative_base()
-

 class PortfolioStatus(enum.Enum):
    SCOPING = "scoping"
@ -22,7 +31,7 @@ class PortfolioStatus(enum.Enum):
    NEEDS_REVIEW = "needs review"


-class PortfolioGoal(enum.Enum):
+class PortfolioGoal(enum.Enum):  # TODO: Move to domain?
    VALUATION_IMPROVEMENT = "Valuation Improvement"
    INCREASING_EPC = "Increasing EPC"
    REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions"
@ -31,23 +40,43 @@ class PortfolioGoal(enum.Enum):


 class Portfolio(Base):
-    __tablename__ = 'portfolio'
+    __tablename__ = "portfolio"
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(Text, nullable=False)
    budget = Column(Float)
-    status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
-    goal = Column(Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), nullable=False)
+    status = Column(
+        Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
+    goal = Column(
+        Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
    cost = Column(Float)
    number_of_properties = Column(Integer)
-    co2_equivalent_savings = Column(Float)  # Unit is always tonnes so we don't need to store the unit
-    energy_savings = Column(Float)  # Unit is always kWh so we don't need to store the unit
-    energy_cost_savings = Column(Float)  # Unit is always £ so we don't need to store the unit for the moment
-    property_valuation_increase = Column(Float)  # Unit is always £ so we don't need to store the unit for the moment
-    rental_yield_increase = Column(Float)  # Unit is always £ so we don't need to store the unit for the moment
+    co2_equivalent_savings = Column(
+        Float
+    )  # Unit is always tonnes so we don't need to store the unit
+    energy_savings = Column(
+        Float
+    )  # Unit is always kWh so we don't need to store the unit
+    energy_cost_savings = Column(
+        Float
+    )  # Unit is always £ so we don't need to store the unit for the moment
+    property_valuation_increase = Column(
+        Float
+    )  # Unit is always £ so we don't need to store the unit for the moment
+    rental_yield_increase = Column(
+        Float
+    )  # Unit is always £ so we don't need to store the unit for the moment
    total_work_hours = Column(Float)
    labour_days = Column(Float)
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
-    updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    created_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
+    updated_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
    # Aggregations for summary
    epc_breakdown_pre_retrofit = Column(Text)
    epc_breakdown_post_retrofit = Column(Text)
@ -71,7 +100,7 @@ class PropertyCreationStatus(enum.Enum):
    ERROR = "ERROR"


-class Epc(enum.Enum):
+class Epc(enum.Enum):  # TODO: Move to domain?
    A = "A"
    B = "B"
    C = "C"
@ -82,20 +111,27 @@ class Epc(enum.Enum):


 class PropertyModel(Base):
-    __tablename__ = 'property'
+    __tablename__ = "property"
    id = Column(Integer, primary_key=True, autoincrement=True)
-    portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
+    portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
    creation_status = Column(Enum(PropertyCreationStatus), nullable=False)
-    uprn = Column(Integer)
+    uprn = Column(BigInteger)
    landlord_property_id = Column(Text)
-    building_reference_number = Column(Integer)
-    status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
+    building_reference_number = Column(BigInteger)
+    status = Column(
+        Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
    address = Column(Text)
    postcode = Column(Text)
    has_pre_condition_report = Column(Boolean)
    has_recommendations = Column(Boolean)
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
-    updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    created_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
+    updated_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
    property_type = Column(Text)
    built_form = Column(Text)
    local_authority = Column(Text)
@ -127,7 +163,7 @@ rating_lookup = {
    "Average": FeatureRating.AVERAGE,
    "Poor": FeatureRating.POOR,
    "Very Poor": FeatureRating.VERY_POOR,
-    "N/A": FeatureRating.NA
+    "N/A": FeatureRating.NA,
 }


@ -136,32 +172,45 @@ def get_feature_rating_from_string(rating_str: str):


 class PropertyDetailsEpcModel(Base):
-    __tablename__ = 'property_details_epc'
+    __tablename__ = "property_details_epc"
    id = Column(Integer, primary_key=True, autoincrement=True)
-    property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
-    portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
+    property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
+    portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
    full_address = Column(Text)
    lodgement_date = Column(DateTime)
    is_expired = Column(Boolean)
    total_floor_area = Column(Float)
    walls = Column(Text)
-    walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5'))
+    walls_rating = Column(
+        Integer, CheckConstraint("walls_rating>=1 AND walls_rating<=5")
+    )
    roof = Column(Text)
-    roof_rating = Column(Integer, CheckConstraint('roof_rating>=1 AND roof_rating<=5'))
+    roof_rating = Column(Integer, CheckConstraint("roof_rating>=1 AND roof_rating<=5"))
    floor = Column(Text)
-    floor_rating = Column(Integer, CheckConstraint('floor_rating>=1 AND floor_rating<=5'))
+    floor_rating = Column(
+        Integer, CheckConstraint("floor_rating>=1 AND floor_rating<=5")
+    )
    windows = Column(Text)
-    windows_rating = Column(Integer, CheckConstraint('windows_rating>=1 AND windows_rating<=5'))
+    windows_rating = Column(
+        Integer, CheckConstraint("windows_rating>=1 AND windows_rating<=5")
+    )
    heating = Column(Text)
-    heating_rating = Column(Integer, CheckConstraint('heating_rating>=1 AND heating_rating<=5'))
+    heating_rating = Column(
+        Integer, CheckConstraint("heating_rating>=1 AND heating_rating<=5")
+    )
    heating_controls = Column(Text)
    heating_controls_rating = Column(
-        Integer, CheckConstraint('heating_controls_rating>=1 AND heating_controls_rating<=5')
+        Integer,
+        CheckConstraint("heating_controls_rating>=1 AND heating_controls_rating<=5"),
    )
    hot_water = Column(Text)
-    hot_water_rating = Column(Integer, CheckConstraint('hot_water_rating>=1 AND hot_water_rating<=5'))
+    hot_water_rating = Column(
+        Integer, CheckConstraint("hot_water_rating>=1 AND hot_water_rating<=5")
+    )
    lighting = Column(Text)
-    lighting_rating = Column(Integer, CheckConstraint('lighting_rating>=1 AND lighting_rating<=5'))
+    lighting_rating = Column(
+        Integer, CheckConstraint("lighting_rating>=1 AND lighting_rating<=5")
+    )
    mainfuel = Column(Text)
    ventilation = Column(Text)
    solar_pv = Column(Text)
@ -219,7 +268,7 @@ class PropertyDetailsSpatial(Base):


 class PropertyDetailsMeter(Base):
-    __tablename__ = 'property_details_meter'
+    __tablename__ = "property_details_meter"
    id = Column(Integer, primary_key=True, autoincrement=True)
    uprn = Column(Integer, nullable=False)
    energy_supplier = Column(Text)
@ -230,11 +279,13 @@ class PropertyDetailsMeter(Base):


 class PropertyTargetsModel(Base):
+    """ORM mapping for ``property_targets``: per-property EPC and heat-demand targets."""
+
-    __tablename__ = 'property_targets'
+    __tablename__ = "property_targets"
    id = Column(Integer, primary_key=True, autoincrement=True)
-    property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
-    portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
+    portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
+    # The default must be a callable: a bare ``datetime.now(...)`` call is evaluated
+    # once at import time, stamping every row with the process start time.
+    created_at = Column(
+        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
+    )
    epc = Column(Enum(Epc))
    heat_demand = Column(Text)

@ -242,23 +293,36 @@ class PropertyTargetsModel(Base):
 class PortfolioUsers(Base):
+    """ORM mapping for ``portfolioUsers``: links a user to a portfolio with a role."""
+
    __tablename__ = "portfolioUsers"
    id = Column(Integer, primary_key=True, autoincrement=True)
-    user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
-    portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
+    user_id = Column(Integer, ForeignKey("user.id"), nullable=False)
+    portfolioId = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
    role = Column(Text, nullable=False)
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
-    updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    # Defaults must be callables: a bare ``datetime.now(...)`` call is evaluated
+    # once at import time, stamping every row with the process start time.
+    created_at = Column(
+        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
+    )
+    updated_at = Column(
+        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
+    )


 class PropertyInstalledMeasures(Base):
    """
    This model keeps a record of the installed measures for each property, at the UPRN level
    """
-    __tablename__ = 'property_installed_measures'
+
+    __tablename__ = "property_installed_measures"
    id = Column(Integer, primary_key=True, autoincrement=True)
+    # NOTE(review): PropertyModel.uprn is BigInteger; this column is still Integer — confirm.
    uprn = Column(Integer, nullable=False)
    measure_type = Column(
-        Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
-        nullable=False
+        Enum(
+            MaterialType,
+            values_callable=lambda x: [e.value for e in x],
+            create_constraint=False,
+        ),
+        nullable=False,
+    )
+    # Defaults must be callables: a bare ``datetime.now(...)`` call is evaluated
+    # once at import time, stamping every row with the process start time.
+    created_at = Column(
+        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
+    )
+    installed_at = Column(
+        DateTime, nullable=False, default=lambda: datetime.datetime.now(pytz.utc)
    )
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
-    installed_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@ -1,17 +1,32 @@
-from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum
-from sqlalchemy.orm import declarative_base
+import enum
+from typing import Iterable, List, NamedTuple, Optional, Type
+from sqlalchemy import (
+    Column,
+    BigInteger,
+    String,
+    Float,
+    Boolean,
+    TIMESTAMP,
+    ForeignKey,
+    Enum,
+)
+from sqlalchemy.orm import Mapped, mapped_column
 from sqlalchemy.sql import func
-from backend.app.db.models.portfolio import Portfolio, PropertyModel
+from datetime import datetime
+
+from backend.app.db.base import Base
+from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel
 from backend.app.db.models.materials import Material
 from backend.app.db.models.portfolio import Epc
 from datatypes.enums import QuantityUnits
-import enum

-Base = declarative_base()
+
+def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]:
+    """Return the ``.value`` of each member of *enum_cls*, in iteration order.
+
+    Passed as ``values_callable`` to the SQLAlchemy ``Enum`` type on
+    ``ScenarioModel.goal``.
+    """
+    return [member.value for member in enum_cls]


 class Recommendation(Base):
-    __tablename__ = 'recommendation'
+    __tablename__ = "recommendation"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
@ -37,19 +52,52 @@ class Recommendation(Base):


 class RecommendationMaterials(Base):
+    """ORM mapping for ``recommendation_materials``.
+
+    Each row ties one recommendation to one material and records the depth,
+    quantity (with its unit) and estimated cost for that pairing.
+    """
+
-    __tablename__ = 'recommendation_materials'
+    __tablename__ = "recommendation_materials"

-    id = Column(BigInteger, primary_key=True, autoincrement=True)
-    recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
-    material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False)
-    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
-    depth = Column(Float, nullable=False)
-    quantity = Column(Float, nullable=False)
-    quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False)
-    estimated_cost = Column(Float, nullable=False)
+    id: Mapped[int] = mapped_column(
+        BigInteger, primary_key=True, autoincrement=True
+    )
+
+    recommendation_id: Mapped[int] = mapped_column(
+        BigInteger,
+        ForeignKey("recommendation.id"),
+        nullable=False,
+    )
+
+    material_id: Mapped[int] = mapped_column(
+        BigInteger,
+        ForeignKey(Material.id),
+        nullable=False,
+    )
+
+    # Timestamp is filled in by the database (server_default), not by Python.
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP,
+        nullable=False,
+        server_default=func.now(),
+    )
+
+    depth: Mapped[float] = mapped_column(
+        Float,
+        nullable=False,
+    )
+
+    quantity: Mapped[float] = mapped_column(
+        Float,
+        nullable=False,
+    )
+
+    # values_callable makes the column use each member's .value rather than its name.
+    quantity_unit: Mapped[QuantityUnits] = mapped_column(
+        Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
+
+    estimated_cost: Mapped[float] = mapped_column(
+        Float,
+        nullable=False,
+    )


-class PlanTypeEnum(enum.Enum):
+class PlanTypeEnum(enum.Enum):  # TODO: move this to domain?
    SOLAR_ECO4 = "solar_eco4"
    SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4"
    EMPTY_CAVITY_ECO = "empty_cavity_eco"
@ -57,20 +105,36 @@ class PlanTypeEnum(enum.Enum):
    EXTRACTION_ECO = "extraction_eco"


-class Plan(Base):
-    __tablename__ = 'plan'
+class PlanModel(Base):
+    __tablename__ = "plan"

-    id = Column(BigInteger, primary_key=True, autoincrement=True)
-    name = Column(String, nullable=True, default="")
-    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
-    property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
-    scenario_id = Column(BigInteger, ForeignKey('scenario.id'))  # Doesn't have to be linked to a scenario
-    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
-    is_default = Column(Boolean, nullable=False)
-    valuation_increase_lower_bound = Column(Float)
-    valuation_increase_upper_bound = Column(Float)
-    valuation_increase_average = Column(Float)
-    plan_type = Column(
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+
+    name: Mapped[Optional[str]] = mapped_column(String, nullable=True, default="")
+
+    portfolio_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey(Portfolio.id), nullable=False
+    )
+
+    property_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey(PropertyModel.id), nullable=False
+    )
+
+    scenario_id: Mapped[Optional[int]] = mapped_column(
+        BigInteger, ForeignKey("scenario.id")
+    )
+
+    created_at: Mapped[datetime] = mapped_column(  # type: ignore
+        TIMESTAMP, nullable=False, server_default=func.now()
+    )
+
+    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False)
+
+    valuation_increase_lower_bound: Mapped[Optional[float]] = mapped_column(Float)
+    valuation_increase_upper_bound: Mapped[Optional[float]] = mapped_column(Float)
+    valuation_increase_average: Mapped[Optional[float]] = mapped_column(Float)
+
+    plan_type: Mapped[Optional[PlanTypeEnum]] = mapped_column(
        Enum(
            PlanTypeEnum,
            name="plan_type",
@ -79,73 +143,90 @@ class Plan(Base):
        ),
        nullable=True,
    )
-    post_sap_points = Column(Float)
-    post_epc_rating = Column(Enum(Epc))
-    post_co2_emissions = Column(Float)
-    co2_savings = Column(Float)
-    post_energy_bill = Column(Float)
-    energy_bill_savings = Column(Float)
-    post_energy_consumption = Column(Float)  # energy demand in kWh/year
-    energy_consumption_savings = Column(Float)
-    valuation_post_retrofit = Column(Float)
-    valuation_increase = Column(Float)
+
+    post_sap_points: Mapped[Optional[float]] = mapped_column(Float)
+    post_epc_rating: Mapped[Optional[Epc]] = mapped_column(Enum(Epc))
+    post_co2_emissions: Mapped[Optional[float]] = mapped_column(Float)
+    co2_savings: Mapped[Optional[float]] = mapped_column(Float)
+    post_energy_bill: Mapped[Optional[float]] = mapped_column(Float)
+    energy_bill_savings: Mapped[Optional[float]] = mapped_column(Float)
+    post_energy_consumption: Mapped[Optional[float]] = mapped_column(Float)
+    energy_consumption_savings: Mapped[Optional[float]] = mapped_column(Float)
+    valuation_post_retrofit: Mapped[Optional[float]] = mapped_column(Float)
+    valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
+
    # Financial metrics, excluding funding
-    cost_of_works = Column(Float)
-    contingency_cost = Column(Float)
+    cost_of_works: Mapped[Optional[float]] = mapped_column(Float)
+    contingency_cost: Mapped[Optional[float]] = mapped_column(Float)


 class PlanRecommendations(Base):
+    """ORM mapping for ``plan_recommendations``: links a plan row to a recommendation row."""
+
-    __tablename__ = 'plan_recommendations'
+    __tablename__ = "plan_recommendations"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
-    plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
-    recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
+    plan_id = Column(BigInteger, ForeignKey("plan.id"), nullable=False)
+    recommendation_id = Column(
+        BigInteger, ForeignKey("recommendation.id"), nullable=False
+    )


-class Scenario(Base):
-    __tablename__ = 'scenario'
+class ScenarioModel(Base):
+    """ORM mapping for the ``scenario`` table.
+
+    Holds the scenario's configuration (goal, budget, input file paths) together
+    with the aggregate result fields listed in the second half of the class.
+    """
+
+    __tablename__ = "scenario"

-    id = Column(BigInteger, primary_key=True, autoincrement=True)
-    name = Column(String, nullable=False)
-    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
-    budget = Column(Float)
-    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
-    housing_type = Column(String, nullable=False)
-    goal = Column(String, nullable=False)
-    goal_value = Column(String, nullable=False)
-    trigger_file_path = Column(String, nullable=False)
-    already_installed_file_path = Column(String)
-    patches_file_path = Column(String)
-    non_invasive_recommendations_file_path = Column(String)
-    exclusions = Column(String)
-    multi_plan = Column(Boolean, default=False)
-    is_default = Column(Boolean, default=False, nullable=False)
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+    name: Mapped[str] = mapped_column(String, nullable=False)
+    # Timestamp is filled in by the database (server_default), not by Python.
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP, nullable=False, server_default=func.now()
+    )
+    budget: Mapped[Optional[float]] = mapped_column(Float)
+    portfolio_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey(Portfolio.id), nullable=False
+    )
+    housing_type: Mapped[str] = mapped_column(String, nullable=False)
+    # Enum type named "goal"; portfolio_goal_values supplies the member values.
+    goal: Mapped[PortfolioGoal] = mapped_column(
+        Enum(PortfolioGoal, values_callable=portfolio_goal_values, name="goal"),
+        nullable=False,
+    )
+    goal_value: Mapped[str] = mapped_column(String, nullable=False)
+    trigger_file_path: Mapped[str] = mapped_column(String, nullable=False)
+    already_installed_file_path: Mapped[Optional[str]] = mapped_column(String)
+    patches_file_path: Mapped[Optional[str]] = mapped_column(String)
+    non_invasive_recommendations_file_path: Mapped[Optional[str]] = mapped_column(
+        String
+    )
+    exclusions: Mapped[Optional[str]] = mapped_column(String)
+    # NOTE(review): ``Mapped[bool]`` with no explicit ``nullable`` is inferred as
+    # NOT NULL by SQLAlchemy 2.0, whereas the previous ``Column(Boolean,
+    # default=False)`` was nullable — confirm this matches the migration.
+    multi_plan: Mapped[bool] = mapped_column(Boolean, default=False)
+    is_default: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)

    # Add in the fields we need, which were previously sitting at the portfolio level
-    cost = Column(Float)
-    contingency = Column(Float)
-    funding = Column(Float)
-    total_work_hours = Column(Float)
-    energy_savings = Column(Float)
-    co2_equivalent_savings = Column(Float)
-    energy_cost_savings = Column(Float)
-    epc_breakdown_pre_retrofit = Column(String)
-    epc_breakdown_post_retrofit = Column(String)
-    number_of_properties = Column(BigInteger)
-    n_units_to_retrofit = Column(BigInteger)
-    co2_per_unit_pre_retrofit = Column(String)
-    co2_per_unit_post_retrofit = Column(String)
-    energy_bill_per_unit_pre_retrofit = Column(String)
-    energy_bill_per_unit_post_retrofit = Column(String)
-    energy_consumption_per_unit_pre_retrofit = Column(String)
-    energy_consumption_per_unit_post_retrofit = Column(String)
-    valuation_improvement_per_unit = Column(String)
-    cost_per_unit = Column(String)
-    cost_per_co2_saved = Column(String)
-    cost_per_sap_point = Column(String)
-    valuation_return_on_investment = Column(String)
-    property_valuation_increase = Column(Float)
-    labour_days = Column(Float)
+    cost: Mapped[Optional[float]] = mapped_column(Float)
+    contingency: Mapped[Optional[float]] = mapped_column(Float)
+    funding: Mapped[Optional[float]] = mapped_column(Float)
+    total_work_hours: Mapped[Optional[float]] = mapped_column(Float)
+    energy_savings: Mapped[Optional[float]] = mapped_column(Float)
+    co2_equivalent_savings: Mapped[Optional[float]] = mapped_column(Float)
+    energy_cost_savings: Mapped[Optional[float]] = mapped_column(Float)
+    epc_breakdown_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    epc_breakdown_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    number_of_properties: Mapped[Optional[int]] = mapped_column(BigInteger)
+    n_units_to_retrofit: Mapped[Optional[int]] = mapped_column(BigInteger)
+    co2_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    co2_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    energy_bill_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    energy_bill_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    energy_consumption_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(
+        String
+    )
+    energy_consumption_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(
+        String
+    )
+    valuation_improvement_per_unit: Mapped[Optional[str]] = mapped_column(String)
+    cost_per_unit: Mapped[Optional[str]] = mapped_column(String)
+    cost_per_co2_saved: Mapped[Optional[str]] = mapped_column(String)
+    cost_per_sap_point: Mapped[Optional[str]] = mapped_column(String)
+    valuation_return_on_investment: Mapped[Optional[str]] = mapped_column(String)
+    property_valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
+    labour_days: Mapped[Optional[float]] = mapped_column(Float)


 class MeasureType(enum.Enum):
@ -201,3 +282,12 @@ class InstalledMeasure(Base):
    heat_demand_savings = Column(Float)
    source = Column(String)
    is_active = Column(Boolean, nullable=False, default=True)
+
+
+def enum_values(e: Iterable[PlanTypeEnum]) -> list[str]:
+    """Return the ``.value`` of each item yielded by *e*, in iteration order."""
+    return [member.value for member in e]
+
+
+class PlanPersistence(NamedTuple):
+    """A plan ORM object bundled with the scenario ORM object it belongs to."""
+
+    plan: PlanModel
+    scenario: ScenarioModel
--- a/backend/app/db/models/solar.py
+++ b/backend/app/db/models/solar.py
@ -2,9 +2,7 @@ import datetime
 import pytz
 from enum import Enum as PyEnum
 from sqlalchemy import Column, Integer, Float, DateTime, JSON, BigInteger, ForeignKey, Enum, Boolean
-from sqlalchemy.ext.declarative import declarative_base
-
-Base = declarative_base()
+from backend.app.db.base import Base


 class Solar(Base):
--- a/backend/app/db/models/tasks.py
+++ b/backend/app/db/models/tasks.py
@ -1,14 +1,24 @@
+import enum
 from typing import Optional
 from datetime import datetime
 from uuid import UUID, uuid4

+from sqlalchemy import Column, Enum
 from sqlmodel import SQLModel, Field, Relationship


+class SourceEnum(enum.Enum):  # TODO: move to domain?
+    """Allowed values for ``Task.source``; the member value is what the column stores."""
+
+    PORTFOLIO = "portfolio_id"
+
+
 class Task(SQLModel, table=True):
    __tablename__ = "tasks"

-    id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, )
+    id: UUID = Field(
+        default_factory=uuid4,
+        primary_key=True,
+        index=True,
+    )
    task_source: str
    job_started: Optional[datetime] = None
    job_completed: Optional[datetime] = None
@ -16,13 +26,32 @@ class Task(SQLModel, table=True):
    service: Optional[str] = None
    updated_at: datetime = Field(default_factory=datetime.utcnow)

+    # source: Mapped[Optional[SourceEnum]] = mapped_column(Enum(SourceEnum)) <- SQLAlchemy not SQLModel
+
+    source: Optional[SourceEnum] = Field(
+        default=None,
+        sa_column=Column(
+            Enum(
+                SourceEnum,
+                name="source",
+                values_callable=lambda e: [m.value for m in e],
+            ),
+            nullable=True,
+        ),
+    )
+    source_id: Optional[str] = None
+
    sub_tasks: list["SubTask"] = Relationship(back_populates="task")


 class SubTask(SQLModel, table=True):
    __tablename__ = "sub_task"

-    id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, )
+    id: UUID = Field(
+        default_factory=uuid4,
+        primary_key=True,
+        index=True,
+    )

    task_id: UUID = Field(foreign_key="tasks.id")
    job_started: Optional[datetime] = None
--- a/backend/app/db/models/users.py
+++ b/backend/app/db/models/users.py
@ -1,8 +1,6 @@
 from sqlalchemy import Column, Integer, String, DateTime
-from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.sql import func
-
-Base = declarative_base()
+from backend.app.db.base import Base


 class UserModel(Base):
--- a/backend/app/db/models/whlg.py
+++ b/backend/app/db/models/whlg.py
@ -1,4 +1,3 @@
-import uuid
 from typing import Optional
 from sqlmodel import SQLModel, Field

@ -12,4 +11,4 @@ class Whlg(SQLModel, table=True):
        index=True,
    )

-    postcode: str = Field(nullable=False)
+    postcode: str = Field(nullable=False)
--- a/backend/app/domain/classes/plan.py
+++ b/backend/app/domain/classes/plan.py
@ -0,0 +1,160 @@
+from __future__ import annotations
+from dataclasses import replace
+from typing import Optional
+
+from backend.app.db.models.portfolio import PortfolioGoal
+from backend.app.db.models.recommendations import (
+    PlanModel,
+    PlanPersistence,
+    ScenarioModel,
+)
+from backend.app.domain.classes.scenario import Scenario
+from backend.app.domain.records.plan_record import PlanRecord
+from backend.app.utils import sap_to_epc
+
+
+class Plan:
+    """Domain object pairing an immutable ``PlanRecord`` with its ``Scenario``.
+
+    ``id`` is the plan's database primary key, or ``None`` when none was supplied.
+    """
+
+    def __init__(
+        self, record: PlanRecord, scenario: Scenario, id: Optional[int] = None
+    ):
+        self.id: Optional[int] = id
+        self.record: PlanRecord = record
+        self.scenario: Scenario = scenario
+
+    @classmethod
+    def from_sqlalchemy(cls, plan_model: PlanModel, scenario: Scenario) -> Plan:
+        """Build a ``Plan`` from an ORM row and its already-converted scenario.
+
+        Raises:
+            ValueError: if ``scenario`` is falsy (e.g. ``None``).
+        """
+        if not scenario:
+            raise ValueError(f"No Scenario associated with Plan of ID {plan_model.id}")
+
+        record = PlanRecord(
+            property_id=plan_model.property_id,
+            portfolio_id=plan_model.portfolio_id,
+            created_at=plan_model.created_at,
+            is_default=plan_model.is_default,
+            valuation_increase_lower_bound=plan_model.valuation_increase_lower_bound,
+            valuation_increase_upper_bound=plan_model.valuation_increase_upper_bound,
+            valuation_increase_average=plan_model.valuation_increase_average,
+            plan_type=plan_model.plan_type,
+            post_sap_points=plan_model.post_sap_points,
+            post_epc_rating=plan_model.post_epc_rating,
+            post_co2_emissions=plan_model.post_co2_emissions,
+            co2_savings=plan_model.co2_savings,
+            post_energy_bill=plan_model.post_energy_bill,
+            energy_bill_savings=plan_model.energy_bill_savings,
+            post_energy_consumption=plan_model.post_energy_consumption,
+            energy_consumption_savings=plan_model.energy_consumption_savings,
+            valuation_post_retrofit=plan_model.valuation_post_retrofit,
+            valuation_increase=plan_model.valuation_increase,
+            cost_of_works=plan_model.cost_of_works,
+            contingency_cost=plan_model.contingency_cost,
+            name=plan_model.name,
+        )
+        return cls(record=record, scenario=scenario, id=plan_model.id)
+
+    @property
+    def is_compliant(self) -> bool:
+        """Whether this plan meets its scenario's goal.
+
+        Raises:
+            NotImplementedError: for any goal other than ``INCREASING_EPC``.
+        """
+        goal: PortfolioGoal = self.scenario.record.goal
+
+        match goal:
+            case PortfolioGoal.INCREASING_EPC:
+                return self._is_compliant_epc()
+            case _:
+                raise NotImplementedError
+
+    @property
+    def cost(self) -> float:
+        """``cost_of_works``, or ``+inf`` when no cost has been recorded."""
+        return (
+            self.record.cost_of_works
+            if self.record.cost_of_works is not None
+            else float("inf")
+        )
+
+    def to_sqlalchemy(self) -> PlanPersistence:
+        """Convert this plan and its scenario into new ORM objects.
+
+        Returns:
+            A ``PlanPersistence`` holding the ``PlanModel`` and ``ScenarioModel``.
+        """
+        scenario_record = self.scenario.record
+
+        # NOTE(review): ScenarioModel.portfolio_id is declared nullable=False but is
+        # not set here — confirm how callers persist the returned scenario.
+        scenario_model = ScenarioModel(
+            id=self.scenario.id,
+            name=scenario_record.name,
+            created_at=scenario_record.created_at,
+            housing_type=scenario_record.housing_type,
+            goal=scenario_record.goal,
+            goal_value=scenario_record.goal_value,
+            trigger_file_path=scenario_record.trigger_file_path,
+            multi_plan=scenario_record.multi_plan,
+            is_default=scenario_record.is_default,
+            budget=scenario_record.budget,
+            already_installed_file_path=scenario_record.already_installed_file_path,
+            patches_file_path=scenario_record.patches_file_path,
+            non_invasive_recommendations_file_path=scenario_record.non_invasive_recommendations_file_path,
+            exclusions=scenario_record.exclusions,
+            cost=scenario_record.cost,
+            contingency=scenario_record.contingency,
+            funding=scenario_record.funding,
+            total_work_hours=scenario_record.total_work_hours,
+            energy_savings=scenario_record.energy_savings,
+            co2_equivalent_savings=scenario_record.co2_equivalent_savings,
+            energy_cost_savings=scenario_record.energy_cost_savings,
+            epc_breakdown_pre_retrofit=scenario_record.epc_breakdown_pre_retrofit,
+            epc_breakdown_post_retrofit=scenario_record.epc_breakdown_post_retrofit,
+            number_of_properties=scenario_record.number_of_properties,
+            n_units_to_retrofit=scenario_record.n_units_to_retrofit,
+            co2_per_unit_pre_retrofit=scenario_record.co2_per_unit_pre_retrofit,
+            co2_per_unit_post_retrofit=scenario_record.co2_per_unit_post_retrofit,
+            energy_bill_per_unit_pre_retrofit=scenario_record.energy_bill_per_unit_pre_retrofit,
+            energy_bill_per_unit_post_retrofit=scenario_record.energy_bill_per_unit_post_retrofit,
+            energy_consumption_per_unit_pre_retrofit=scenario_record.energy_consumption_per_unit_pre_retrofit,
+            energy_consumption_per_unit_post_retrofit=scenario_record.energy_consumption_per_unit_post_retrofit,
+            valuation_improvement_per_unit=scenario_record.valuation_improvement_per_unit,
+            cost_per_unit=scenario_record.cost_per_unit,
+            cost_per_co2_saved=scenario_record.cost_per_co2_saved,
+            cost_per_sap_point=scenario_record.cost_per_sap_point,
+            valuation_return_on_investment=scenario_record.valuation_return_on_investment,
+            property_valuation_increase=scenario_record.property_valuation_increase,
+            labour_days=scenario_record.labour_days,
+        )
+
+        record = self.record
+
+        plan_model = PlanModel(
+            id=self.id,
+            property_id=record.property_id,
+            portfolio_id=record.portfolio_id,
+            scenario_id=self.scenario.id,
+            created_at=record.created_at,
+            is_default=record.is_default,
+            valuation_increase_lower_bound=record.valuation_increase_lower_bound,
+            valuation_increase_upper_bound=record.valuation_increase_upper_bound,
+            valuation_increase_average=record.valuation_increase_average,
+            plan_type=record.plan_type,
+            post_sap_points=record.post_sap_points,
+            post_epc_rating=record.post_epc_rating,
+            post_co2_emissions=record.post_co2_emissions,
+            co2_savings=record.co2_savings,
+            post_energy_bill=record.post_energy_bill,
+            energy_bill_savings=record.energy_bill_savings,
+            post_energy_consumption=record.post_energy_consumption,
+            energy_consumption_savings=record.energy_consumption_savings,
+            valuation_post_retrofit=record.valuation_post_retrofit,
+            valuation_increase=record.valuation_increase,
+            cost_of_works=record.cost_of_works,
+            contingency_cost=record.contingency_cost,
+            name=record.name,
+        )
+
+        return PlanPersistence(plan=plan_model, scenario=scenario_model)
+
+    def set_default(self, value: bool) -> None:
+        """Set ``is_default`` to *value* on both the plan and its scenario.
+
+        The records are frozen dataclasses, so each is replaced by a modified copy.
+        """
+        self.record = replace(self.record, is_default=value)
+        self.scenario.record = replace(self.scenario.record, is_default=value)
+
+    def _is_compliant_epc(self) -> bool:
+        """Compare the plan's post-retrofit EPC band with the scenario's goal value.
+
+        Uses ``post_epc_rating`` when present, otherwise derives the band from
+        ``post_sap_points``; returns ``False`` when neither is available.
+        """
+        goal_value: str = self.scenario.record.goal_value
+
+        if self.record.post_epc_rating is not None:
+            post_epc = self.record.post_epc_rating.value
+        elif self.record.post_sap_points is not None:
+            # Explicit None check: a SAP score of 0 is a real value and must
+            # still be converted rather than treated as missing.
+            post_epc = sap_to_epc(self.record.post_sap_points)
+        else:
+            return False
+
+        # String comparison: bands are single letters, so "<=" means "at or
+        # before the goal letter in the alphabet".
+        # NOTE(review): assumes sap_to_epc returns the band as a str and that
+        # goal_value holds a band letter — confirm.
+        return post_epc <= goal_value
--- a/backend/app/domain/classes/scenario.py
+++ b/backend/app/domain/classes/scenario.py
@ -0,0 +1,58 @@
+from __future__ import annotations
+from dataclasses import replace
+from typing import Optional
+
+from backend.app.db.models.recommendations import ScenarioModel
+from backend.app.domain.records.scenario_record import ScenarioRecord
+
+
+class Scenario:
+    """Domain object wrapping a frozen ``ScenarioRecord`` and its optional id."""
+
+    def __init__(self, record: ScenarioRecord, id: Optional[int] = None):
+        self.id = id
+        self.record = record
+
+    @classmethod
+    def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario:
+        """Build a ``Scenario`` from an ORM row, carrying the row's id alongside."""
+        row = scenario_model  # short alias keeps the field list within line length
+        return cls(
+            ScenarioRecord(
+                name=row.name,
+                created_at=row.created_at,
+                housing_type=row.housing_type,
+                goal=row.goal,
+                goal_value=row.goal_value,
+                trigger_file_path=row.trigger_file_path,
+                multi_plan=row.multi_plan,
+                is_default=row.is_default,
+                budget=row.budget,
+                already_installed_file_path=row.already_installed_file_path,
+                patches_file_path=row.patches_file_path,
+                non_invasive_recommendations_file_path=(
+                    row.non_invasive_recommendations_file_path
+                ),
+                exclusions=row.exclusions,
+                cost=row.cost,
+                contingency=row.contingency,
+                funding=row.funding,
+                total_work_hours=row.total_work_hours,
+                energy_savings=row.energy_savings,
+                co2_equivalent_savings=row.co2_equivalent_savings,
+                energy_cost_savings=row.energy_cost_savings,
+                epc_breakdown_pre_retrofit=row.epc_breakdown_pre_retrofit,
+                epc_breakdown_post_retrofit=row.epc_breakdown_post_retrofit,
+                number_of_properties=row.number_of_properties,
+                n_units_to_retrofit=row.n_units_to_retrofit,
+                co2_per_unit_pre_retrofit=row.co2_per_unit_pre_retrofit,
+                co2_per_unit_post_retrofit=row.co2_per_unit_post_retrofit,
+                energy_bill_per_unit_pre_retrofit=row.energy_bill_per_unit_pre_retrofit,
+                energy_bill_per_unit_post_retrofit=row.energy_bill_per_unit_post_retrofit,
+                energy_consumption_per_unit_pre_retrofit=(
+                    row.energy_consumption_per_unit_pre_retrofit
+                ),
+                energy_consumption_per_unit_post_retrofit=(
+                    row.energy_consumption_per_unit_post_retrofit
+                ),
+                valuation_improvement_per_unit=row.valuation_improvement_per_unit,
+                cost_per_unit=row.cost_per_unit,
+                cost_per_co2_saved=row.cost_per_co2_saved,
+                cost_per_sap_point=row.cost_per_sap_point,
+                valuation_return_on_investment=row.valuation_return_on_investment,
+                property_valuation_increase=row.property_valuation_increase,
+                labour_days=row.labour_days,
+            ),
+            row.id,
+        )
+
+    def set_default(self, value: bool) -> None:
+        """Swap in a copy of the record whose ``is_default`` equals *value*."""
+        updated_record = replace(self.record, is_default=value)
+        self.record = updated_record
--- a/backend/app/domain/records/plan_record.py
+++ b/backend/app/domain/records/plan_record.py
@ -0,0 +1,32 @@
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional
+
+from backend.app.db.models.portfolio import Epc
+from backend.app.db.models.recommendations import PlanTypeEnum
+
+
+@dataclass(frozen=True)
+class PlanRecord:
+    """Frozen dataclass holding the fields of a single plan.
+
+    ``property_id``, ``portfolio_id``, ``created_at`` and ``is_default`` are
+    required; every other field is optional and defaults to ``None``.
+    Because the dataclass is frozen, attributes cannot be re-assigned after
+    construction.
+    """
+
+    # Required fields (no defaults).
+    property_id: int
+    portfolio_id: int
+    created_at: datetime
+    is_default: bool
+
+    # Optional fields, all defaulting to None.
+    valuation_increase_lower_bound: Optional[float] = None
+    valuation_increase_upper_bound: Optional[float] = None
+    valuation_increase_average: Optional[float] = None
+    plan_type: Optional[PlanTypeEnum] = None
+    post_sap_points: Optional[float] = None
+    post_epc_rating: Optional[Epc] = None
+    post_co2_emissions: Optional[float] = None
+    co2_savings: Optional[float] = None
+    post_energy_bill: Optional[float] = None
+    energy_bill_savings: Optional[float] = None
+    post_energy_consumption: Optional[float] = None
+    energy_consumption_savings: Optional[float] = None
+    valuation_post_retrofit: Optional[float] = None
+    valuation_increase: Optional[float] = None
+    cost_of_works: Optional[float] = None
+    contingency_cost: Optional[float] = None
+    name: Optional[str] = None
--- a/backend/app/domain/records/scenario_record.py
+++ b/backend/app/domain/records/scenario_record.py
@ -0,0 +1,47 @@
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional
+
+from backend.app.db.models.portfolio import PortfolioGoal
+
+
+@dataclass(frozen=True)
+class ScenarioRecord:
+    """Frozen dataclass holding the fields of a single scenario.
+
+    The first eight fields (``name`` through ``is_default``) are required;
+    all remaining fields are optional and default to ``None``. Because the
+    dataclass is frozen, attributes cannot be re-assigned after construction.
+    """
+
+    # Required fields (no defaults).
+    name: str
+    created_at: datetime
+    housing_type: str
+    goal: PortfolioGoal
+    goal_value: str
+    trigger_file_path: str
+    multi_plan: bool
+    is_default: bool
+    # Optional fields, all defaulting to None.
+    budget: Optional[float] = None
+    already_installed_file_path: Optional[str] = None
+    patches_file_path: Optional[str] = None
+    non_invasive_recommendations_file_path: Optional[str] = None
+    exclusions: Optional[str] = None
+
+    # NOTE(review): several fields below are typed ``str`` although their names
+    # suggest numeric metrics — presumably serialized values; confirm against
+    # the values assigned from ScenarioModel.
+    cost: Optional[float] = None
+    contingency: Optional[float] = None
+    funding: Optional[float] = None
+    total_work_hours: Optional[float] = None
+    energy_savings: Optional[float] = None
+    co2_equivalent_savings: Optional[float] = None
+    energy_cost_savings: Optional[float] = None
+    epc_breakdown_pre_retrofit: Optional[str] = None
+    epc_breakdown_post_retrofit: Optional[str] = None
+    number_of_properties: Optional[int] = None
+    n_units_to_retrofit: Optional[int] = None
+    co2_per_unit_pre_retrofit: Optional[str] = None
+    co2_per_unit_post_retrofit: Optional[str] = None
+    energy_bill_per_unit_pre_retrofit: Optional[str] = None
+    energy_bill_per_unit_post_retrofit: Optional[str] = None
+    energy_consumption_per_unit_pre_retrofit: Optional[str] = None
+    energy_consumption_per_unit_post_retrofit: Optional[str] = None
+    valuation_improvement_per_unit: Optional[str] = None
+    cost_per_unit: Optional[str] = None
+    cost_per_co2_saved: Optional[str] = None
+    cost_per_sap_point: Optional[str] = None
+    valuation_return_on_investment: Optional[str] = None
+    property_valuation_increase: Optional[float] = None
+    labour_days: Optional[float] = None
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -1,21 +1,29 @@
+from typing import List
+from uuid import UUID
+
 import boto3
 import json
 import math
 import asyncio
-from contextlib import contextmanager
-from sqlmodel import Session

 from datetime import datetime

 from fastapi import APIRouter, Depends
+from backend.app.db.connection import db_session
+from backend.app.db.models.tasks import SourceEnum
 from backend.app.dependencies import validate_token
 from backend.app.plan.schemas import PlanTriggerRequest
 from backend.app.config import get_settings
-from sqlalchemy.orm import sessionmaker
+from backend.categorisation.categorisation_trigger_request import (
+    CategorisationTriggerRequest,
+)
 from utils.logger import setup_logger
-from backend.app.db.connection import db_engine

-from backend.app.db.functions.recommendations_functions import create_scenario
+from backend.app.db.functions.recommendations_functions import (
+    create_scenario,
+    get_property_ids,
+    get_scenarios_count_by_portfolio_id,
+)
 from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface

 logger = setup_logger()
@ -24,23 +32,88 @@ router = APIRouter(
    prefix="/plan",
    tags=["plan"],
    dependencies=[Depends(validate_token)],
-    responses={404: {"description": "Not found"}}
+    responses={404: {"description": "Not found"}},
 )

-sqs_client = boto3.client("sqs")
+settings = get_settings()
+sqs_client = boto3.client("sqs", settings.AWS_DEFAULT_REGION)


-@contextmanager
-def db_session():
-    session = Session(db_engine)
-    try:
-        yield session
-        session.commit()
-    except Exception:
-        session.rollback()
-        raise
-    finally:
-        session.close()
+@router.post("/categorisation", status_code=202)
+async def trigger_categorisation(
+    body: CategorisationTriggerRequest,
+) -> dict[str, str]:
+    """Split a portfolio's categorisation into batches and queue them on SQS.
+
+    Loads the portfolio's property ids and scenario count, sizes batches to
+    aim for about 1000 plan writes each (never fewer than one property per
+    batch), creates one task plus one sub-task per batch, and sends each
+    batch to ``settings.CATEGORISATION_SQS_URL`` as a
+    ``CategorisationTriggerRequest`` carrying the batch's min/max property id
+    and its sub-task id.
+
+    Args:
+        body: Portfolio to categorise plus optional scenario lists, which are
+            forwarded unchanged in every batch message.
+
+    Returns:
+        A confirmation message once all batches have been sent.
+    """
+    payload: CategorisationTriggerRequest = CategorisationTriggerRequest.model_validate(
+        body
+    )
+
+    logger.info("API triggered with body: %s", payload)
+
+    property_ids: list[int] = get_property_ids(payload.portfolio_id)
+    property_ids.sort()
+
+    num_scenarios: int = get_scenarios_count_by_portfolio_id(payload.portfolio_id)
+    total_plans_to_update: int = len(property_ids) * num_scenarios
+
+    max_writes_per_batch: int = 1000
+    # Clamp the divisor to 1: a portfolio with no scenarios would otherwise
+    # raise ZeroDivisionError and fail the request with a 500.
+    properties_per_batch: int = max(
+        1, max_writes_per_batch // max(1, num_scenarios)
+    )
+
+    num_property_batches: int = math.ceil(len(property_ids) / properties_per_batch)
+
+    logger.info("total_plans_to_update: %s", total_plans_to_update)
+    logger.info("properties_per_batch: %s", properties_per_batch)
+    logger.info("num_property_batchess: %s", num_property_batches)
+
+    # Create task
+    task_id, _ = TasksInterface.create_task(
+        task_source="backend/plan/router.py:trigger_categorisation",
+        service="plan_categorisation",
+        inputs=payload.model_dump(),
+        task_only=True,
+        source=SourceEnum.PORTFOLIO,
+        source_id=str(payload.portfolio_id),
+    )
+
+    # Dispatch requests to lambdas
+    subtask_interface = SubTaskInterface()
+
+    for batch_index in range(num_property_batches):
+
+        start: int = batch_index * properties_per_batch
+        end: int = start + properties_per_batch
+
+        batch_property_ids: List[int] = property_ids[start:end]
+
+        if not batch_property_ids:
+            continue
+
+        # The ids are sorted above, so each batch is described to the worker
+        # by its min/max property id rather than by the full id list.
+        batch_request: CategorisationTriggerRequest = CategorisationTriggerRequest(
+            portfolio_id=payload.portfolio_id,
+            scenarios_to_consider=payload.scenarios_to_consider,
+            scenario_priority_order=payload.scenario_priority_order,
+            min_property_id=min(batch_property_ids),
+            max_property_id=max(batch_property_ids),
+        )
+        # Create sub-task for each
+        subtask_id: UUID = subtask_interface.create_subtask(
+            task_id=task_id, inputs=batch_request.model_dump()
+        )
+        # The sub-task id is only known after creation, so it is attached to
+        # the request just before the message is serialised.
+        batch_request.subtask_id = str(subtask_id)
+
+        response = sqs_client.send_message(
+            QueueUrl=settings.CATEGORISATION_SQS_URL,
+            MessageBody=batch_request.model_dump_json(),
+        )
+
+        logger.info(
+            f"Chunk {batch_index} sent to SQS. {len(batch_property_ids)} Property IDs in batch (total "
+            f"{len(property_ids)}). Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}. Message ID: "
+            f"{response.get('MessageId')}"
+        )
+
+        await asyncio.sleep(0.05)  # Small delay to avoid SQS throttling
+
+    return {"message": "Categorisation jobs distributed"}


@router.post("/trigger", status_code=202)
@ -50,8 +123,6 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
    """
    logger.info("API triggered with body: %s", body)

-    settings = get_settings()
-
    try:
        data = body.model_dump()
    except Exception as e:
@ -59,7 +130,10 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
        return {"message": "Invalid request"}, 400

    # If file_format is domna_asset_list and type is xlsx, read and chunk it
-    if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx":
+    if (
+        data.get("file_format") == "domna_asset_list"
+        and data.get("file_type") == "xlsx"
+    ):
        try:

            total_rows = data.get("sheet_count", 0)
@ -88,8 +162,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
                            "patches_file_path": body.patches_file_path,
                            "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
                            "exclusions": body.exclusions,
-                            "multi_plan": body.multi_plan
-                        }
+                            "multi_plan": body.multi_plan,
+                        },
                    )
                # Insert the scenario ID into the data payload
                data["scenario_id"] = scenario_id
@ -99,7 +173,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
                task_source="backend/plan/router.py:trigger_plan_entrypoint",
                service="plan_engine",
                inputs=data,
-                task_only=True
+                task_only=True,
            )

            subtask_interface = SubTaskInterface()
@ -109,13 +183,14 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
                index_end = min((i + 1) * chunk_size, total_rows)

                message_payload = {
-                    **data, "index_start": index_start, "index_end": index_end,
+                    **data,
+                    "index_start": index_start,
+                    "index_end": index_end,
                }

                # Create a subtask for this chunk
                subtask_id = subtask_interface.create_subtask(
-                    task_id=task_id,
-                    inputs=message_payload
+                    task_id=task_id, inputs=message_payload
                )

                # Add task and subtask to message
@ -125,8 +200,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
                message_body = json.dumps(message_payload)

                response = sqs_client.send_message(
-                    QueueUrl=settings.ENGINE_SQS_URL,
-                    MessageBody=message_body
+                    QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body
                )
                logger.info(
                    f"Chunk {i} sent to SQS. Rows {index_start}–{index_end}. Message ID: {response.get('MessageId')}"
@ -153,8 +227,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
            data["subtask_id"] = str(subtask_id)
            message_body = json.dumps(data)
            response = sqs_client.send_message(
-                QueueUrl=settings.ENGINE_SQS_URL,
-                MessageBody=message_body
+                QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body
            )
            logger.info(f"SQS message sent. Message ID: {response.get('MessageId')}")
        except Exception as e:
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@ -12,6 +12,10 @@ WALL_INSULATION_MEASURES = ["internal_wall_insulation", "external_wall_insulatio
 ROOF_INSULATION_MEASURES = [
    "loft_insulation", "flat_roof_insulation", "room_roof_insulation", "sloping_ceiling_insulation"
 ]
+WALL_INSULATION_WITH_VENTILATION_MEASURES = [
+    "internal_wall_insulation+mechanical_ventilation", "external_wall_insulation+mechanical_ventilation",
+    "cavity_wall_insulation+mechanical_ventilation"
+]

 # Both all and roof insulaiton measures are eligible for ECO4. These are the remaining fabric and heating measures
 # This is based on th measures we have recommendations for
--- a/backend/app/plan/utils.py
+++ b/backend/app/plan/utils.py
@ -1,5 +1,6 @@
 import ast
 import os
+from typing import Optional
 import msgpack
 from uuid import UUID
 from utils.s3 import read_from_s3
@ -24,7 +25,7 @@ def get_cleaned():

    cleaned = read_from_s3(
        s3_file_name="cleaned_epc_data/cleaned.bson",
-        bucket_name=get_settings().DATA_BUCKET
+        bucket_name=get_settings().DATA_BUCKET,
    )

    cleaned = msgpack.unpackb(cleaned, raw=False)
@ -56,32 +57,45 @@ def extract_property_request_data(
 ):
    patch_has_uprn = "uprn" in patches[0] if patches else True
    if patch_has_uprn:
-        patch = next((
-            x for x in patches if str(x["uprn"]) == str(address.uprn)
-        ), {})
+        patch = next((x for x in patches if str(x["uprn"]) == str(address.uprn)), {})
    else:
-        patch = next((
-            x for x in patches if (x["address"] == address.address) and (x["postcode"] == address.postcode)
-        ), {})
+        patch = next(
+            (
+                x
+                for x in patches
+                if (x["address"] == address.address)
+                and (x["postcode"] == address.postcode)
+            ),
+            {},
+        )

    # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN
    # we need to check existence of uprn
-    has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False
+    has_uprn = (
+        "uprn" in non_invasive_recommendations[0]
+        if non_invasive_recommendations
+        else False
+    )
    if has_uprn:
        has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None]

    if has_uprn:
-        property_non_invasive_recommendations = next((
-            x for x in non_invasive_recommendations if
-            (str(x["uprn"]) == str(uprn))
-        ), {})
+        property_non_invasive_recommendations = next(
+            (x for x in non_invasive_recommendations if (str(x["uprn"]) == str(uprn))),
+            {},
+        )

        # We patch the non-invasive recs that are ['cavity_extract_and_refill']
    else:
-        property_non_invasive_recommendations = next((
-            x for x in non_invasive_recommendations if
-            (x["address"] == address.address) and (x["postcode"] == address.postcode)
-        ), {})
+        property_non_invasive_recommendations = next(
+            (
+                x
+                for x in non_invasive_recommendations
+                if (x["address"] == address.address)
+                and (x["postcode"] == address.postcode)
+            ),
+            {},
+        )

    if isinstance(property_non_invasive_recommendations.get("recommendations"), str):
        property_non_invasive_recommendations["recommendations"] = ast.literal_eval(
@ -90,7 +104,11 @@ def extract_property_request_data(
        transformed = []
        for rec in property_non_invasive_recommendations["recommendations"]:
            if isinstance(rec, str):
-                transformed.append({"type": rec, })
+                transformed.append(
+                    {
+                        "type": rec,
+                    }
+                )
            else:
                transformed.append(rec)

@ -102,26 +120,36 @@ def extract_property_request_data(
        valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None]

    if valuation_has_uprn:
-        property_valuation = next((
-            float(x["valuation"]) for x in valuation_data if
-            (str(x["uprn"]) == str(uprn))
-        ), None)
+        property_valuation = next(
+            (
+                float(x["valuation"])
+                for x in valuation_data
+                if (str(x["uprn"]) == str(uprn))
+            ),
+            None,
+        )
    else:
-        property_valuation = next((
-            float(x["valuation"]) for x in valuation_data if
-            (x["address"] == address.address) and (x["postcode"] == address.postcode)
-        ), None)
+        property_valuation = next(
+            (
+                float(x["valuation"])
+                for x in valuation_data
+                if (x["address"] == address.address)
+                and (x["postcode"] == address.postcode)
+            ),
+            None,
+        )

    # Return data class to give a structured format
    return PropertyRequestData(
        patch=patch,
        non_invasive_recommendations=property_non_invasive_recommendations,
-        valuation=property_valuation
+        valuation=property_valuation,
    )


-def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str, list[str]] | tuple[
-    None, None, None, list]:
+def parse_eco_packages(
+    addr: Address, prepared_epc
+) -> tuple[list[str], int, str, list[str]] | tuple[None, None, None, list]:
    solar_identification = addr.solar_reason
    cavity_identification = addr.cavity_reason
    if not solar_identification and not cavity_identification:
@ -140,47 +168,51 @@ def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str
        "Solar Eligible": {
            "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"],
            "target_sap": 86,  # High B
-            "plan_type": "solar_eco4"
+            "plan_type": "solar_eco4",
        },
        "Solar Eligible, Solid Wall Uninsulated, EPC E or Below": {
            "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"],
            "target_sap": 86,  # High B
-            "plan_type": "solar_eco4"
+            "plan_type": "solar_eco4",
        },
        "Solar Eligible, Needs Heating Upgrade": {
-            "measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heaters",
-                         "mechanical_ventilation"],
+            "measures": [
+                "solar_pv",
+                "loft_insulation",
+                "high_heat_retention_storage_heaters",
+                "mechanical_ventilation",
+            ],
            "target_sap": 86,  # High B
-            "plan_type": "solar_hhrsh_eco4"
+            "plan_type": "solar_hhrsh_eco4",
        },
        "Non-Intrusive Data Shows Empty Cavity": {
            "measures": ["cavity_wall_insulation", "mechanical_ventilation"],
            "target_sap": 69,  # Low C
-            "plan_type": "empty_cavity_eco"
+            "plan_type": "empty_cavity_eco",
        },
-        'Non-Intrusive Data Shows Empty Cavity, built after 2002': {
+        "Non-Intrusive Data Shows Empty Cavity, built after 2002": {
            "measures": ["cavity_wall_insulation", "mechanical_ventilation"],
            "target_sap": 69,  # Low C
-            "plan_type": "empty_cavity_eco"
+            "plan_type": "empty_cavity_eco",
        },
        "EPC Shows Empty Cavity, inspections show retro drilled": {
            # EPC Indicates it's empty, so we simulate a fill
            "measures": ["cavity_wall_insulation", "mechanical_ventilation"],
            "target_sap": 69,  # Low C
-            "plan_type": "extraction_eco"
+            "plan_type": "extraction_eco",
        },
        "EPC Shows Empty Cavity, inspections show filled at build": {
            # EPC Indicates it's empty, so we simulate a fill
            "measures": ["cavity_wall_insulation", "mechanical_ventilation"],
            "target_sap": 69,  # Low C
-            "plan_type": "extraction_eco"
+            "plan_type": "extraction_eco",
        },
        "EPC Shows Empty Cavity": {
            # EPC Indicates it's empty, so we simulate a fill
            "measures": ["cavity_wall_insulation", "mechanical_ventilation"],
            "target_sap": 69,  # Low C
-            "plan_type": "empty_cavity_eco"
-        }
+            "plan_type": "empty_cavity_eco",
+        },
    }

    # Always prioritise solar
@ -214,9 +246,13 @@ def build_cloudwatch_log_url(start_ms: int) -> str:
    Build a CloudWatch Logs URL for the current Lambda invocation,
    including timestamp window from start_ms to end_ms (epoch ms).
    """
+    logger.info("Building cloudwatch logs URL")
    region = os.environ["AWS_REGION"]
+    logger.info("Building cloudwatch logs URL: Got AWS region")
    log_group = os.environ["AWS_LAMBDA_LOG_GROUP_NAME"]
+    logger.info("Building cloudwatch logs URL: Got lambda log group name")
    log_stream = os.environ["AWS_LAMBDA_LOG_STREAM_NAME"]
+    logger.info("Building cloudwatch logs URL: Got lambda log stream name")

    # CloudWatch console requires / encoded as $252F
    encoded_group = log_group.replace("/", "$252F")
@ -232,15 +268,21 @@ def build_cloudwatch_log_url(start_ms: int) -> str:
    )


-def handle_error(msg, e, subtask_id, status=500, start_ms=None):
+def handle_error(
+    msg: str,
+    exception: Exception,
+    subtask_id: str,
+    status_code: int = 500,
+    start_ms: Optional[int] = None,
+):
    # When the pipeline fails, handles error process
    cloud_logs_url = build_cloudwatch_log_url(start_ms)

    SubTaskInterface().update_subtask_status(
        subtask_id=UUID(subtask_id),
        status="failed",
-        outputs=str(e),
-        cloud_logs_url=cloud_logs_url
+        outputs=str(exception),
+        cloud_logs_url=cloud_logs_url,
    )
    logger.error(msg, exc_info=True)
-    return Response(status_code=status, content=msg)
+    return Response(status_code=status_code, content=msg)
--- a/backend/app/requirements/requirements.txt
+++ b/backend/app/requirements/requirements.txt
@ -10,7 +10,7 @@ mangum==0.19.0
 # AWS
 boto3==1.35.44
 # Data
-openpyxl==3.1.2
+openpyxl==3.1.5
 # Basic
 pytz
 sqlmodel
--- a/backend/app/tasks/router.py
+++ b/backend/app/tasks/router.py
@ -9,7 +9,7 @@ from backend.app.tasks.schema import (
    CreateSubTaskRequest,
    UpdateSubTaskStatusRequest,
    FinalizeSubTaskRequest,
-    TaskSqsTriggerRequest
+    TaskSqsTriggerRequest,
 )

 # Correct location of interfaces
@ -51,18 +51,18 @@ async def get_task(task_id: UUID):
        if not task:
            raise HTTPException(status_code=404, detail="Task not found")

-        subtasks = session.exec(
-            select(SubTask).where(SubTask.taskId == task_id)
-        ).all()
+        subtasks = session.exec(select(SubTask).where(SubTask.taskId == task_id)).all()

        formatted = []
        for st in subtasks:
-            formatted.append({
-                **st.dict(),
-                "inputs": json.loads(st.inputs) if st.inputs else None,
-                "outputs": json.loads(st.outputs) if st.outputs else None,
-                "cloud_logs_url": st.cloudLogsURL,
-            })
+            formatted.append(
+                {
+                    **st.dict(),
+                    "inputs": json.loads(st.inputs) if st.inputs else None,
+                    "outputs": json.loads(st.outputs) if st.outputs else None,
+                    "cloud_logs_url": st.cloudLogsURL,
+                }
+            )

        return {
            "task": task,
@ -111,7 +111,10 @@ async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusReques

 # ===
 # Sub task is complete
-@router.post("/subtask/{subtask_id}/finalize", summary="Finalize a subtask with status, outputs, logs")
+@router.post(
+    "/subtask/{subtask_id}/finalize",
+    summary="Finalize a subtask with status, outputs, logs",
+)
 async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest):
    subtasks = SubTaskInterface()

@ -120,7 +123,7 @@ async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest):
            subtask_id=subtask_id,
            status=req.status,
            outputs=req.outputs,
-            cloud_logs_url=req.cloud_logs_url
+            cloud_logs_url=req.cloud_logs_url,
        )

        return {
@ -142,9 +145,10 @@ from backend.app.tasks.schema import TaskSqsTriggerRequest
 from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface
 from backend.app.config import get_settings

-sqs = boto3.client("sqs")

-@router.post("/trigger", summary="Create task + subtask and publish to SQS", status_code=202)
+@router.post(
+    "/trigger", summary="Create task + subtask and publish to SQS", status_code=202
+)
 async def trigger_task(req: TaskSqsTriggerRequest):
    """
    Creates a Task + SubTask, then pushes the SubTask into SQS so a Lambda can process it.
@ -152,11 +156,12 @@ async def trigger_task(req: TaskSqsTriggerRequest):
    """

    settings = get_settings()
+    sqs = boto3.client("sqs", settings.AWS_DEFAULT_REGION)

    tasks = TasksInterface()

    # ---- Normalize empty inputs ----
-    inputs = req.inputs or {}   # ensures {} even if null
+    inputs = req.inputs or {}  # ensures {} even if null

    # ---- 1. Create Task + SubTask ----
    task_id, subtask_id = tasks.create_task(
@ -174,8 +179,8 @@ async def trigger_task(req: TaskSqsTriggerRequest):
    try:
        response = sqs.send_message(
            QueueUrl=f"https://sqs.{settings.AWS_REGION}.amazonaws.com/"
-                     f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue",
-            MessageBody=json.dumps(sqs_payload)
+            f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue",
+            MessageBody=json.dumps(sqs_payload),
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"SQS error: {e}")
@ -186,4 +191,4 @@ async def trigger_task(req: TaskSqsTriggerRequest):
        "subtask_id": subtask_id,
        "sqs_message_id": response.get("MessageId"),
        "inputs_sent": inputs,
-    }
+    }
--- a/backend/categorisation/init.py
+++ b/backend/categorisation/init.py
--- a/backend/categorisation/categorisation_trigger_request.py
+++ b/backend/categorisation/categorisation_trigger_request.py
@ -0,0 +1,17 @@
+from typing import List, Optional
+from pydantic import BaseModel
+
+
+class CategorisationTriggerRequest(BaseModel):
+    """Payload describing a categorisation run for one portfolio.
+
+    Only ``portfolio_id`` is required; every other field defaults to ``None``.
+    The same model is used for the API request body and for the per-batch
+    messages the plan router sends to SQS.
+    """
+
+    portfolio_id: int
+
+    # Optional lists of scenario ids.
+    # NOTE(review): presumably a filter and a ranking of scenarios — confirm
+    # against the processor that consumes them.
+    scenarios_to_consider: Optional[List[int]] = None
+    scenario_priority_order: Optional[List[int]] = None
+
+    # Property-id bounds; the plan router sets these to the min/max id of each
+    # batch it dispatches.
+    min_property_id: Optional[int] = None
+    max_property_id: Optional[int] = None
+
+    # Sub-task id as a string; the plan router fills it in after creating the
+    # sub-task for a batch.
+    subtask_id: Optional[str] = None
+
+
+# Example request body: {"portfolio_id": 556, "scenarios_to_consider": [1039,1041], "scenario_priority_order": [1041,1039]}
--- a/backend/categorisation/handler/Dockerfile
+++ b/backend/categorisation/handler/Dockerfile
@ -0,0 +1,42 @@
+# Container image for the categorisation Lambda handler.
+# The COPY paths below are relative to the repository root, so build with the
+# root as context (the local docker-compose file does this with "context: ../../../").
+FROM public.ecr.aws/lambda/python:3.11
+# For local running:
+# FROM python:3.11.10-bullseye
+
+# Build-time database settings; re-exported as runtime environment variables below.
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+
+# Set working directory (Lambda task root)
+WORKDIR /var/task
+
+# Environment
+ENV DB_HOST=${DEV_DB_HOST}
+ENV DB_PORT=${DEV_DB_PORT}
+ENV DB_NAME=${DEV_DB_NAME}
+
+# NOTE(review): the later "COPY backend/ backend/" copies the whole backend tree
+# and would replace this file if the build context contains backend/.env —
+# confirm that is excluded or intended.
+COPY backend/.env.test backend/.env
+
+# -----------------------------
+# Copy requirements FIRST (for Docker layer caching)
+# -----------------------------
+COPY backend/categorisation/handler/requirements.txt .
+
+# Install dependencies into Lambda runtime
+RUN pip install --no-cache-dir -r requirements.txt
+
+# -----------------------------
+# Copy application code
+# -----------------------------
+COPY utils/ utils/
+# NOTE: if build is ever slow we can be more specific with which files are copied
+COPY backend/ backend/
+COPY datatypes/ datatypes/
+
+
+# -----------------------------
+# Lambda handler
+# -----------------------------
+# Refers to the "handler" function in backend/categorisation/handler/handler.py.
+CMD ["backend/categorisation/handler/handler.handler"]
+# For local running
+# CMD ["python", "-m", "backend.categorisation.handler.handler"]
--- a/backend/categorisation/handler/handler.py
+++ b/backend/categorisation/handler/handler.py
@ -0,0 +1,34 @@
+import json
+import time
+from typing import Any, Mapping
+
+from backend.app.db.functions.tasks.Tasks import SubTaskInterface
+from backend.app.plan.utils import build_cloudwatch_log_url
+from backend.categorisation.categorisation_trigger_request import (
+    CategorisationTriggerRequest,
+)
+from backend.categorisation.processor import process_portfolio
+from utils.logger import setup_logger
+
+
+logger = setup_logger()
+
+
+def handler(event: Mapping[str, Any], context: Any) -> None:
+    """Lambda entry point: run categorisation for every record in ``event``.
+
+    Each record's ``body`` is parsed as JSON, validated as a
+    ``CategorisationTriggerRequest`` and handed to ``process_portfolio``.
+    A failure in one record is logged with its traceback and does not stop
+    the remaining records; nothing is re-raised to the caller.
+
+    Args:
+        event: Invocation payload; records are read from its ``Records`` key.
+            A missing or ``None`` value is treated as an empty list.
+        context: Lambda context object (unused).
+    """
+
+    logger.info("Received message")
+
+    # ``or []`` also covers an explicit ``"Records": None``, which would
+    # otherwise break ``len()`` and the loop below.
+    records = event.get("Records") or []
+    logger.info(f"Number of events: {len(records)}")
+
+    for record in records:
+        try:
+            body_dict = json.loads(record["body"])
+            logger.debug("Validating request body")
+            payload = CategorisationTriggerRequest.model_validate(body_dict)
+
+            logger.debug("Successfully validated request body")
+
+            process_portfolio(payload)
+        except Exception as e:
+            # Best effort per record: log and continue with the rest.
+            logger.info("Handler exception")
+            # exc_info keeps the traceback, matching handle_error in plan/utils.
+            logger.error(f"Failed to process record: {e}", exc_info=True)
--- a/backend/categorisation/handler/requirements.txt
+++ b/backend/categorisation/handler/requirements.txt
@ -0,0 +1,10 @@
+sqlmodel
+pydantic-settings
+psycopg2-binary==2.9.10
+starlette
+
+# Not used but needed to satisfy imports
+pytz==2024.2
+msgpack==1.1.0
+numpy<2
+pandas==2.2.3
--- a/backend/categorisation/local_handler/docker-compose.yml
+++ b/backend/categorisation/local_handler/docker-compose.yml
@ -0,0 +1,11 @@
+version: "3.9"
+
+services:
+  categorisation-lambda:
+    build:
+      context: ../../../
+      dockerfile: backend/categorisation/handler/Dockerfile
+    ports:
+      - "9000:8080"
+    env_file:
+      - ../../../.env
--- a/backend/categorisation/local_handler/invoke_local_lambda.py
+++ b/backend/categorisation/local_handler/invoke_local_lambda.py
@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+# Posts a sample event to the categorisation Lambda running locally and prints
+# the HTTP status and response text. The docker-compose file in this directory
+# maps host port 9000 to the container's port 8080.
+import json
+import requests
+
+HOST = "localhost"
+PORT = "9000"
+
+# Invocation endpoint exposed by the locally running Lambda container.
+LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
+
+# Same shape the handler reads: a "Records" list whose items carry a
+# JSON-encoded "body" with the CategorisationTriggerRequest fields.
+payload = {
+    "Records": [
+        {
+            "body": json.dumps(
+                {
+                    "portfolio_id": 569,
+                    "scenarios_to_consider": [],
+                    "scenario_priority_order": [],
+                    "min_property_id": 660418,
+                    "max_property_id": 660917,
+                    "subtask_id": "6a0bcbac-ddab-435f-8708-8acd4662b067",
+                }
+            )
+        }
+    ]
+}
+
+response = requests.post(LAMBDA_URL, json=payload)
+
+print("Status code:", response.status_code)
+print("Response:")
+print(response.text)
--- a/backend/categorisation/local_runner.py
+++ b/backend/categorisation/local_runner.py
@ -0,0 +1,24 @@
+from typing import List
+
+from backend.categorisation.categorisation_trigger_request import (
+    CategorisationTriggerRequest,
+)
+from backend.categorisation.processor import process_portfolio
+
+
+def main() -> None:
+    """Run categorisation locally for the hard-coded portfolio 556.
+
+    Both scenario lists are passed as empty lists.
+    """
+    request = CategorisationTriggerRequest(
+        portfolio_id=556,
+        scenarios_to_consider=[],
+        scenario_priority_order=[],
+    )
+    process_portfolio(request)
+
+
+if __name__ == "__main__":
+    main()
--- a/backend/categorisation/processor.py
+++ b/backend/categorisation/processor.py
@ -0,0 +1,261 @@
+import time
+from collections import defaultdict
+from typing import Dict, List, Optional
+from uuid import UUID
+from starlette.responses import Response
+
+from backend.app.db.functions.recommendations_functions import (
+    bulk_update_plans,
+    get_default_plans,
+    get_most_recent_plans_by_portfolio_id,
+    get_most_recent_plans_by_scenario_ids,
+    get_scenarios_by_portfolio_id,
+)
+from backend.app.db.functions.tasks.Tasks import SubTaskInterface
+from backend.app.db.models.recommendations import PlanModel, ScenarioModel
+from backend.app.domain.classes.plan import Plan
+from backend.app.domain.classes.scenario import Scenario
+from backend.app.plan.utils import build_cloudwatch_log_url, handle_error
+from backend.categorisation.categorisation_trigger_request import (
+    CategorisationTriggerRequest,
+)
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def process_portfolio(
+    body: CategorisationTriggerRequest,
+) -> Response:  # TODO: make this a class
+    """
+    Recalculate which Plan is the default for each property in a portfolio.
+
+    Steps: load the portfolio's scenarios, load the plans under consideration,
+    clear the default flag on the current default plans, choose a new default
+    per property with choose_cheapest_relevant_plan, then write every touched
+    plan back in a single bulk update. When the request carries a subtask_id,
+    the subtask is marked "in progress" before the work starts and "complete"
+    after the database update.
+
+    :param body: trigger request carrying portfolio_id and the optional
+        scenarios_to_consider, scenario_priority_order, min_property_id,
+        max_property_id and subtask_id.
+    :return: a Response with status 200 on success. If an exception occurs
+        and subtask_id is set, the value returned by handle_error.
+    :raises Exception: any exception raised during processing is re-raised
+        when no subtask_id is set.
+    """
+    portfolio_id: int = body.portfolio_id
+    scenarios_to_consider: Optional[List[int]] = body.scenarios_to_consider
+    scenario_priority_order: Optional[List[int]] = body.scenario_priority_order
+    min_property_id: Optional[int] = body.min_property_id
+    max_property_id: Optional[int] = body.max_property_id
+    subtask_id: Optional[str] = body.subtask_id
+
+    logger.info(f"Processing portfolio {portfolio_id}")
+    # Start time in epoch milliseconds; used for the log URL and passed to handle_error.
+    start_ms = int(time.time() * 1000)
+    cloud_logs_url = build_cloudwatch_log_url(start_ms)
+
+    # This status update happens outside the try block, so a failure here
+    # (e.g. a malformed UUID string) propagates directly to the caller.
+    if body.subtask_id:
+        SubTaskInterface().update_subtask_status(
+            subtask_id=UUID(subtask_id),
+            status="in progress",
+            cloud_logs_url=cloud_logs_url,
+        )
+
+    try:
+
+        all_scenarios: List[Scenario] = _load_scenarios_for_portfolio(portfolio_id)
+        # Every plan that must be written back, keyed by plan id.
+        plans_by_id: Dict[int, Plan] = (
+            {}
+        )  # TODO: make this an in-memory repository class
+
+        # An empty/None list means "consider everything"; a non-empty list
+        # must name at least two scenarios.
+        if scenarios_to_consider:
+            if len(scenarios_to_consider) < 2:
+                raise ValueError(
+                    "Cannot run auto categorisation for fewer than 2 scenarios"
+                )
+
+        # first get all plans that we're interested in
+        plans_for_consideration: List[Plan] = _load_plans_for_portfolio(
+            portfolio_id,
+            all_scenarios,
+            scenarios_to_consider,
+            min_property_id,
+            max_property_id,
+        )
+        for plan in plans_for_consideration:
+            if plan.id is not None:  # just in case
+                plans_by_id[plan.id] = plan
+
+        # then unset existing defaults on domain objects regardless of whether they're under consideration or not
+        default_plans: List[Plan] = _get_default_plans(
+            portfolio_id, all_scenarios, min_property_id, max_property_id
+        )
+        for plan in default_plans:
+            plan.set_default(False)
+            # NOTE(review): this replaces any entry stored above for the same id.
+            # Plans under consideration are stored again in the loop below, so
+            # the object that reaches the database for those ids is the one
+            # updated by _update_plan_objects.
+            if plan.id is not None:  # just in case
+                plans_by_id[plan.id] = plan
+
+        logger.info(f"Successfully unset {len(default_plans)} default plan(s)")
+
+        # then set new defaults on domain objects under consideration
+        plans_for_consideration_by_property: Dict[int, List[Plan]] = (
+            _group_plans_by_property(plans_for_consideration)
+        )
+
+        for property_id, property_plans in plans_for_consideration_by_property.items():
+            if not property_plans:
+                raise ValueError(f"No plans for property {property_id}")
+
+            try:
+                cheapest_plan = choose_cheapest_relevant_plan(
+                    property_plans, scenario_priority_order
+                )
+            except Exception:
+                # Log which property failed, then let the outer handler deal with it.
+                logger.error(f"Failed to find cheapest plan for property {property_id}")
+                raise
+
+            property_plans = _update_plan_objects(property_plans, cheapest_plan)
+            for plan in property_plans:
+                if plan.id is not None:  # just in case
+                    plans_by_id[plan.id] = plan
+
+        logger.info("Successfully set defaults on Plan objects in memory")
+
+        # then pass all domain objects to database to update (regardless of whether they've changed)
+        _update_plans_in_db(list(plans_by_id.values()))
+
+        # Mark the subtask as successful
+        logger.info(f"Successfully updated {len(plans_by_id)} Plans in database")
+        if body.subtask_id:
+            SubTaskInterface().update_subtask_status(
+                subtask_id=UUID(subtask_id),
+                status="complete",
+                cloud_logs_url=cloud_logs_url,
+            )
+
+        return Response(status_code=200)
+    except Exception as e:
+        # With a subtask, hand the failure to handle_error and return its
+        # result; without one, re-raise to the caller.
+        if subtask_id:
+            return handle_error(
+                "Exception during Categorisation processing.",
+                e,
+                subtask_id,
+                500,
+                start_ms,
+            )
+
+        raise
+
+
+def choose_cheapest_relevant_plan(
+    plans: List[Plan], scenario_priority_order: Optional[List[int]] = None
+) -> Plan:
+    """
+    Choose the plan that should become the default for one property.
+
+    Only compliant plans are eligible; if none are compliant, all plans are
+    eligible. Among eligible plans the lowest cost wins. Ties on cost are
+    broken by scenario_priority_order (earlier scenario id wins); if no tied
+    plan belongs to a listed scenario, the first tied plan in input order is
+    returned.
+
+    :param plans: candidate plans for a single property.
+    :param scenario_priority_order: scenario ids in descending priority, used
+        only to break cost ties. None is treated as an empty list.
+    :return: the selected Plan.
+    :raises ValueError: if plans is empty, or an eligible plan has no id.
+    """
+    scenario_priority_order = scenario_priority_order or []
+
+    # Fall back to every plan when the compliant subset is empty.
+    eligible_plans: List[Plan] = [plan for plan in plans if plan.is_compliant] or plans
+    if not eligible_plans:
+        raise ValueError("No plans available to choose from.")
+
+    for plan in eligible_plans:
+        if plan.id is None:
+            # This should never actually happen, but plan.id is optional to cater
+            # for new plans. We are only working with already persisted plans here
+            raise ValueError(
+                f"All plans must have an ID, but found a plan with no ID: {plan}"
+            )
+
+    min_cost: float = min(plan.cost for plan in eligible_plans)
+
+    # All plans tied at the minimum cost, in their original order.
+    cheapest_plans: List[Plan] = [
+        plan for plan in eligible_plans if plan.cost == min_cost
+    ]
+
+    # Walk the priority list first so that a higher-priority scenario wins the tie.
+    for priority_scenario_id in scenario_priority_order:
+        for plan in cheapest_plans:
+            if plan.scenario.id == priority_scenario_id:
+                return plan
+
+    return cheapest_plans[0]
+
+
+def _get_default_plans(
+    portfolio_id: int,
+    scenarios: List[Scenario],
+    min_property_id: Optional[int] = None,
+    max_property_id: Optional[int] = None,
+) -> List[Plan]:
+    """
+    Load the plans returned by get_default_plans as domain objects.
+
+    :param portfolio_id: portfolio to query.
+    :param scenarios: scenarios used to resolve each plan's scenario_id.
+    :param min_property_id: optional bound forwarded to get_default_plans.
+    :param max_property_id: optional bound forwarded to get_default_plans.
+    :return: Plan objects for the returned models. Models whose scenario_id
+        is not among the given scenarios are left out without any log entry.
+    """
+    default_plan_models = get_default_plans(
+        portfolio_id, min_property_id, max_property_id
+    )
+
+    # Index scenarios by id so each plan's scenario is a single lookup.
+    scenario_map = {s.id: s for s in scenarios}
+
+    return [
+        Plan.from_sqlalchemy(p, scenario_map[p.scenario_id])
+        for p in default_plan_models
+        if p.scenario_id in scenario_map
+    ]
+
+
+def _load_scenarios_for_portfolio(portfolio_id: int) -> List[Scenario]:
+    """
+    Fetch the portfolio's scenario models and convert them to Scenario objects.
+
+    :param portfolio_id: portfolio whose scenarios are loaded.
+    :return: one Scenario per model returned by get_scenarios_by_portfolio_id.
+    """
+    scenario_models: List[ScenarioModel] = get_scenarios_by_portfolio_id(portfolio_id)
+
+    return [Scenario.from_sqlalchemy(s) for s in scenario_models]
+
+
+def _load_plans_for_portfolio(
+    portfolio_id: int,
+    all_scenarios: List[Scenario],
+    scenarios_to_consider: Optional[List[int]] = None,
+    min_property_id: Optional[int] = None,
+    max_property_id: Optional[int] = None,
+) -> List[Plan]:
+
+    if scenarios_to_consider:
+        logger.info(f"Getting plans for {len(scenarios_to_consider)} scenarios")
+        plan_models: List[PlanModel] = get_most_recent_plans_by_scenario_ids(
+            scenarios_to_consider, min_property_id, max_property_id
+        )
+        logger.info(f"Got {len(plan_models)} plan models from database")
+    else:
+        logger.info(
+            f"No list of Plans to consider provided. Getting all Plans for portfolio {portfolio_id}"
+        )
+        plan_models: List[PlanModel] = get_most_recent_plans_by_portfolio_id(
+            portfolio_id, min_property_id, max_property_id
+        )
+
+    plans: List[Plan] = []
+
+    if not all_scenarios:
+        raise Exception(f"No scenarios found for Portfolio {portfolio_id}")
+
+    for model in plan_models:
+
+        scenario = next((s for s in all_scenarios if s.id == model.scenario_id))
+        if not scenario:
+            logger.info(f"No Scenario associated with Plan of ID {model.id}")
+            continue
+
+        plans.append(Plan.from_sqlalchemy(model, scenario))
+
+    logger.info(f"Got {len(plans)} Plans")
+    return plans
+
+
+def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]:
+    """
+    Group plans by the property they belong to.
+
+    :param plans: plans to group.
+    :return: mapping of property_id to that property's plans, in input order.
+        The returned object is a defaultdict, so reading a missing key
+        creates an empty list for it.
+    """
+    grouped: dict[int, List[Plan]] = defaultdict(list)
+
+    for plan in plans:
+        grouped[plan.record.property_id].append(plan)
+
+    return grouped
+
+
+def _update_plan_objects(plans: List[Plan], cheapest_plan: Plan) -> List[Plan]:
+    """
+    Set the default flag on each plan: True for the chosen plan, False otherwise.
+
+    :param plans: plans of one property; they are modified in place.
+    :param cheapest_plan: the plan selected as default, matched by id.
+    :return: the same list that was passed in.
+    """
+    for plan in plans:
+        should_be_default: bool = plan.id == cheapest_plan.id
+        plan.set_default(should_be_default)
+
+        if should_be_default:
+            logger.debug(
+                f"Setting Plan {plan.id} (Scenario Name: {plan.scenario.record.name}) to default"
+            )
+
+    return plans
+
+
+def _update_plans_in_db(plans: List[Plan]) -> None:
+    """
+    Convert the plans to their database models and pass them to bulk_update_plans.
+
+    :param plans: plans to write back.
+    """
+    plan_models: List[PlanModel] = []
+    scenario_models: List[ScenarioModel] = []
+
+    # Each plan yields a (plan model, scenario model) pair; the two lists stay
+    # index-aligned.
+    for plan in plans:
+        plan_model, scenario_model = plan.to_sqlalchemy()
+        plan_models.append(plan_model)
+        scenario_models.append(scenario_model)
+
+    bulk_update_plans(plan_models, scenario_models)
--- a/backend/categorisation/tests/test_plan_is_compliant.py
+++ b/backend/categorisation/tests/test_plan_is_compliant.py
@ -0,0 +1,73 @@
+from typing import Callable
+import pytest
+from datetime import datetime
+
+from backend.app.domain.classes.plan import Plan
+from backend.app.domain.classes.scenario import Scenario
+from backend.app.domain.records.plan_record import PlanRecord
+from backend.app.domain.records.scenario_record import ScenarioRecord
+from backend.app.db.models.portfolio import Epc, PortfolioGoal
+
+
+@pytest.fixture
+def created_at_datetime() -> datetime:
+    return datetime.now()
+
+
+@pytest.fixture
+def epc_c_scenario(created_at_datetime: datetime) -> "Scenario":
+    # arrange
+    scenario_record = ScenarioRecord(
+        name="EPC C",
+        created_at=created_at_datetime,
+        housing_type="",
+        goal=PortfolioGoal.INCREASING_EPC,
+        goal_value="C",
+        trigger_file_path="",
+        multi_plan=False,
+        is_default=False,
+    )
+    return Scenario(record=scenario_record, id=1)
+
+
+@pytest.fixture
+def plan_factory(
+    epc_c_scenario: "Scenario", created_at_datetime: datetime
+) -> Callable[[int, "Epc"], "Plan"]:
+    # returns a function to create plans with different attributes
+    def _create_plan(post_sap_points: int, post_epc_rating: "Epc") -> "Plan":
+        plan_record = PlanRecord(
+            property_id=1,
+            portfolio_id=1,
+            created_at=created_at_datetime,
+            is_default=False,
+            post_sap_points=post_sap_points,
+            post_epc_rating=post_epc_rating,
+        )
+        return Plan(record=plan_record, scenario=epc_c_scenario, id=1)
+
+    return _create_plan
+
+
+@pytest.mark.parametrize(
+    "post_sap_points, post_epc_rating, expected_compliance",
+    [
+        (75, Epc.C, True),
+        (100, Epc.A, True),
+        (60, Epc.D, False),
+    ],
+)
+def test_scenario_goal_is_epc_c(
+    plan_factory: Callable[[int, "Epc"], "Plan"],
+    post_sap_points: int,
+    post_epc_rating: "Epc",
+    expected_compliance: bool,
+) -> None:
+    # arrange
+    plan = plan_factory(post_sap_points, post_epc_rating)
+
+    # act
+    actual_compliance: bool = plan.is_compliant
+
+    # assert
+    assert actual_compliance == expected_compliance
--- a/backend/categorisation/tests/test_prioritised_plan_selected.py
+++ b/backend/categorisation/tests/test_prioritised_plan_selected.py
@ -0,0 +1,160 @@
+from datetime import datetime
+from typing import List, Optional
+import pytest
+
+from backend.app.domain.classes.plan import Plan
+from backend.app.domain.classes.scenario import Scenario
+from backend.app.domain.records.plan_record import PlanRecord
+from backend.app.domain.records.scenario_record import ScenarioRecord
+from backend.app.db.models.portfolio import Epc, PortfolioGoal
+from backend.categorisation.processor import choose_cheapest_relevant_plan
+
+
+@pytest.fixture
+def created_at_datetime() -> datetime:
+    return datetime.now()
+
+
+def make_plan_record(
+    created_at: datetime, default: bool, cost_of_works: Optional[float] = 500.0
+) -> PlanRecord:
+    return PlanRecord(
+        property_id=1,
+        portfolio_id=1,
+        created_at=created_at,
+        is_default=default,
+        post_epc_rating=Epc.C,
+        cost_of_works=cost_of_works,
+    )
+
+
+def make_scenario(name: str, created_at: datetime, is_default: bool) -> Scenario:
+    record = ScenarioRecord(
+        name=name,
+        created_at=created_at,
+        housing_type="",
+        goal=PortfolioGoal.INCREASING_EPC,
+        goal_value="C",
+        trigger_file_path="",
+        multi_plan=False,
+        is_default=is_default,
+    )
+    return Scenario(record=record, id=3 if is_default else 4)
+
+
+def make_plan(
+    created_at: datetime,
+    default: bool,
+    cost_of_works: Optional[float] = 500.0,
+    name: str = "",
+) -> Plan:
+    scenario = make_scenario(name, created_at, default)
+    plan_id = 1 if default else 2
+    return Plan(
+        record=make_plan_record(created_at, default, cost_of_works),
+        scenario=scenario,
+        id=plan_id,
+    )
+
+
+def test_prioritised_scenario_selected(created_at_datetime: datetime) -> None:
+    # arrange
+    epc_c_plan = make_plan(created_at_datetime, True, name="EPC C")
+    minor_works_plan = make_plan(created_at_datetime, False, name="EPC C - Minor Works")
+    scenario_priority_order: List[int] = [4, 3]
+    expected_default_plan_id = 2
+
+    # act
+    actual_default_plan = choose_cheapest_relevant_plan(
+        plans=[epc_c_plan, minor_works_plan],
+        scenario_priority_order=scenario_priority_order,
+    )
+
+    # assert
+    assert actual_default_plan.id == expected_default_plan_id
+
+
+def test_cheapest_plan_returned_if_not_in_priority_list(
+    created_at_datetime: datetime,
+) -> None:
+    # arrange
+    epc_c_plan = make_plan(
+        created_at_datetime, True, cost_of_works=1000.0, name="EPC C"
+    )
+    minor_works_plan = make_plan(
+        created_at_datetime, False, cost_of_works=100.0, name="EPC C - Minor Works"
+    )
+    scenario_priority_order: List[int] = [3, 5]
+    expected_default_plan_id = 2
+
+    # act
+    actual_default_plan = choose_cheapest_relevant_plan(
+        plans=[epc_c_plan, minor_works_plan],
+        scenario_priority_order=scenario_priority_order,
+    )
+
+    # assert
+    assert actual_default_plan.id == expected_default_plan_id
+
+
+def test_all_plans_zero_cost__highest_priority_returned(
+    created_at_datetime: datetime,
+) -> None:
+    # arrange
+    epc_c_plan = make_plan(created_at_datetime, True, cost_of_works=0.0, name="EPC C")
+    minor_works_plan = make_plan(
+        created_at_datetime, False, cost_of_works=0.0, name="EPC C - Minor Works"
+    )
+    scenario_priority_order: List[int] = [4, 3]
+    expected_default_plan_id = 2
+
+    # act
+    actual_default_plan = choose_cheapest_relevant_plan(
+        plans=[epc_c_plan, minor_works_plan],
+        scenario_priority_order=scenario_priority_order,
+    )
+
+    # assert
+    assert actual_default_plan.id == expected_default_plan_id
+
+
+def test_some_plans_zero_cost__cheapest_returned(
+    created_at_datetime: datetime,
+) -> None:
+    # arrange
+    epc_c_plan = make_plan(created_at_datetime, True, cost_of_works=0.0, name="EPC C")
+    minor_works_plan = make_plan(
+        created_at_datetime, False, cost_of_works=50.0, name="EPC C - Minor Works"
+    )
+    scenario_priority_order: List[int] = [4, 3]
+    expected_default_plan_id = 1
+
+    # act
+    actual_default_plan = choose_cheapest_relevant_plan(
+        plans=[epc_c_plan, minor_works_plan],
+        scenario_priority_order=scenario_priority_order,
+    )
+
+    # assert
+    assert actual_default_plan.id == expected_default_plan_id
+
+
+def test_all_plans_null_cost__highest_priority_returned(
+    created_at_datetime: datetime,
+) -> None:
+    # arrange
+    epc_c_plan = make_plan(created_at_datetime, True, cost_of_works=None, name="EPC C")
+    minor_works_plan = make_plan(
+        created_at_datetime, False, cost_of_works=None, name="EPC C - Minor Works"
+    )
+    scenario_priority_order: List[int] = [4, 3]
+    expected_default_plan_id = 2
+
+    # act
+    actual_default_plan = choose_cheapest_relevant_plan(
+        plans=[epc_c_plan, minor_works_plan],
+        scenario_priority_order=scenario_priority_order,
+    )
+
+    # assert
+    assert actual_default_plan.id == expected_default_plan_id
--- a/backend/condition/condition_trigger_request.py
+++ b/backend/condition/condition_trigger_request.py
@ -29,5 +29,5 @@ class ConditionTriggerRequest(BaseModel):
 # {
 #     "file_type": "LBWF",
 #     "trigger_file_bucket": "condition-data-dev",
-#     "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx",
+#     "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx"
 # }
--- a/backend/docker-compose-local-lambdas.yml
+++ b/backend/docker-compose-local-lambdas.yml
@ -0,0 +1,11 @@
+version: "3.9"
+
+services:
+  categorisation-lambda:
+    build:
+      context: ../
+      dockerfile: backend/categorisation/handler/Dockerfile
+    ports:
+      - "9000:8080"
+    env_file:
+      - ../.env
--- a/backend/engine/engine.py
+++ b/backend/engine/engine.py
@ -1191,14 +1191,18 @@ async def model_engine(body: PlanTriggerRequest):
            property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures]
            measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures]

-            ventilation_included = "ventilation" in property_measure_types
+            # TODO - formalise property measure types into an enum
+            ventilation_included = (
+                "ventilation" in property_measure_types or "mechanical_ventilation" in property_measure_types
+            )

            # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore
            # its inclusion

-            needs_ventilation = any(
-                x in property_measure_types for x in assumptions.measures_needing_ventilation
-            ) and not p.has_ventilation and ventilation_included
+            needs_ventilation = optimiser_functions.check_needs_ventilation(
+                property_measure_types, assumptions.measures_needing_ventilation, p.has_ventilation,
+                ventilation_included
+            )

            if not measures_to_optimise:
                # Nothing to do, we just reshape the recommendations
@ -1315,7 +1319,7 @@ async def model_engine(body: PlanTriggerRequest):
                    recommendations=recommendations, selected=selected,
                )

-            # Add best practice measures (ventilation/trickle vents)
+            # Add best practice measures (ventilation/trickle vents) - pass needs_ventilation flag
            selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected)
            # Final flattening - we pass what the battery SAP score would be, regardless if the battery was selected
            recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults(
--- a/backend/export/README.md
+++ b/backend/export/README.md
@ -0,0 +1,169 @@
+# 🧪 Running Tests in PyCharm (macOS + pytest-postgresql)
+
+Our test suite uses `pytest` and `pytest-postgresql`, which
+automatically spins up a temporary PostgreSQL instance.
+
+On Linux (including GitHub Actions), PostgreSQL binaries are installed
+in standard system locations.\
+On macOS (Homebrew), they are not --- so PyCharm needs a small
+configuration tweak to locate `pg_ctl`.
+
+This guide explains how to run and debug tests locally in PyCharm
+without modifying test code.
+
+------------------------------------------------------------------------
+
+## ✅ Prerequisites
+
+### Devcontainer
+
+Postgres install is included in the devcontainer, so no additional setup is needed.
+
+Running
+
+```bash
+make test
+```
+
+will run the test suite, which automatically starts a temporary PostgreSQL instance.
+
+### Local MacOS
+
+1. Install PostgreSQL via Homebrew:
+
+``` bash
+brew install postgresql
+```
+
+2. Confirm `pg_ctl` exists:
+
+``` bash
+which pg_ctl
+```
+
+Typical output:
+
+    /opt/homebrew/bin/pg_ctl
+
+------------------------------------------------------------------------
+
+# 🚀 Running Tests in PyCharm
+
+## Step 1 --- Create a PyCharm pytest Run Configuration
+
+1. Open the test file.
+2. Click the green ▶ next to the test.
+3. Choose **"Edit Run Configuration..."**
+
+You should see something like:
+
+- **Target:** `backend/export/tests/test_export.py`
+- **Working directory:** Project root (e.g. `Model/`)
+
+------------------------------------------------------------------------
+
+## Step 2 --- Add Required Override (macOS Only)
+
+In the Run Configuration:
+
+### ➜ "Additional Arguments"
+
+Add:
+
+    --override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl
+
+This tells `pytest-postgresql` where `pg_ctl` lives on macOS.
+
+Without this, PyCharm may fail with:
+
+    ExecutableMissingException: Could not found pg_config executable
+
+------------------------------------------------------------------------
+
+## Step 3 --- Run or Debug
+
+You can now:
+
+- Click ▶ Run\
+- Click 🐞 Debug\
+- Set breakpoints normally
+
+The temporary PostgreSQL instance will start automatically.
+
+------------------------------------------------------------------------
+
+# 🔍 Why This Is Needed
+
+`pytest-postgresql` defaults to a Linux-style path:
+
+    /usr/lib/postgresql/<version>/bin/pg_ctl
+
+That path exists on Ubuntu (CI), but not on macOS.
+
+On macOS, Homebrew installs PostgreSQL in:
+
+    /opt/homebrew/bin/
+
+The `--override-ini` flag safely overrides the executable path
+**locally**, without modifying:
+
+- test files\
+- `conftest.py`\
+- `pytest.ini`\
+- CI configuration
+
+This ensures:
+
+- ✅ Tests still work in GitHub Actions\
+- ✅ Tests still work for Linux users\
+- ✅ macOS developers can debug in PyCharm\
+- ✅ No repository-specific hacks are required
+
+------------------------------------------------------------------------
+
+# 🛠 Optional: Using a Local `.env` File
+
+If you prefer not to hardcode the override in the run configuration:
+
+1. Create a local file:
+
+<!-- -->
+
+    .env.local
+
+2. Add:
+
+<!-- -->
+
+    PYTEST_ADDOPTS=--override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl
+
+3. In PyCharm:
+    - Open the Run Configuration
+    - Add `.env.local` under **"Paths to .env files"**
+
+------------------------------------------------------------------------
+
+# 🧪 Running Tests via Terminal (Recommended for CI Parity)
+
+For normal execution outside PyCharm:
+
+``` bash
+make test
+```
+
+This already works without additional configuration.
+
+------------------------------------------------------------------------
+
+# 🧠 Summary
+
+| Environment            | Works Without Override? | Needs `--override-ini`? |
+|------------------------|-------------------------|-------------------------|
+| GitHub Actions (Linux) | ✅ Yes                  | ❌ No                   |
+| Linux local            | ✅ Yes                  | ❌ No                   |
+| macOS terminal (tox)   | ✅ Yes                  | ❌ No                   |
+| macOS PyCharm debugger | ❌ No                   | ✅ Yes                  |
--- a/backend/export/property_scenarios/db_functions.py
+++ b/backend/export/property_scenarios/db_functions.py
@ -0,0 +1,227 @@
+from typing import List, Any, Dict, Optional, Tuple, Sequence
+import pandas as pd
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+from sqlalchemy.engine import Row
+from collections import defaultdict
+
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    PlanModel,
+    PlanRecommendations,
+    RecommendationMaterials,
+)
+from backend.app.db.models.portfolio import (
+    PropertyModel,
+    PropertyDetailsEpcModel,
+)
+from backend.app.db.models.materials import Material
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class DbMethods:
+    """
+    Read-only queries used by the property scenario export, returning DataFrames.
+
+    All methods run against the SQLAlchemy session supplied at construction.
+    """
+
+    def __init__(self, session: Session) -> None:
+        self.session = session
+
+    def get_properties(self, portfolio_id: int) -> pd.DataFrame:
+        """
+        Function to fetch the property data, for property scenario exports
+
+        Properties are joined to their EPC details with an inner join, so a
+        property without a PropertyDetailsEpcModel row is not returned.
+
+        :param portfolio_id: portfolio whose properties are fetched.
+        :return: DataFrame with one row per (property, EPC details) pair and
+            one column per column of the two tables.
+        """
+        stmt = (
+            select(PropertyModel, PropertyDetailsEpcModel)
+            .join(
+                PropertyDetailsEpcModel,
+                PropertyModel.id == PropertyDetailsEpcModel.property_id,
+            )
+            .where(PropertyModel.portfolio_id == portfolio_id)
+        )
+
+        rows: Sequence[Row[Tuple[PropertyModel, PropertyDetailsEpcModel]]] = (
+            self.session.execute(stmt).all()
+        )
+
+        # NOTE(review): the two column dicts are merged, so for any column name
+        # present in both tables (possibly `id`) the PropertyDetailsEpcModel
+        # value replaces the PropertyModel one - confirm this is intended.
+        data: List[Dict[str, Any]] = [
+            {
+                **{
+                    col.name: getattr(property_model, col.name)
+                    for col in PropertyModel.__table__.columns.values()
+                },
+                **{
+                    col.name: getattr(epc_model, col.name)
+                    for col in PropertyDetailsEpcModel.__table__.columns.values()
+                },
+            }
+            for property_model, epc_model in rows
+        ]
+
+        return pd.DataFrame(data)
+
+    def get_latest_plans(
+        self,
+        portfolio_id: int,
+        scenario_ids: Optional[List[int]] = None,
+        default_only: bool = False,
+    ) -> pd.DataFrame:
+        """
+        Fetch latest plans.
+
+        Modes:
+        1) Scenario mode: latest per (scenario_id, property_id)
+        2) Default mode: latest default plan per property (ignores scenario_ids)
+
+        :param portfolio_id: portfolio whose plans are fetched.
+        :param scenario_ids: scenario ids for scenario mode; ignored when
+            default_only is True.
+        :param default_only: select default mode.
+        :return: DataFrame with one row per selected plan and one column per
+            PlanModel column.
+        :raises ValueError: if default_only is False and scenario_ids is
+            empty or None.
+        """
+
+        # -----------------------------
+        # Sanity checks
+        # -----------------------------
+        if default_only and scenario_ids:
+            # Override scenario_ids to make it explicit that they will be ignored in the query
+            scenario_ids = None
+
+        if not default_only and not scenario_ids:
+            raise ValueError(
+                "Either scenario_ids must be provided "
+                "or default_only must be True."
+            )
+
+        # -----------------------------
+        # Filter on just the default plans - we ignore the scenario ids. NOTE - this is specific to postgres
+        # and relies on DISTINCT ON behaviour.
+        # -----------------------------
+        if default_only:
+            # Latest default plan per property (ignore scenarios entirely)
+            # DISTINCT ON (property_id) keeps the first row per property,
+            # ordered by created_at DESC so we get the newest one.
+
+            stmt = (
+                select(PlanModel)
+                .where(
+                    PlanModel.portfolio_id == portfolio_id,
+                    PlanModel.is_default.is_(True),
+                )
+                .distinct(PlanModel.property_id)
+                .order_by(
+                    PlanModel.property_id,
+                    PlanModel.created_at.desc(),
+                )
+            )
+
+        else:
+            # Latest plan per (scenario_id, property_id)
+            # DISTINCT ON (scenario_id, property_id) keeps the newest
+            # plan per scenario/property combination.
+
+            # The check above already rejects an empty scenario_ids in this
+            # branch; the assert only narrows the Optional type.
+            assert scenario_ids is not None
+
+            stmt = (
+                select(PlanModel)
+                .where(
+                    PlanModel.portfolio_id == portfolio_id,
+                    PlanModel.scenario_id.in_(scenario_ids),
+                )
+                .distinct(
+                    PlanModel.scenario_id,
+                    PlanModel.property_id,
+                )
+                .order_by(
+                    PlanModel.scenario_id,
+                    PlanModel.property_id,
+                    PlanModel.created_at.desc(),
+                )
+            )
+
+        logger.info("Fetching plans")
+
+        plans: Sequence[PlanModel] = self.session.scalars(stmt).all()
+
+        return pd.DataFrame(
+            [
+                {
+                    col.name: getattr(plan, col.name)
+                    for col in PlanModel.__table__.columns.values()
+                }
+                for plan in plans
+            ]
+        )
+
+    def get_recommendations(self, plan_ids: List[int]) -> pd.DataFrame:
+        """
+        Fetch the recommendations linked to the given plans.
+
+        Only recommendations with default set and already_installed unset are
+        returned.
+
+        :param plan_ids: ids of the plans whose recommendations are fetched.
+        :return: DataFrame with one row per (recommendation, plan) link, with
+            every Recommendation column plus "scenario_id" and "plan_name"
+            taken from the plan. An empty DataFrame with no columns when
+            plan_ids is empty.
+        """
+
+        if not plan_ids:
+            logger.info("No plan ids provided")
+            return pd.DataFrame()
+
+        stmt = (
+            select(Recommendation, PlanModel.scenario_id, PlanModel.name)
+            .join(
+                PlanRecommendations,
+                Recommendation.id == PlanRecommendations.recommendation_id,
+            )
+            .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
+            .where(
+                PlanRecommendations.plan_id.in_(plan_ids),
+                Recommendation.default.is_(True),
+                Recommendation.already_installed.is_(False),
+            )
+        )
+
+        rows: Sequence[Tuple[Recommendation, Optional[int], Optional[str]]] = (
+            self.session.execute(stmt).tuples().all()
+        )
+
+        data: List[Dict[str, Any]] = [
+            {
+                **{
+                    col.name: getattr(rec_model, col.name)
+                    for col in Recommendation.__table__.columns.values()
+                },
+                "scenario_id": scenario_id,
+                "plan_name": plan_name,
+            }
+            for rec_model, scenario_id, plan_name in rows
+        ]
+
+        return pd.DataFrame(data)
+
+    def attach_materials(self, recommendations_df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Add a "materials" column listing the materials of each recommendation.
+
+        The input DataFrame is modified in place and also returned.
+
+        :param recommendations_df: recommendations with an "id" column, as
+            produced by get_recommendations.
+        :return: the same DataFrame with a "materials" column; each cell is a
+            list of dicts (empty when the recommendation has no materials).
+        """
+
+        if recommendations_df.empty:
+            recommendations_df["materials"] = []
+            return recommendations_df
+
+        rec_ids: List[int] = recommendations_df["id"].astype(int).tolist()
+
+        stmt = (
+            select(RecommendationMaterials, Material)
+            .join(Material, RecommendationMaterials.material_id == Material.id)
+            .where(RecommendationMaterials.recommendation_id.in_(rec_ids))
+        )
+
+        rows: Sequence[Tuple[RecommendationMaterials, Material]] = (
+            self.session.execute(stmt).tuples().all()
+        )
+
+        # recommendation_id -> list of material dicts for that recommendation
+        materials_map: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
+
+        for rec_mat, material in rows:
+            materials_map[rec_mat.recommendation_id].append(
+                {
+                    "material_id": rec_mat.material_id,
+                    "depth": rec_mat.depth,
+                    "quantity": rec_mat.quantity,
+                    "quantity_unit": rec_mat.quantity_unit,
+                    "estimated_cost": rec_mat.estimated_cost,
+                    "type": material.type.value if material.type else None,
+                    "includes_battery": material.includes_battery,
+                }
+            )
+
+        # .get() is used so that missing ids do not add keys to the defaultdict.
+        recommendations_df["materials"] = recommendations_df["id"].astype(int).apply(
+            lambda x: materials_map.get(x, [])
+        )
+
+        return recommendations_df
--- a/backend/export/property_scenarios/input_schema.py
+++ b/backend/export/property_scenarios/input_schema.py
@ -0,0 +1,40 @@
+from typing import Optional, Union, List
+from pydantic import BaseModel, model_validator, PrivateAttr
+
+
+class ExportRequest(BaseModel):
+    # Validated request body for a property-scenario export.
+
+    # uuid which maps to a specific export request, used for tracking and logging
+    task_id: Optional[str]
+    # uuid which maps to a specific export operation, used for tracking and logging. The subtask is the child
+    # of the task, where the work has been distributed across workers
+    subtask_id: Optional[str]
+    # associated portfolio id for the export request
+    portfolio_id: int
+    # list of scenario ids to export
+    scenario_ids: List[int]
+    # boolean which overrides the scenario ids. If this is true, only the default plan for each property is
+    # exported and the scenario ids are ignored
+    default_plans_only: Optional[bool] = False
+
+    # Private flag recording that scenario_ids were supplied but are ignored because default_plans_only is True
+    _scenario_ids_ignored: bool = PrivateAttr(default=False)
+
+    @model_validator(mode="after")
+    def validate_default_plan_override(self):
+        """
+        Record whether scenario_ids were supplied alongside default_plans_only=True.
+
+        This never raises: the request is still allowed to run, and the flag only exists so that the
+        handler can log / return a warning that the scenario ids are being ignored.
+        """
+        ignored = bool(self.default_plans_only and self.scenario_ids)
+        # NOTE(review): written through object.__setattr__ rather than a plain attribute assignment -
+        # confirm this bypass of the model's own __setattr__ is intentional
+        object.__setattr__(self, "_scenario_ids_ignored", ignored)
+
+        return self
+
+    @property
+    def scenario_ids_ignored(self) -> bool:
+        # Read-only public view of the private flag set by the validator above
+        return self._scenario_ids_ignored
--- a/backend/export/property_scenarios/main.py
+++ b/backend/export/property_scenarios/main.py
@ -0,0 +1,179 @@
+import json
+from typing import Optional, Any, Mapping, Dict, Union, List
+
+import pandas as pd
+from sqlalchemy.orm import Session
+
+from backend.export.property_scenarios.input_schema import ExportRequest
+from backend.export.property_scenarios.db_functions import DbMethods
+from backend.app.db.connection import db_read_session
+from backend.app.utils import sap_to_epc
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def choose_group_keys(payload: ExportRequest) -> List[Union[int, str]]:
+    """
+    Pick the keys the export is split by.
+
+    :param payload: validated export request
+    :return: the single key "default_plans" when only default plans are exported, otherwise the
+        requested scenario ids
+    """
+    # A default-plans export is a single file, so there is no per-scenario grouping
+    return ["default_plans"] if payload.default_plans_only else payload.scenario_ids
+
+
+def has_solar_with_battery(materials_list: Optional[List[Dict[str, Any]]]) -> bool:
+    """
+    Tell whether any material in the list is a solar PV material that includes a battery.
+
+    :param materials_list: material dicts carrying "type" and "includes_battery" keys; None is treated
+        as an empty list
+    :return: True when at least one entry has type "solar_pv" and includes_battery set to exactly True
+    """
+    return any(
+        material.get("type") == "solar_pv" and material.get("includes_battery") is True
+        for material in materials_list or []
+    )
+
+
+def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]:
+    """
+    Build the export dataframes for a portfolio, one per group key.
+
+    Properties, plans, recommendations and their materials are read through DbMethods. Solar PV
+    recommendations whose materials include a battery are relabelled "solar_pv_with_battery". For each
+    group key (every requested scenario id, or the single "default_plans" key) the recommendation costs
+    are pivoted into one column per measure type and merged onto the properties, together with the
+    predicted post-works SAP score and EPC band.
+
+    :param payload: validated export request
+    :param session: database session used for all reads
+    :return: mapping of group key to export dataframe; empty when there are no plans or no
+        recommendations, and missing any group that has no recommendations
+    """
+    export_files: Dict[Union[str, int], pd.DataFrame] = {}
+
+    db_methods = DbMethods(session)
+
+    properties_df = db_methods.get_properties(payload.portfolio_id)
+
+    logger.info("Retrieved %s properties for export", len(properties_df))
+
+    plans_df: pd.DataFrame = db_methods.get_latest_plans(
+        portfolio_id=payload.portfolio_id,
+        scenario_ids=payload.scenario_ids,
+        default_only=bool(payload.default_plans_only),
+    )
+
+    logger.info("Retrieved %s plans for export", len(plans_df))
+
+    if plans_df.empty:
+        logger.info("Empty plans dataframe - no plans to export. Returning empty export.")
+        return export_files
+
+    plan_ids: List[int] = plans_df["id"].tolist()
+    recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids)
+
+    logger.info("Retrieved %s recommendations for export", len(recommendations_df))
+
+    if recommendations_df.empty:
+        # An empty result may carry no columns at all (e.g. no "measure_type"), in which case the column
+        # lookups below would raise KeyError. Every group would be skipped for having no recommendations
+        # anyway, so return the empty export up front.
+        logger.info("Empty recommendations dataframe - no recommendations to export. Returning empty export.")
+        return export_files
+
+    recommendations_df = db_methods.attach_materials(recommendations_df)
+
+    recommendations_df["has_solar_with_battery"] = (
+        recommendations_df["materials"].apply(has_solar_with_battery)
+    )
+
+    # Give solar PV measures that come with a battery their own measure type, so they end up in a
+    # separate cost column after the pivot
+    _filter = (
+        (recommendations_df["measure_type"] == "solar_pv")
+        & (recommendations_df["has_solar_with_battery"])
+    )
+
+    recommendations_df.loc[_filter, "measure_type"] = (
+        recommendations_df.loc[_filter, "measure_type"] + "_with_battery"
+    )
+
+    group_keys: List[Union[str, int]] = choose_group_keys(payload)
+
+    for group_key in group_keys:
+
+        if payload.default_plans_only:
+            # Single export covering every retrieved recommendation
+            scenario_recs = recommendations_df
+        else:
+            scenario_recs = recommendations_df[
+                recommendations_df["scenario_id"] == group_key
+            ]
+
+        if scenario_recs.empty:
+            logger.info("No recommendations found for group_key %s - skipping export for this group", group_key)
+            continue
+
+        measures_df: pd.DataFrame = scenario_recs[
+            ["property_id", "measure_type", "plan_name", "estimated_cost"]
+        ].drop_duplicates()
+
+        # One row per (property, plan), one cost column per measure type.
+        # NOTE(review): DataFrame.pivot raises ValueError if a (property_id, plan_name, measure_type)
+        # combination still appears more than once after drop_duplicates - confirm that cannot happen.
+        pivot: pd.DataFrame = measures_df.pivot(
+            index=["property_id", "plan_name"],
+            columns="measure_type",
+            values="estimated_cost",
+        ).reset_index()
+
+        pivot["total_retrofit_cost"] = (
+            pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1)
+        )
+
+        # Total SAP uplift per property across its recommendations in this group
+        post_sap: pd.DataFrame = (
+            scenario_recs.groupby("property_id")[["sap_points"]]
+            .sum()
+            .reset_index()
+        )
+
+        # Left merges keep every property, including those without any recommendation in this group.
+        # The rename avoids a clash with the "solar_pv" measure column produced by the pivot.
+        df: pd.DataFrame = (
+            properties_df.rename(columns={"solar_pv": "existing_solar_pv"})
+            .merge(pivot, how="left", on="property_id")
+            .merge(post_sap, how="left", on="property_id")
+        )
+
+        # Properties without recommendations gain no SAP points
+        df["sap_points"] = df["sap_points"].fillna(0)
+        df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
+        df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc)
+
+        export_files[group_key] = df
+
+    return export_files
+
+
+# ============================================================
+# Lambda Handler
+# ============================================================
+
+def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]:
+    """
+    Lambda entry point: validate and run the export for every record in the event.
+
+    Each record's "body" is expected to be a JSON document such as:
+    body_dict = {
+        "task_id": "test",
+        "subtask_id": "test",
+        "portfolio_id": 569,
+        "scenario_ids": [],
+        "default_plans_only": True,
+    }
+
+    Every record is processed, including those that follow a failed one. A failure is logged with its
+    traceback and reflected in the response once the whole batch has been attempted.
+
+    :param event: Lambda event containing export request details under "Records"
+    :param context: Lambda context (not used in this handler but included for completeness)
+    :return: HTTP style response: 201 when there are no records, 500 when at least one record failed,
+        200 otherwise
+    """
+    records = event.get("Records", [])
+
+    if not records:
+        return {
+            "statusCode": 201,
+            "body": json.dumps({"message": "No records to process"}),
+        }
+
+    failed_records = 0
+
+    for record in records:
+        try:
+            body_dict = json.loads(record["body"])
+
+            logger.debug("Validating request body")
+            payload = ExportRequest.model_validate(body_dict)
+
+            if payload.scenario_ids_ignored:
+                logger.warning(
+                    "Received scenario_ids in request body but they will be ignored "
+                    "because default_plans_only is set to True"
+                )
+
+            logger.debug("Successfully validated request body")
+            with db_read_session() as session:
+                exported_files = process_export(payload, session)
+
+            # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url
+            _ = exported_files
+
+        except Exception as e:
+            # Top-level boundary: keep going so that one bad record does not drop the rest of the batch
+            logger.exception("Failed to process record: %s", e)
+            failed_records += 1
+
+    if failed_records:
+        # NOTE(review): presumably triggered from a queue - a returned 500 body may not cause the failed
+        # records to be retried; confirm against the event source configuration
+        return {
+            "statusCode": 500,
+            "body": json.dumps({"message": "Failed to process export request"}),
+        }
+
+    return {
+        "statusCode": 200,
+        "body": json.dumps({}),
+    }
--- a/backend/export/tests/conftest.py
+++ b/backend/export/tests/conftest.py
@ -0,0 +1,55 @@
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+from backend.app.db.base import Base
+
+
+@pytest.fixture(scope="function")
+def engine(postgresql):
+    """
+    Yield a SQLAlchemy engine bound to the ephemeral
+    pytest-postgresql database.
+
+    The fixture is function scoped, so the tables are created before each test that uses it and
+    dropped again once that test has finished.
+    """
+
+    # Build the SQLAlchemy URL from the psycopg connection info
+    info = postgresql.info
+    connection_string = (
+        f"postgresql+psycopg://{info.user}:{info.password}@{info.host}:{info.port}/{info.dbname}"
+    )
+
+    db_engine = create_engine(connection_string)
+
+    # Setup phase: create every table registered on Base
+    Base.metadata.create_all(db_engine)
+
+    # The yield splits this function into two phases: everything above is setup, everything below is
+    # teardown and runs after the test has completed
+    yield db_engine
+
+    # Teardown phase: drop the tables and release the engine's connections
+    Base.metadata.drop_all(db_engine)
+    db_engine.dispose()
+
+
+@pytest.fixture(scope="function")
+def db_session(engine):
+    """
+    Yield a session bound to a connection with an open transaction, one per test.
+
+    The transaction is rolled back after the test so tests stay isolated from each other.
+    """
+
+    conn = engine.connect()
+    outer_transaction = conn.begin()
+
+    # Bind the session to the connection (not the engine) so its work happens on that connection
+    session_factory = sessionmaker(bind=conn)
+    test_session = session_factory()
+
+    yield test_session
+
+    # Teardown: close the session, roll back the transaction, then release the connection
+    test_session.close()
+    outer_transaction.rollback()
+    conn.close()
--- a/backend/export/tests/fixtures/plan_recs_569.csv
+++ b/backend/export/tests/fixtures/plan_recs_569.csv
@ -0,0 +1,14 @@
+id,plan_id,recommendation_id
+24799722,1604277,24798968
+24799726,1604277,24798972
+24801150,1604367,24800396
+24802703,1604448,24801949
+24802724,1604448,24801970
+24805327,1604577,24804573
+24805397,1604579,24804643
+24805401,1604579,24804647
+24813000,1605111,24812246
+24813002,1605111,24812248
+24813004,1605111,24812250
+24813006,1605112,24812252
+24813009,1605112,24812255
--- a/backend/export/tests/fixtures/plans_569.csv
+++ b/backend/export/tests/fixtures/plans_569.csv
@ -0,0 +1,11 @@
+id,name,portfolio_id,property_id,scenario_id,created_at,is_default,valuation_increase_lower_bound,valuation_increase_upper_bound,valuation_increase_average,plan_type,post_sap_points,post_epc_rating,post_co2_emissions,co2_savings,post_energy_bill,energy_bill_savings,post_energy_consumption,energy_consumption_savings,valuation_post_retrofit,valuation_increase,cost_of_works,contingency_cost
+1604277,,569,660478,1060,2026-02-19 16:14:45.560816,True,0.0302,0.07,0.048226666,,71.5,Epc.C,4.1813498,0.71865046,1447.5204,691.6662,15303.688,3276.7622,,,6984.568,1003.9568
+1604448,,569,660529,1060,2026-02-19 16:14:52.052740,True,0.0302,0.07,0.048226666,,70.0,Epc.C,7.32816,1.5818402,2978.734,2314.7651,16558.295,1837.0155,,,13528.6,2844.636
+1604367,,569,660538,1060,2026-02-19 16:14:48.517937,True,0.02,0.03,0.025,,71.0,Epc.C,5.003036,0.43696404,1933.2236,521.5316,19190.531,1883.4657,,,5520.0,828.0
+1604577,,569,660688,1060,2026-02-19 16:15:04.461456,True,0.02,0.03,0.025,,70.0,Epc.C,3.6019807,0.20801921,1610.3181,248.27809,13746.731,896.6345,,,5100.0,765.0
+1604579,,569,660690,1060,2026-02-19 16:15:04.461456,True,0.02,0.03,0.025,,70.0,Epc.C,4.7473392,0.5326607,1867.537,699.7881,18730.615,2527.2231,,,5469.0,825.74
+1605110,,569,660598,1069,2026-02-19 16:18:57.606337,True,0.0,0.0,0.0,,70.0,Epc.C,1.89,0.0,1125.7338,0.0,7268.866,0.0,,,0.0,0.0
+1605111,,569,660599,1069,2026-02-19 16:18:57.606337,True,0.0,0.0,0.0,,68.7,Epc.D,2.02,1.1,1174.9326,319.18213,7748.233,3924.9,,,1218.584,124.0984
+1605080,,569,660448,1069,2026-02-19 16:18:57.581528,True,0.0,0.0,0.0,,71.0,Epc.C,1.79,0.0,1101.9677,0.0,6821.7285,0.0,,,0.0,0.0
+1605112,,569,660600,1069,2026-02-19 16:18:57.606337,True,0.0,0.0,0.0,,64.9,Epc.D,1.89,0.8,1131.3535,172.0886,7241.062,2466.7,,,3885.834,716.7084
+1605404,,569,660652,1069,2026-02-19 16:19:28.383096,True,0.0,0.0,0.0,,71.0,Epc.C,3.18,0.0,1757.515,0.0,11929.814,0.0,,,0.0,0.0
--- a/backend/export/tests/fixtures/portfolio_569.csv
+++ b/backend/export/tests/fixtures/portfolio_569.csv
@ -0,0 +1,2 @@
+id,name,budget,status,goal,cost,number_of_properties,co2_equivalent_savings,energy_savings,energy_cost_savings,property_valuation_increase,rental_yield_increase,total_work_hours,labour_days,created_at,updated_at,epc_breakdown_pre_retrofit,epc_breakdown_post_retrofit,n_units_to_retrofit,co2_per_unit_pre_retrofit,co2_per_unit_post_retrofit,energy_bill_per_unit_pre_retrofit,energy_bill_per_unit_post_retrofit,energy_consumption_per_unit_pre_retrofit,energy_consumption_per_unit_post_retrofit,valuation_improvement_per_unit,cost_per_unit,cost_per_co2_saved,cost_per_sap_point,valuation_return_on_investment
+569,Lifespace Rentals - Sample Retrofit Plans,,PortfolioStatus.SCOPING,PortfolioGoal.NONE,,,,,,,,,,2026-02-12 21:23:37.862000+00:00,2026-02-12 21:23:37.862000+00:00,,,,,,,,,,,,,,
--- a/backend/export/tests/fixtures/properties_569.csv
+++ b/backend/export/tests/fixtures/properties_569.csv
@ -0,0 +1,11 @@
+,id,portfolio_id,creation_status,uprn,landlord_property_id,building_reference_number,status,address,postcode,has_pre_condition_report,has_recommendations,created_at,updated_at,property_type,built_form,local_authority,constituency,number_of_rooms,year_built,tenure,current_epc_rating,current_sap_points,current_valuation,installed_measures_sap_point_adjustment,is_sap_points_adjusted_for_installed_measures,original_sap_points
+0,660478,569,PropertyCreationStatus.READY,100090438731.0,BARR052,3460742868.0,PortfolioStatus.ASSESSMENT,"52, Barrack Street",CO1 2LR,True,True,2026-02-12 21:59:02.744427,2026-02-19 16:18:57.941443,House,End-Terrace,Colchester,Colchester,4.0,1900.0,rental (private),Epc.E,53.0,0.0,0.0,False,53.0
+1,660448,569,PropertyCreationStatus.READY,100090678548.0,BOUR110A,10002385993.0,PortfolioStatus.ASSESSMENT,Upper 110a Bournemouth Park Road,SS2 5LS,True,True,2026-02-12 21:59:02.388473,2026-02-19 16:18:57.578330,Flat,Detached,Southend-on-Sea,Rochford and Southend East,2.0,1950.0,Rented (private),Epc.C,71.0,0.0,0.0,False,71.0
+2,660538,569,PropertyCreationStatus.READY,10033423541.0,CHUR099,8188570968.0,PortfolioStatus.ASSESSMENT,"99, Church Road",RM3 0SH,True,True,2026-02-12 21:59:03.203854,2026-02-19 16:19:03.748571,House,Mid-Terrace,Havering,Hornchurch and Upminster,5.0,1900.0,rental (private),Epc.D,58.0,0.0,0.0,False,58.0
+3,660529,569,PropertyCreationStatus.READY,100091596678.0,CHER003,8961772668.0,PortfolioStatus.ASSESSMENT,"3, Brickfield Cottages",SS4 1PP,True,True,2026-02-12 21:59:02.935502,2026-02-19 16:18:55.971569,House,Mid-Terrace,Rochford,Rochford and Southend East,4.0,1900.0,rental (private),Epc.E,41.0,0.0,0.0,False,41.0
+4,660598,569,PropertyCreationStatus.READY,100090663644.0,FLEM049B,10006705876.0,PortfolioStatus.ASSESSMENT,49b Flemming Crescent,SS9 4HR,True,True,2026-02-12 21:59:04.732965,2026-02-19 16:18:57.601893,Flat,Semi-Detached,Southend-on-Sea,,2.0,1930.0,Rented (social),Epc.C,70.0,0.0,0.0,False,70.0
+5,660599,569,PropertyCreationStatus.READY,10012149765.0,FORE003A,9740118668.0,PortfolioStatus.ASSESSMENT,"3a, Forest Avenue",SS1 2HU,True,True,2026-02-12 21:59:04.732965,2026-02-19 16:18:57.601893,Flat,End-Terrace,Southend-on-Sea,Rochford and Southend East,2.0,1930.0,rental (private),Epc.D,56.0,0.0,0.0,False,56.0
+6,660600,569,PropertyCreationStatus.READY,10012149797.0,FORE003GFF,1436818568.0,PortfolioStatus.ASSESSMENT,"3, Forest Avenue",SS1 2HU,True,True,2026-02-12 21:59:04.732965,2026-02-19 16:18:57.601893,Flat,End-Terrace,Southend-on-Sea,Rochford and Southend East,2.0,1900.0,rental (private),Epc.D,59.0,0.0,0.0,False,59.0
+7,660652,569,PropertyCreationStatus.READY,100022668838.0,MANT061,10000429573.0,PortfolioStatus.ASSESSMENT,61 MANTILLA ROAD,SW17 8DY,True,True,2026-02-12 21:59:04.711717,2026-02-19 16:19:28.379512,Flat,Mid-Terrace,Wandsworth,Tooting,4.0,1900.0,Owner-occupied,Epc.C,71.0,0.0,0.0,False,71.0
+8,660690,569,PropertyCreationStatus.READY,100021987220.0,MERR008,9050743578.0,PortfolioStatus.ASSESSMENT,"8, Merritt Road",SE4 1DY,True,True,2026-02-12 21:59:09.459245,2026-02-19 16:19:32.826638,House,Mid-Terrace,Lewisham,"Lewisham, Deptford",6.0,1900.0,owner-occupied,Epc.D,58.0,0.0,0.0,False,58.0
+9,660688,569,PropertyCreationStatus.READY,207158120.0,MEDC048,208210678.0,PortfolioStatus.ASSESSMENT,"48, Medcalf Road",EN3 6HL,True,True,2026-02-12 21:59:09.459245,2026-02-19 16:19:32.826638,House,Mid-Terrace,Enfield,Enfield North,4.0,1900.0,rental (private),Epc.D,61.0,0.0,0.0,False,61.0
--- a/backend/export/tests/fixtures/property_details_epc_569.csv
+++ b/backend/export/tests/fixtures/property_details_epc_569.csv
@ -0,0 +1,11 @@
+,id,property_id,portfolio_id,full_address,lodgement_date,is_expired,total_floor_area,walls,walls_rating,roof,roof_rating,floor,floor_rating,windows,windows_rating,heating,heating_rating,heating_controls,heating_controls_rating,hot_water,hot_water_rating,lighting,lighting_rating,mainfuel,ventilation,solar_pv,solar_hot_water,wind_turbine,floor_height,number_heated_rooms,heat_loss_corridor,unheated_corridor_length,number_of_open_fireplaces,number_of_extensions,number_of_storeys,mains_gas,energy_tariff,primary_energy_consumption,co2_emissions,current_energy_demand,current_energy_demand_heating_hotwater,estimated,sap_05_overwritten,sap_05_score,sap_05_epc_rating,heating_cost_current,hot_water_cost_current,lighting_cost_current,appliances_cost_current,gas_standing_charge,electricity_standing_charge,original_co2_emissions,original_primary_energy_consumption,original_current_energy_demand,original_current_energy_demand_heating_hotwater,installed_measures_co2_adjustment,installed_measures_energy_demand_adjustment,installed_measures_total_energy_bill_adjustment,installed_measures_heat_demand_adjustment,is_epc_adjusted_for_installed_measures
+44,1534934,660688,569,"48, Medcalf Road",2018-09-05,False,68.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Solid, no insulation",,Fully double glazed,4,"Boiler and radiators, mains gas",4,"Programmer, room thermostat and trvs",4,From main system,4,Low energy lighting in all fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.55,,False,,0,0,,True,Single,278.0,3.81,14643.366,12185.6,False,False,,,711.0628,139.06198,70.770935,609.7844,128.0785,199.8375,3.81,278.0,14643.366,12185.6,0.0,0.0,0.0,0.0,False
+53,1534816,660600,569,"3, Forest Avenue",2020-02-27,False,35.0,"Solid brick, as built, no insulation",1,(another dwelling above),,"Suspended, no insulation",,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 83% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.64,,False,,0,0,,True,Single,389.0,2.69,9707.762,8267.8,False,False,,,466.75378,110.046844,53.1057,345.6198,128.0785,199.8375,2.69,389.0,9707.762,8267.8,0.0,0.0,0.0,0.0,False
+292,1534754,660478,569,"52, Barrack Street",2019-09-11,False,67.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Solid, no insulation",,Partial double glazing,2,"Boiler and radiators, mains gas",4,"Programmer, room thermostat and trvs",4,From main system,4,Low energy lighting in 78% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.36,,False,,0,1,,True,Single,374.0,4.9,18580.451,16094.1,False,False,,,980.4243,142.37581,86.25319,602.2173,128.0785,199.8375,4.9,374.0,18580.451,16094.1,0.0,0.0,0.0,0.0,False
+295,1534868,660652,569,"61 MANTILLA ROAD, LONDON",2020-12-10,False,79.0,"Solid brick, as built, no insulation",1,(another dwelling above),,"Solid, no insulation",,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in all fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.63,,False,,0,0,,True,off-peak 7 hour,184.0,3.18,11929.814,9046.1,False,False,,,487.25763,143.84087,110.2875,688.2131,128.0785,199.8375,3.18,184.0,11929.814,9046.1,0.0,0.0,0.0,0.0,False
+310,1534964,660448,569,Upper 110a Bournemouth Park Road,2022-02-22,False,35.0,"Solid brick, as built, no insulation",1,"Pitched, 100 mm loft insulation",3.0,(another dwelling below),,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 80% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.41,,False,,0,0,,True,Unknown,238.0,1.79,6821.7285,5382.4,False,False,,,272.55676,102.9448,52.930252,345.6198,128.0785,199.8375,1.79,238.0,6821.7285,5382.4,0.0,0.0,0.0,0.0,False
+344,1534936,660690,569,"8, Merritt Road",2017-08-15,False,101.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Suspended, no insulation",,Fully double glazed,3,"Boiler and radiators, mains gas",4,"Programmer, room thermostat and trvs",4,From main system,4,No low energy lighting,1,Mains gas not community,natural,0.0,False,0.0,2.6,,False,,0,1,,True,Unknown,260.0,5.28,21257.838,17606.3,False,False,,,1074.1602,154.13814,194.25749,816.8532,128.0785,199.8375,5.28,260.0,21257.838,17606.3,0.0,0.0,0.0,0.0,False
+460,1535385,660529,569,"3, Brickfield Cottages, Cherry Orchard Lane",2020-04-09,False,85.0,"Solid brick, as built, no insulation",2,"Pitched, 200 mm loft insulation",4.0,"Suspended, no insulation",,Fully double glazed,3,Electric storage heaters,3,Manual charge control,2,"Electric immersion, off-peak",3,Low energy lighting in 58% of fixed outlets,4,Electricity not community,natural,0.0,False,0.0,2.45,,False,,0,1,,True,dual,577.0,8.91,18395.31,15230.1,False,False,,,3550.6333,666.58136,149.46556,726.9812,0.0,199.8375,8.91,577.0,18395.31,15230.1,0.0,0.0,0.0,0.0,False
+485,1534784,660538,569,"99, Church Road, Harold Wood",2019-09-03,False,92.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Suspended, no insulation",,Fully double glazed,4,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 80% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.52,,False,,0,1,,True,Single,297.0,5.44,21073.996,17904.0,False,False,,,1092.4246,156.6427,109.16419,768.6077,128.0785,199.8375,5.44,297.0,21073.996,17904.0,0.0,0.0,0.0,0.0,False
+494,1534814,660598,569,49b Flemming Crescent,2024-10-03,False,35.0,"Solid brick, as built, no insulation",1,(another dwelling above),,"Suspended, no insulation",,Fully double glazed,4,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in all fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.42,,False,,0,0,,True,Single,261.0,1.89,7268.866,5865.4,False,False,,,304.39737,104.800545,43.0,345.6198,128.0785,199.8375,1.89,261.0,7268.866,5865.4,0.0,0.0,0.0,0.0,False
+741,1534815,660599,569,"3a, Forest Avenue",2020-06-05,False,40.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,(another dwelling below),,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 38% of fixed outlets,3,Mains gas not community,natural,0.0,False,0.0,2.58,,False,,0,0,,True,Unknown,396.0,3.12,11673.133,9974.6,False,False,,,587.73975,108.13529,85.62337,384.70035,128.0785,199.8375,3.12,396.0,11673.133,9974.6,0.0,0.0,0.0,0.0,False
--- a/backend/export/tests/fixtures/recommendations_569.csv
+++ b/backend/export/tests/fixtures/recommendations_569.csv
@ -0,0 +1,14 @@
+Unnamed: 0,id,property_id,created_at,type,measure_type,description,estimated_cost,default,starting_u_value,new_u_value,sap_points,heat_demand,kwh_savings,co2_equivalent_savings,energy_savings,energy_cost_savings,property_valuation_increase,rental_yield_increase,total_work_hours,labour_days,already_installed,plan_name
+49705,24798968,660478,2026-02-19 16:14:45.560816,heating,time_temperature_zone_control,"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control)",874.568,True,,,1.5,14.9,1041.2,0.2,14.9,72.639015,,,4.16,1.0,False,whatever
+49709,24798972,660478,2026-02-19 16:14:45.560816,solar_pv,solar_pv,"8 panel system, 400W solar panels, 5.8kw Growatt battery - 3.2 kWp system",6110.0,True,,,17.0,79.1,2235.5623,0.5186504,79.1,619.02716,,,48.0,2.0,False,whatever
+51133,24800396,660538,2026-02-19 16:14:48.517937,solar_pv,solar_pv,"10 panel system, 400W solar panels - 4.0 kWp system",5520.0,True,,,13.0,58.5,1883.4657,0.43696404,58.5,521.5316,,,48.0,2.0,False,whatever
+52686,24801949,660529,2026-02-19 16:14:52.052740,heating,boiler_upgrade,"Upgrade to a new condensing boiler. Upgrade heating controls to Room thermostat, programmer and TRVs",8008.6,True,,,12.9,132.9,0.0,1.1556525,132.9,1806.0955,,,26.5,4.0,False,whatever
+52707,24801970,660529,2026-02-19 16:14:52.052740,solar_pv,solar_pv,"10 panel system, 400W solar panels - 4.0 kWp system",5520.0,True,,,16.1,68.8,1837.0155,0.4261876,68.8,508.6696,,,48.0,2.0,False,whatever
+55310,24804573,660688,2026-02-19 16:15:04.461456,solar_pv,solar_pv,"5 panel system, 400W solar panels - 2.0 kWp system",5100.0,True,,,9.0,41.4,896.6345,0.20801921,41.4,248.27809,,,48.0,2.0,False,whatever
+55380,24804643,660690,2026-02-19 16:15:04.461456,low_energy_lighting,low_energy_lighting,Install low energy lighting in 14 outlets,49.0,True,,,2.0,18.2,766.5,0.124173,18.2,212.24385,,,1.0,0.125,False,whatever
+55384,24804647,660690,2026-02-19 16:15:04.461456,solar_pv,solar_pv,"9 panel system, 400W solar panels - 3.6 kWp system",5420.0,True,,,10.0,43.9,1760.723,0.40848774,43.9,487.54422,,,48.0,2.0,False,whatever
+62983,24812246,660599,2026-02-19 16:18:57.606337,loft_insulation,loft_insulation,Install 300mm of Knauf Loft Roll 44 glass fibre roll in your loft,600.0,True,2.3,2.3,8.4,102.8,3178.2,0.9,102.8,221.72618,,,8.0,1.0,False,whatever
+62985,24812248,660599,2026-02-19 16:18:57.606337,low_energy_lighting,low_energy_lighting,Install low energy lighting in 4 outlets,14.0,True,,,1.0,14.2,219.0,0.0,14.2,60.6411,,,1.0,0.125,False,whatever
+62987,24812250,660599,2026-02-19 16:18:57.606337,heating,time_temperature_zone_control,"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control)",604.584,True,,,3.3,18.4,527.7,0.2,18.4,36.814835,,,3.08,1.0,False,whatever
+62989,24812252,660600,2026-02-19 16:18:57.606337,suspended_floor_insulation,suspended_floor_insulation,Install 75mm Q-bot underfloor insulation insulation in suspended floor,3281.25,True,0.87,0.22,4.0,99.2,1816.6,0.6,99.2,126.734566,,,57.05,2.3770833,False,whatever
+62992,24812255,660600,2026-02-19 16:18:57.606337,heating,time_temperature_zone_control,"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control)",604.584,True,,,1.9,17.7,650.1,0.2,17.7,45.354034,,,3.08,1.0,False,whatever
--- a/backend/export/tests/test_export.py
+++ b/backend/export/tests/test_export.py
@ -0,0 +1,540 @@
+import pandas as pd
+import numpy as np
+from pathlib import Path
+import time
+
+from backend.export.property_scenarios.main import process_export
+from backend.export.property_scenarios.input_schema import ExportRequest
+from backend.app.db.models.portfolio import PropertyModel, Epc, Portfolio, PortfolioStatus, PortfolioGoal, \
+    PropertyCreationStatus, PropertyDetailsEpcModel
+from backend.app.db.models.recommendations import PlanModel, Recommendation, PlanRecommendations, \
+    RecommendationMaterials
+from backend.app.db.models.materials import Material
+from utils.logger import setup_logger
+
+FIXTURE_PATH = Path("backend/export/tests/fixtures")
+logger = setup_logger()
+
+
+def load_csv(name: str) -> pd.DataFrame:
+    """Read the fixture CSV called ``name`` and swap NaN cells for ``None``."""
+    fixture_file = FIXTURE_PATH / name
+    return pd.read_csv(fixture_file).replace({np.nan: None})
+
+
+def test_default_export_integration(db_session):
+    """
+    End-to-end check of ``process_export`` for default plans on the portfolio 569 fixtures.
+
+    Loads the fixture CSVs, inserts the portfolio, properties, EPC details,
+    plans, recommendations and plan/recommendation links through
+    ``db_session``, runs the export with ``default_plans_only=True`` and
+    asserts on the resulting ``default_plans`` dataframe.
+    """
+    # ----------------------------------------
+    # 1) Load csvs
+    # ----------------------------------------
+    t0 = time.perf_counter()
+    portfolio_df = load_csv("portfolio_569.csv")
+    properties_df = load_csv("properties_569.csv")
+    property_details_epc_df = load_csv("property_details_epc_569.csv")
+    plans_df = load_csv("plans_569.csv")
+    plan_recs_df = load_csv("plan_recs_569.csv")
+    recommendations_df = load_csv("recommendations_569.csv")
+
+    logger.info(
+        "Loaded CSVs in %.2f seconds | properties=%s plans=%s recs=%s",
+        time.perf_counter() - t0,
+        len(properties_df),
+        len(plans_df),
+        len(recommendations_df),
+    )
+
+    logger.info("Starting database load")
+    db_load_t0 = time.perf_counter()
+
+    # ----------------------------------------
+    # 2) Insert test portfolio
+    # ----------------------------------------
+
+    portfolios = []
+    for row in portfolio_df.itertuples(index=False):
+        portfolios.append(
+            Portfolio(
+                id=row.id,
+                name=row.name,
+                # Fixture stores enums as "EnumName.MEMBER"; keep only the member name
+                status=PortfolioStatus[row.status.split(".")[-1]],
+                goal=PortfolioGoal[row.goal.split(".")[-1]] if row.goal else None,
+            )
+        )
+
+    db_session.bulk_save_objects(portfolios)
+    db_session.flush()
+    # ----------------------------------------
+    # 3) Insert test property
+    # ----------------------------------------
+
+    properties = []
+
+    for row in properties_df.itertuples(index=False):
+        row_dict = row._asdict()
+
+        row_dict["uprn"] = int(row_dict["uprn"]) if row_dict.get("uprn") else None
+        row_dict["building_reference_number"] = (
+            int(row_dict["building_reference_number"])
+            if row_dict.get("building_reference_number")
+            else None
+        )
+
+        # Only pass columns that exist on the model
+        prop = PropertyModel(**{
+            col: row_dict[col]
+            for col in PropertyModel.__table__.columns.keys()
+            if col in row_dict
+        })
+
+        prop.creation_status = PropertyCreationStatus[
+            row_dict["creation_status"].split(".")[-1]
+        ]
+        prop.status = PortfolioStatus[row_dict["status"].split(".")[-1]]
+
+        if row_dict.get("current_epc_rating"):
+            prop.current_epc_rating = Epc[
+                row_dict["current_epc_rating"].split(".")[-1]
+            ]
+
+        properties.append(prop)
+
+    db_session.bulk_save_objects(properties)
+    db_session.flush()
+
+    # ----------------------------------------
+    # 4) Insert property details - EPC
+    # ----------------------------------------
+
+    epc_rows = []
+
+    for row in property_details_epc_df.itertuples(index=False):
+        row_dict = row._asdict()
+
+        # Build only fields that exist on the model
+        epc_data = {
+            col.name: row_dict[col.name]
+            for col in PropertyDetailsEpcModel.__table__.columns.values()
+            if col.name in row_dict and col.name not in ["id", "property_id", "portfolio_id"]
+        }
+
+        epc = PropertyDetailsEpcModel(
+            property_id=row.property_id,
+            portfolio_id=row.portfolio_id,
+            **epc_data,
+        )
+
+        epc_rows.append(epc)
+
+    db_session.bulk_save_objects(epc_rows)
+    db_session.flush()
+
+    # ----------------------------------------
+    # 5) Insert default plan
+    # ----------------------------------------
+
+    plans = []
+
+    for row in plans_df.itertuples(index=False):
+        row_dict = row._asdict()
+
+        if row_dict.get("post_epc_rating"):
+            row_dict["post_epc_rating"] = Epc[
+                row_dict["post_epc_rating"].split(".")[-1]
+            ]
+
+        # The fixture's scenario_id is discarded so the plans are inserted without a scenario
+        row_dict["scenario_id"] = None
+
+        plan = PlanModel(**{
+            col: row_dict[col]
+            for col in PlanModel.__table__.columns.keys()
+            if col in row_dict
+        })
+
+        plans.append(plan)
+
+    db_session.bulk_save_objects(plans)
+    db_session.flush()
+
+    # ----------------------------------------
+    # 6) Insert recommendation
+    # ----------------------------------------
+
+    recs = [
+        Recommendation(**{
+            col: row[col]
+            for col in Recommendation.__table__.columns.keys()
+            if col in row
+        })
+        for _, row in recommendations_df.iterrows()
+    ]
+
+    db_session.bulk_save_objects(recs)
+    db_session.flush()
+
+    # ----------------------------------------
+    # 7) Insert PlanRecommendations
+    # ----------------------------------------
+    links = [
+        PlanRecommendations(
+            plan_id=row.plan_id,
+            recommendation_id=row.recommendation_id,
+        )
+        for row in plan_recs_df.itertuples(index=False)
+    ]
+
+    db_session.bulk_save_objects(links)
+    db_session.commit()
+    logger.info("Inserted all data in %.2f seconds", time.perf_counter() - db_load_t0)
+
+    # ----------------------------------------
+    # 8) Build payload
+    # ----------------------------------------
+
+    body_dict = {
+        "task_id": "test",
+        "subtask_id": "test",
+        "portfolio_id": 569,
+        "scenario_ids": [],
+        "default_plans_only": True,
+    }
+
+    payload = ExportRequest.model_validate(body_dict)
+
+    # ----------------------------------------
+    # 9) Call process_export
+    # ----------------------------------------
+
+    logger.info(
+        "Recommendation count in DB: %s",
+        db_session.query(Recommendation).count()
+    )
+
+    logger.info(
+        "Property count in DB: %s",
+        db_session.query(PropertyModel).count()
+    )
+
+    logger.info(
+        "Property EPC in DB: %s",
+        db_session.query(PropertyDetailsEpcModel).count()
+    )
+
+    logger.info(
+        "Plan count in DB: %s",
+        db_session.query(PlanModel).count()
+    )
+
+    # Count the link table itself (previously this re-counted PlanModel)
+    logger.info(
+        "PlanRecommendations count in DB: %s",
+        db_session.query(PlanRecommendations).count()
+    )
+
+    logger.info("Starting process_export")
+    process_t0 = time.perf_counter()
+
+    result = process_export(payload, session=db_session)
+
+    logger.info("process_export finished in %.2f seconds", time.perf_counter() - process_t0)
+
+    # ----------------------------------------
+    # 10) Assertions
+    # ----------------------------------------
+
+    assert "default_plans" in result, "Expected 'default_plans' in export result, got {}".format(result.keys())
+
+    df = result["default_plans"]
+
+    assert df.shape[0] == 10, "Expected 10 properties in the export, got {}".format(df.shape[0])
+
+    failed = df[df["predicted_post_works_sap"] < 69]
+    failed_property_types = failed["property_type"].value_counts().to_dict()
+    # .get() so a missing "Flat" bucket fails the assertion instead of raising KeyError
+    assert failed_property_types.get("Flat", 0) == 2, (
+        "Expected 2 flats below SAP 69, got {}".format(failed_property_types)
+    )
+    # Check the houses
+
+    assert failed.shape[0], "Expected at least one property below SAP 69"
+
+    # Float sums are compared with a tolerance: exact equality depends on
+    # summation order and breaks on harmless library changes.
+    total_retrofit_cost = df["total_retrofit_cost"].sum()
+    assert np.isclose(total_retrofit_cost, 41706.586, rtol=0, atol=1e-6), (
+        "Expected total retrofit cost to be 41706.586, got {}".format(total_retrofit_cost)
+    )
+
+    total_post_works_sap = df["predicted_post_works_sap"].sum()
+    assert np.isclose(total_post_works_sap, 698.1, rtol=0, atol=1e-6), (
+        "Expected total predicted post works SAP to be 698.1, got {}".format(total_post_works_sap)
+    )
+
+    total_sap_points = df["sap_points"].sum()
+    assert np.isclose(total_sap_points, 100.1, rtol=0, atol=1e-6), (
+        "Expected total SAP points increase to be 100.1, got {}".format(total_sap_points)
+    )
+
+    assert df.shape == (10, 95), "Expected dataframe shape to be (10, 95), got {}".format(df.shape)
+
+
+def test_solar_with_battery_example(db_session):
+    """
+    Check that a solar PV recommendation whose material includes a battery is
+    exported under ``solar_pv_with_battery`` rather than ``solar_pv``.
+
+    Builds one portfolio, property, EPC record, default plan, recommendation
+    and material (``includes_battery=True``) in ``db_session``, runs
+    ``process_export`` with ``default_plans_only=True`` and asserts on the
+    columns and cost of the single exported row.
+    """
+    test_portfolio_id = 1
+    test_property_id = 1
+
+    portfolio_df = pd.DataFrame(
+        [{'id': test_portfolio_id, 'name': 'Example', 'budget': None,
+          'status': 'PortfolioStatus.SCOPING', 'goal': 'PortfolioGoal.NONE', 'cost': None, 'number_of_properties': None,
+          'co2_equivalent_savings': None, 'energy_savings': None, 'energy_cost_savings': None,
+          'property_valuation_increase': None, 'rental_yield_increase': None, 'total_work_hours': None,
+          'labour_days': None, 'created_at': '2026-02-12 21:23:37.862000+00:00',
+          'updated_at': '2026-02-12 21:23:37.862000+00:00', 'epc_breakdown_pre_retrofit': None,
+          'epc_breakdown_post_retrofit': None, 'n_units_to_retrofit': None, 'co2_per_unit_pre_retrofit': None,
+          'co2_per_unit_post_retrofit': None, 'energy_bill_per_unit_pre_retrofit': None,
+          'energy_bill_per_unit_post_retrofit': None, 'energy_consumption_per_unit_pre_retrofit': None,
+          'energy_consumption_per_unit_post_retrofit': None, 'valuation_improvement_per_unit': None,
+          'cost_per_unit': None, 'cost_per_co2_saved': None, 'cost_per_sap_point': None,
+          'valuation_return_on_investment': None}]
+    )
+
+    properties_df = pd.DataFrame(
+        [{'id': test_property_id, 'portfolio_id': test_portfolio_id, 'creation_status': 'PropertyCreationStatus.READY',
+          'uprn': 100090438731, 'landlord_property_id': 'BARR052', 'building_reference_number': 3460742868.0,
+          'status': 'PortfolioStatus.ASSESSMENT', 'address': '52, Barrack Street', 'postcode': 'CO1 2LR',
+          'has_pre_condition_report': True, 'has_recommendations': True, 'created_at': '2026-02-12 21:59:02.744427',
+          'updated_at': '2026-02-19 16:18:57.941443', 'property_type': 'House', 'built_form': 'End-Terrace',
+          'local_authority': 'Colchester', 'constituency': 'Colchester', 'number_of_rooms': 4.0, 'year_built': 1900.0,
+          'tenure': 'rental (private)', 'current_epc_rating': 'Epc.E', 'current_sap_points': 53.0,
+          'current_valuation': 0.0, 'installed_measures_sap_point_adjustment': 0.0,
+          'is_sap_points_adjusted_for_installed_measures': False, 'original_sap_points': 53.0}]
+    )
+
+    property_details_epc_df = pd.DataFrame(
+        [
+            {'id': 1534934, 'property_id': test_property_id, 'portfolio_id': test_portfolio_id,
+             'full_address': '48, Medcalf Road', 'lodgement_date': '2018-09-05', 'is_expired': False,
+             'total_floor_area': 68.0, 'walls': 'Solid brick, as built, no insulation', 'walls_rating': 1,
+             'roof': 'Pitched, no insulation', 'roof_rating': 1.0, 'floor': 'Solid, no insulation',
+             'floor_rating': None,
+             'windows': 'Fully double glazed', 'windows_rating': 4, 'heating': 'Boiler and radiators, mains gas',
+             'heating_rating': 4, 'heating_controls': 'Programmer, room thermostat and trvs',
+             'heating_controls_rating': 4,
+             'hot_water': 'From main system', 'hot_water_rating': 4,
+             'lighting': 'Low energy lighting in all fixed outlets', 'lighting_rating': 5,
+             'mainfuel': 'Mains gas not community', 'ventilation': 'natural', 'solar_pv': 0.0, 'solar_hot_water': False,
+             'wind_turbine': 0.0, 'floor_height': 2.55, 'number_heated_rooms': None, 'heat_loss_corridor': False,
+             'unheated_corridor_length': None, 'number_of_open_fireplaces': 0, 'number_of_extensions': 0,
+             'number_of_storeys': None, 'mains_gas': True, 'energy_tariff': 'Single',
+             'primary_energy_consumption': 278.0,
+             'co2_emissions': 3.81, 'current_energy_demand': 14643.366,
+             'current_energy_demand_heating_hotwater': 12185.6,
+             'estimated': False, 'sap_05_overwritten': False, 'sap_05_score': None, 'sap_05_epc_rating': None,
+             'heating_cost_current': 711.0628, 'hot_water_cost_current': 139.06198, 'lighting_cost_current': 70.770935,
+             'appliances_cost_current': 609.7844, 'gas_standing_charge': 128.0785,
+             'electricity_standing_charge': 199.8375,
+             'original_co2_emissions': 3.81, 'original_primary_energy_consumption': 278.0,
+             'original_current_energy_demand': 14643.366, 'original_current_energy_demand_heating_hotwater': 12185.6,
+             'installed_measures_co2_adjustment': 0.0, 'installed_measures_energy_demand_adjustment': 0.0,
+             'installed_measures_total_energy_bill_adjustment': 0.0, 'installed_measures_heat_demand_adjustment': 0.0,
+             'is_epc_adjusted_for_installed_measures': False}
+        ]
+    )
+
+    plans_df = pd.DataFrame(
+        [
+            {'id': 0, 'name': None, 'portfolio_id': test_portfolio_id, 'property_id': test_property_id,
+             'scenario_id': 1060, 'created_at': '2026-02-19 16:14:45.560816', 'is_default': True,
+             'valuation_increase_lower_bound': 0.0302,
+             'valuation_increase_upper_bound': 0.07, 'valuation_increase_average': 0.048226666, 'plan_type': None,
+             'post_sap_points': 71.5, 'post_epc_rating': 'Epc.C', 'post_co2_emissions': 4.1813498,
+             'co2_savings': 0.71865046, 'post_energy_bill': 1447.5204, 'energy_bill_savings': 691.6662,
+             'post_energy_consumption': 15303.688, 'energy_consumption_savings': 3276.7622,
+             'valuation_post_retrofit': None, 'valuation_increase': None, 'cost_of_works': 6984.568,
+             'contingency_cost': 1003.9568}
+        ]
+    )
+
+    plan_recs_df = pd.DataFrame(
+        [{'id': 0, 'plan_id': 0, 'recommendation_id': 0}]
+    )
+
+    recommendations_df = pd.DataFrame(
+        [{'id': 0, 'property_id': test_property_id, 'created_at': '2026-02-19 16:14:45.560816',
+          'type': 'solar_pv', 'measure_type': 'solar_pv',
+          'description': 'Fit solar',
+          'estimated_cost': 10000, 'default': True, 'starting_u_value': None, 'new_u_value': None, 'sap_points': 1.5,
+          'heat_demand': 14.9, 'kwh_savings': 1041.2, 'co2_equivalent_savings': 0.2, 'energy_savings': 14.9,
+          'energy_cost_savings': 72.639015, 'property_valuation_increase': None, 'rental_yield_increase': None,
+          'total_work_hours': 4.16, 'labour_days': 1.0, 'already_installed': False, 'plan_name': 'whatever'}
+         ]
+    )
+
+    recommendations_materials_df = pd.DataFrame(
+        [
+            {
+                "id": 0, "recommendation_id": 0, "material_id": 0, "depth": None, "quantity": 1.0,
+                "quantity_unit": "part",
+                "estimated_cost": 10000, "created_at": '2026-02-19 16:14:45.560816',
+                "updated_at": '2026-02-19 16:14:45.560816',
+            }
+        ]
+    )
+
+    materials_df = pd.DataFrame(
+        [
+            {'id': 0, 'type': 'solar_pv', 'description': 'Some solar product',
+             'depth': 75.0,
+             'depth_unit': 'mm', 'cost': None, 'cost_unit': 'gbp_per_m2', 'r_value_per_mm': 0.030303031,
+             'r_value_unit': 'square_meter_kelvin_per_watt', 'thermal_conductivity': 0.033,
+             'thermal_conductivity_unit': 'watt_per_meter_kelvin', 'link': 'Test',
+             'created_at': '2026-02-19 16:14:45.560816', 'is_active': True,
+             'prime_material_cost': None,
+             'material_cost': 0.0, 'labour_cost': 0.0, 'labour_hours_per_unit': 0.0, 'plant_cost': 0.0,
+             'total_cost': 10000,
+             'notes': None, 'is_installer_quote': True, 'innovation_rate': 0.25, 'size': None, 'size_unit': None,
+             'includes_scaffolding': True, 'includes_battery': True, 'battery_size': 5.8}
+        ]
+    )
+
+    # Load into db
+    # -------------------------------------------------
+    # Insert Portfolio
+    # -------------------------------------------------
+    for row in portfolio_df.itertuples(index=False):
+        db_session.add(
+            Portfolio(
+                id=row.id,
+                name=row.name,
+                # Enum values are stored as "EnumName.MEMBER"; keep only the member name
+                status=PortfolioStatus[row.status.split(".")[-1]],
+                goal=PortfolioGoal[row.goal.split(".")[-1]],
+            )
+        )
+    db_session.flush()
+
+    # -------------------------------------------------
+    # Insert Property
+    # -------------------------------------------------
+    for row in properties_df.itertuples(index=False):
+        prop = PropertyModel(
+            id=row.id,
+            portfolio_id=row.portfolio_id,
+            creation_status=PropertyCreationStatus[row.creation_status.split(".")[-1]],
+            status=PortfolioStatus[row.status.split(".")[-1]],
+            uprn=row.uprn,
+            property_type=row.property_type,
+            current_sap_points=row.current_sap_points,
+            current_epc_rating=Epc[row.current_epc_rating.split(".")[-1]],
+        )
+        db_session.add(prop)
+    db_session.flush()
+
+    # -------------------------------------------------
+    # Insert EPC Details
+    # -------------------------------------------------
+    for row in property_details_epc_df.itertuples(index=False):
+        epc = PropertyDetailsEpcModel(
+            property_id=row.property_id,
+            portfolio_id=row.portfolio_id,
+            full_address=row.full_address,
+            total_floor_area=row.total_floor_area,
+            walls=row.walls,
+            roof=row.roof,
+            windows=row.windows,
+            heating=row.heating,
+            solar_pv=row.solar_pv,
+        )
+        db_session.add(epc)
+    db_session.flush()
+
+    # -------------------------------------------------
+    # Insert Plan (default)
+    # -------------------------------------------------
+    for row in plans_df.itertuples(index=False):
+        plan = PlanModel(
+            id=row.id,
+            portfolio_id=row.portfolio_id,
+            property_id=row.property_id,
+            scenario_id=None,  # default mode
+            is_default=row.is_default,
+        )
+        db_session.add(plan)
+    db_session.flush()
+
+    # -------------------------------------------------
+    # IMPORTANT: Force recommendation to be solar_pv
+    # -------------------------------------------------
+    recommendations_df.loc[0, "measure_type"] = "solar_pv"
+
+    for row in recommendations_df.itertuples(index=False):
+        rec = Recommendation(
+            id=row.id,
+            property_id=row.property_id,
+            measure_type=row.measure_type,
+            estimated_cost=row.estimated_cost,
+            default=row.default,
+            already_installed=row.already_installed,
+            sap_points=row.sap_points,
+            type=row.type,
+            description=row.description
+        )
+        db_session.add(rec)
+    db_session.flush()
+
+    # -------------------------------------------------
+    # Link Plan -> Recommendation
+    # -------------------------------------------------
+    for row in plan_recs_df.itertuples(index=False):
+        db_session.add(
+            PlanRecommendations(
+                plan_id=row.plan_id,
+                recommendation_id=row.recommendation_id,
+            )
+        )
+    db_session.flush()
+
+    # -------------------------------------------------
+    # Insert Material (includes_battery=True)
+    # -------------------------------------------------
+    for row in materials_df.itertuples(index=False):
+        material = Material(
+            id=row.id,
+            type=row.type,
+            description=row.description,
+            depth_unit=row.depth_unit,
+            cost_unit=row.cost_unit,
+            r_value_unit=row.r_value_unit,
+            thermal_conductivity_unit=row.thermal_conductivity_unit,
+            includes_battery=row.includes_battery,
+            is_active=row.is_active,
+        )
+        db_session.add(material)
+    db_session.flush()
+
+    # -------------------------------------------------
+    # Link Recommendation -> Material
+    # -------------------------------------------------
+    for row in recommendations_materials_df.itertuples(index=False):
+        db_session.add(
+            RecommendationMaterials(
+                recommendation_id=row.recommendation_id,
+                material_id=row.material_id,
+                # The fixture has no depth for this link; store 0.0 instead
+                depth=row.depth or 0.0,
+                quantity=row.quantity,
+                quantity_unit=row.quantity_unit,
+                estimated_cost=row.estimated_cost,
+            )
+        )
+
+    db_session.commit()
+
+    payload = ExportRequest.model_validate({
+        "task_id": "test",
+        "subtask_id": "test",
+        "portfolio_id": test_portfolio_id,
+        "scenario_ids": [],
+        "default_plans_only": True,
+    })
+
+    result = process_export(payload, session=db_session)
+
+    assert "default_plans" in result
+
+    df = result["default_plans"]
+
+    assert "solar_pv_with_battery" in df.columns
+
+    # solar_pv should NOT exist
+    assert "solar_pv" not in df.columns
+
+    assert df.shape[0] == 1, "Expected 1 property in the export, got {}".format(df.shape[0])
+
+    # Cost should land in correct column
+    assert df["solar_pv_with_battery"].iloc[0] == 10000
--- a/backend/postcode_splitter/handler/Dockerfile
+++ b/backend/postcode_splitter/handler/Dockerfile
@ -1,9 +1,28 @@
-FROM public.ecr.aws/lambda/python:3.10
+FROM public.ecr.aws/lambda/python:3.11
+
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+ENV DB_HOST=${DEV_DB_HOST}
+ENV DB_PORT=${DEV_DB_PORT}
+ENV DB_NAME=${DEV_DB_NAME}

 # Set working directory (Lambda task root)
 WORKDIR /var/task

-# -----------------------------
+COPY backend/postcode_splitter/handler/requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy necessary files for database and utility imports
+COPY utils/ utils/
+COPY backend/ backend/
+COPY datatypes/ datatypes/
+
+# Copy the handler
+COPY backend/postcode_splitter/main.py .
+
 # Lambda handler
-# -----------------------------
 CMD ["main.handler"]
+
--- a/backend/postcode_splitter/handler/requirements.txt
+++ b/backend/postcode_splitter/handler/requirements.txt
@ -0,0 +1,11 @@
+pandas==2.2.2
+numpy<2.0
+requests
+tqdm
+openpyxl
+epc-api-python==1.0.2
+boto3==1.35.44
+sqlmodel
+sqlalchemy==2.0.36
+psycopg2-binary==2.9.10
+pydantic-settings==2.6.0
--- a/backend/postcode_splitter/main.py
+++ b/backend/postcode_splitter/main.py
@ -1,127 +1,278 @@
+import os
+import sys
+import json
 import pandas as pd
 import requests
-from backend.address2UPRN.main import (
-    resolve_uprns_for_postcode_group,
-    get_epc_data_with_postcode,
+import boto3
+from uuid import UUID, uuid4
+from utils.s3 import (
+    read_csv_from_s3 as read_csv_from_s3_dict,
+    save_csv_to_s3,
+    parse_s3_uri,
 )
+from utils.logger import setup_logger
 from tqdm import tqdm
+from backend.app.db.functions.tasks.Tasks import SubTaskInterface
+from datetime import datetime
+
+logger = setup_logger()


-def sanitise_postcode(postcode: str) -> str | None:
+def upload_batch_to_s3(
+    batch_df: pd.DataFrame,
+    task_id: str,
+    sub_task_id: str,
+    bucket_name: str | None = None,
+) -> str:
    """
-    Normalise postcode for grouping.
-
-    - Uppercase
-    - Remove all whitespace
+    Upload batch DataFrame to S3 as CSV.
+
+    The object is written under
+    ``ara_postcode_splitter_batches/<task_id>/<sub_task_id>/`` with a file
+    name built from the current timestamp and a short random suffix.
+
+    Args:
+        batch_df: Rows to write.
+        task_id: Task ID used in the object key.
+        sub_task_id: Subtask ID used in the object key.
+        bucket_name: Target bucket; falls back to the ``S3_BUCKET_NAME``
+            environment variable when ``None``.
+
+    Returns:
+        The ``s3://`` URI of the uploaded object.
+
+    Raises:
+        ValueError: If no bucket is configured or ``save_csv_to_s3`` reports
+            failure.
    """
-    if pd.isna(postcode):
-        return None
+    if bucket_name is None:
+        bucket_name = os.getenv("S3_BUCKET_NAME")

-    return postcode.upper().replace(" ", "")
-
-
-def is_valid_postcode(postcode_clean: str) -> bool:
-    """
-    Validate postcode using postcodes.io.
-
-    Expects a sanitised postcode (e.g. E84SQ).
-    Returns True if valid, False otherwise.
-    """
-    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
-    if not postcode_clean:
-        return False
+    if not bucket_name:
+        logger.error(
+            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
+        )
+        raise ValueError("S3_BUCKET_NAME not configured")

    try:
-        resp = requests.get(
-            POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
-            timeout=5,
+        file_name = f"{datetime.now().isoformat()}_{str(uuid4())[:8]}"
+        file_key = (
+            f"ara_postcode_splitter_batches/{task_id}/{sub_task_id}/{file_name}.csv"
        )
-        resp.raise_for_status()
-        return resp.json().get("result", False)
-    except requests.RequestException:
-        # Network issues, rate limits, etc.
-        return False
+
+        success = save_csv_to_s3(batch_df, bucket_name, file_key)
+
+        # Guard clause: treat a falsy result from the helper as a failed upload
+        if not success:
+            logger.error("Failed to upload batch to S3")
+            raise ValueError("Failed to save CSV to S3")
+
+        s3_uri = f"s3://{bucket_name}/{file_key}"
+        logger.info(f"Successfully uploaded batch to {s3_uri}")
+        return s3_uri
+
+    except Exception as e:
+        # Log for visibility, then let the caller see the original exception
+        logger.error(f"Error uploading batch to S3: {str(e)}")
+        raise


-def main():
-    df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability")
-    df = df.head(500)
+def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> str:
+    """
+    Publish one batch reference to the address2UPRN SQS queue.

-    # Sanitise postcodes
-    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)
+    Args:
+        task_id: The parent task ID
+        sub_task_id: The subtask ID created for this batch
+        s3_uri: S3 URI of the batch CSV file

-    # --- validate AFTER grouping (save API calls) ---
+    Returns:
+        The ``MessageId`` reported by SQS
+
+    Raises:
+        ValueError: If ``ADDRESS2UPRN_QUEUE_URL`` is not set
+    """
+    sqs = boto3.client("sqs")
+    target_queue = os.getenv("ADDRESS2UPRN_QUEUE_URL")

-    # Get unique, non-null postcodes
-    unique_postcodes = df["postcode_clean"].dropna().unique()
+    if not target_queue:
+        raise ValueError("ADDRESS2UPRN_QUEUE_URL environment variable not set")

-    # Validate each postcode once, TODOadd a progress bar
-    postcode_validity = {
-        pc: is_valid_postcode(pc)
-        for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
+    payload = {
+        "task_id": task_id,
+        "sub_task_id": sub_task_id,
+        "s3_uri": s3_uri,
    }

-    # Map validity back onto dataframe
-    df["postcode_valid"] = df["postcode_clean"].map(postcode_validity)
+    sqs_response = sqs.send_message(
+        QueueUrl=target_queue,
+        MessageBody=json.dumps(payload),
+    )
+    message_id = sqs_response["MessageId"]

+    logger.info(
+        f"Sent message to address2UPRN queue. "
+        f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {message_id}"
+    )
+
+    return message_id
+
+
+def create_batch_and_send_to_address2uprn(
+    batch_df: pd.DataFrame,
+    task_id: str,
+    sub_task_id: str,
+    subtask_interface: SubTaskInterface,
+    bucket_name: str,
+) -> str:
+    """
+    Upload one batch to S3, register a subtask for it and queue it for address2UPRN.
+
+    ``sub_task_id`` is only used for the S3 object key; the queue message
+    carries the ID of the subtask created here, which is also returned.
+    """
+    parent_task = str(task_id)
+
+    # 1) Persist the batch so the queue message only needs a reference
+    s3_uri = upload_batch_to_s3(batch_df, parent_task, str(sub_task_id), bucket_name)
+
+    # 2) Register a subtask carrying the inputs for this batch
+    subtask_inputs = {
+        "task_id": parent_task,
+        "s3_uri": s3_uri,
+    }
+    batch_subtask_id = subtask_interface.create_subtask(
+        task_id=task_id,
+        inputs=subtask_inputs,
+    )
+    logger.info(f"Created batch subtask {batch_subtask_id}")
+
+    # 3) Hand the S3 reference to the address2UPRN queue
+    send_to_address2uprn_queue(
+        task_id=parent_task,
+        sub_task_id=str(batch_subtask_id),
+        s3_uri=s3_uri,
+    )
+
+    return batch_subtask_id
+
+
+def handler(event, context, local=False):
+    """
+    Lambda entry point: split each input CSV into postcode-grouped batches for address2UPRN.
+
+    Each record's body supplies ``task_id``, ``sub_task_id`` and ``s3_uri``.
+    The CSV is read from S3, postcodes are upper-cased with spaces removed,
+    rows without a postcode are dropped, and the rows are forwarded in batches
+    of about 500 that never split one postcode across two batches.
+
+    Args:
+        event: Invocation event; ``Records`` is used when present, otherwise
+            the event itself is treated as the single record.
+        context: Lambda context object (only used for logging).
+        local: When true, a hard-coded sample event and the
+            ``retrofit-data-dev`` bucket are used instead.
+
+    Returns:
+        A dict with ``statusCode`` 200 and a JSON ``body`` listing the
+        processed subtask IDs.
+
+    Raises:
+        ValueError: If a record lacks ``task_id``, ``sub_task_id`` or ``s3_uri``.
+    """
+    # context may be absent when the handler is called by hand (e.g. local=True)
+    print(f"Function: {getattr(context, 'function_name', None)}")
+    print(f"Request ID: {getattr(context, 'aws_request_id', None)}")
+
+    # Example SQS message for testing (copy and paste into SQS):
+    if local is True:
+        event = {
+            "Records": [
+                {
+                    "body": json.dumps(
+                        {
+                            "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
+                            "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
+                            "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv",
+                        }
+                    )
+                }
+            ]
+        }
+    # Handle both single event and batch events (SQS, etc.)
+    records = event.get("Records", [event])
    results = []
+    errors = []
+    subtask_interface = SubTaskInterface()
+    bucket_name = os.getenv("S3_BUCKET_NAME")
+    if local:
+        bucket_name = "retrofit-data-dev"

-    for postcode, group_df in tqdm(
-        df[df["postcode_valid"]].groupby("postcode_clean"),
-        desc="Resolving UPRNs by postcode",
-    ):
-        try:
-            epc_df = get_epc_data_with_postcode(postcode)
+    for record in records:
+        if local:
+            record = records[0]
+        task_id = None
+        subtask_id = None
+        # Parse body (inputs)

-            if epc_df.empty:
-                tmp = group_df.copy()
-                tmp["found_uprn"] = None
-                tmp["status"] = "no_epc_results"
-                results.append(tmp)
-                continue
+        if isinstance(record.get("body"), str):
+            body = json.loads(record["body"])
+        else:
+            body = record.get("body", {})

-            resolved = resolve_uprns_for_postcode_group(
-                group_df=group_df,
-                epc_df=epc_df,
+        # Validate required fields
+        task_id = body.get("task_id")
+        subtask_id = body.get("sub_task_id")
+        s3_uri = body.get("s3_uri")
+
+        # Fail with a clear message instead of an obscure error further down
+        if not task_id or not subtask_id or not s3_uri:
+            raise ValueError(
+                "Record body must include task_id, sub_task_id and s3_uri"
+            )
+
+        # Convert task_id to UUID
+        task_id = UUID(task_id) if isinstance(task_id, str) else task_id
+        subtask_id = UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id
+
+        # Mark subtask as in progress
+        subtask_interface.update_subtask_status(subtask_id, "in progress")
+        logger.info(f"Marked subtask {subtask_id} as in progress")
+
+        # Read CSV from S3
+        bucket, key = parse_s3_uri(s3_uri)
+        logger.info(f"S3 Bucket: {bucket}, Key: {key}")
+
+        csv_data = read_csv_from_s3_dict(bucket, key)
+        df = pd.DataFrame(csv_data)
+
+        logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns")
+
+        # Sanitise postcodes
+        df["postcode_clean"] = df["postcode"].str.upper().str.replace(" ", "")
+
+        df = df.dropna(subset=["postcode_clean"])
+
+        batch_size = 500
+        if df.shape[0] < batch_size:
+            create_batch_and_send_to_address2uprn(
+                batch_df=df,
+                task_id=task_id,
+                sub_task_id=subtask_id,
+                subtask_interface=subtask_interface,
+                bucket_name=bucket_name,
            )
+        else:
+            postcode_to_addresses = {
+                postcode: group
+                for postcode, group in df.groupby("postcode_clean", sort=False)
+            }

-            results.append(resolved)
+            count = 0
+            buffer = []

-        except Exception as e:
-            tmp = group_df.copy()
-            tmp["found_uprn"] = None
-            tmp["status"] = "exception"
-            tmp["error"] = str(e)
-            results.append(tmp)
+            for postcode, group_df in postcode_to_addresses.items():
+                group_len = len(group_df)

-    final_df = pd.concat(results, ignore_index=True)
-    a = final_df[
-        [
-            "best_match_lexiscore",
-            "Address 1",
-            "best_match_address",
-            "Postcode",
-            "UPRN",
-            "best_match_uprn",
-        ]
-    ]  # add levi score to viewing
-    b = final_df[final_df["best_match_lexiscore"] > 0]  # add levi score to viewing
-    b = b[
-        [
-            "best_match_lexiscore",
-            "Address 1",
-            "best_match_address",
-            "Postcode",
-            "UPRN",
-            "best_match_uprn",
-        ]
-    ]
+                # If single postcode is bigger than batch_size → send directly
+                if group_len >= batch_size:
+                    if buffer:
+                        create_batch_and_send_to_address2uprn(
+                            batch_df=pd.concat(buffer, ignore_index=True),
+                            task_id=task_id,
+                            sub_task_id=subtask_id,
+                            subtask_interface=subtask_interface,
+                            bucket_name=bucket_name,
+                        )
+                        buffer = []
+                        count = 0

+                    create_batch_and_send_to_address2uprn(
+                        batch_df=group_df,
+                        task_id=task_id,
+                        sub_task_id=subtask_id,
+                        subtask_interface=subtask_interface,
+                        bucket_name=bucket_name,
+                    )
+                    continue

-def handler(event, context):
-    print("hello Postcode splitter world")
-    return {"statusCode": 200, "body": "hello world"}
+                # If adding would exceed batch → flush first
+                if count + group_len > batch_size:
+                    create_batch_and_send_to_address2uprn(
+                        batch_df=pd.concat(buffer, ignore_index=True),
+                        task_id=task_id,
+                        sub_task_id=subtask_id,
+                        subtask_interface=subtask_interface,
+                        bucket_name=bucket_name,
+                    )
+                    buffer = []
+                    count = 0

+                # Add group
+                buffer.append(group_df)
+                count += group_len

-if __name__ == "__main__":
-    main()
+            # Final flush
+            if buffer:
+                create_batch_and_send_to_address2uprn(
+                    batch_df=pd.concat(buffer, ignore_index=True),
+                    task_id=task_id,
+                    sub_task_id=subtask_id,
+                    subtask_interface=subtask_interface,
+                    bucket_name=bucket_name,
+                )
+
+        # Mark this record's subtask as completed. This has to happen inside
+        # the loop: after the loop only the last record's ID is in scope, and
+        # no ID at all exists when there are no records.
+        subtask_interface.update_subtask_status(
+            subtask_id,
+            "completed",
+            outputs={"rows_processed": "completed"},
+        )
+        results.append(str(subtask_id))
+
+    return {
+        "statusCode": 200,
+        "body": json.dumps(
+            {"processed": results, "errors": errors if errors else None}
+        ),
+    }
--- a/etl/customers/l_and_g/ic_slides.py
+++ b/etl/customers/l_and_g/ic_slides.py
@ -41,7 +41,10 @@ epc_data = pd.read_csv(

 # Classify floor area in <73m2, 73-98, 99-200, 200+
 epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(
-    lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+")
+    lambda x: (
+        "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+"
+    )
+)

 # 73-98     185
 # <73       156
@ -65,7 +68,11 @@ import pandas as pd
 import numpy as np
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
-from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    PlanModel,
+    PlanRecommendations,
+)
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel


@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids):
    session.begin()

    # Get properties and their details for a specific portfolio
-    properties_query = session.query(
-        PropertyModel,
-        PropertyDetailsEpcModel
-    ).join(
-        PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
-    ).filter(
-        PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
-    ).all()
+    properties_query = (
+        session.query(PropertyModel, PropertyDetailsEpcModel)
+        .join(
+            PropertyDetailsEpcModel,
+            PropertyModel.id == PropertyDetailsEpcModel.property_id,
+        )
+        .filter(PropertyModel.portfolio_id == portfolio_id)  # Filter by portfolio ID
+        .all()
+    )

    # Transform properties data to include all fields dynamically
    properties_data = [
-        {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
-         **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
-            PropertyDetailsEpcModel.__table__.columns}}
+        {
+            **{
+                col.name: getattr(prop.PropertyModel, col.name)
+                for col in PropertyModel.__table__.columns
+            },
+            **{
+                col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
+                for col in PropertyDetailsEpcModel.__table__.columns
+            },
+        }
        for prop in properties_query
    ]

    # Get property IDs from fetched properties

    # Get plans belonging to the requested scenarios
-    plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+    plans_query = (
+        session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
+    )

    # Transform plans data to include all fields dynamically
    plans_data = [
-        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
        for plan in plans_query
    ]

    # Extract plan IDs for filtering recommendations through PlanRecommendations
-    plan_ids = [plan['id'] for plan in plans_data]
+    plan_ids = [plan["id"] for plan in plans_data]

    # Get recommendations through PlanRecommendations for those plans and that are default
-    recommendations_query = session.query(
-        Recommendation,
-        Plan.scenario_id
-    ).join(
-        PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
-    ).join(
-        Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
-    ).filter(
-        PlanRecommendations.plan_id.in_(plan_ids),
-        Recommendation.default == True  # Filtering for default recommendations
-    ).all()
+    recommendations_query = (
+        session.query(Recommendation, PlanModel.scenario_id)
+        .join(
+            PlanRecommendations,
+            Recommendation.id == PlanRecommendations.recommendation_id,
+        )
+        # Join with Plan to access scenario_id
+        .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
+        .filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            # Filtering for default recommendations. The comparison must stay
+            # `== True` (not `is True`): it is evaluated by SQLAlchemy to build
+            # the SQL predicate, not by Python.
+            Recommendation.default == True,
+        )
+        .all()
+    )

    # Transform recommendations data to include all fields dynamically and include scenario_id
    recommendations_data = [
-        {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
-                                                                                                           col.name) for
-            col in Recommendation.__table__.columns},
-         "Scenario ID": rec.scenario_id}
+        {
+            **{
+                col.name: (
+                    getattr(rec.Recommendation, col.name)
+                    if hasattr(rec, "Recommendation")
+                    else getattr(rec, col.name)
+                )
+                for col in Recommendation.__table__.columns
+            },
+            "Scenario ID": rec.scenario_id,
+        }
        for rec in recommendations_query
    ]

@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids):
    return properties_data, plans_data, recommendations_data


-properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
+properties_data, plans_data, recommendations_data = get_data(
+    portfolio_id=124, scenario_ids=[205]
+)

 properties_df = pd.DataFrame(properties_data)
 plans_df = pd.DataFrame(plans_data)
@ -147,12 +179,12 @@ recommended_measures_df = recommended_measures_df.drop(columns=["default"])
 post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
 post_install_sap = post_install_sap[post_install_sap["default"]]
 # Sum up the sap points by property id
-post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
+post_install_sap = (
+    post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
+)

 recommendations_measures_pivot = recommended_measures_df.pivot(
-    index='property_id',
-    columns='measure_type',
-    values='estimated_cost'
+    index="property_id", columns="measure_type", values="estimated_cost"
 )
 recommendations_measures_pivot = recommendations_measures_pivot.reset_index()

@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename(
        "double_glazing": "Cost: Double Glazing",
        "loft_insulation": "Cost: Loft Insulation",
        "mechanical_ventilation": "Cost: Ventilation",
-        "solar_pv": "Cost: Solar PV"
+        "solar_pv": "Cost: Solar PV",
    }
 )
 recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = (
    recommendations_measures_pivot["Cost: Solar PV"] > 0
 )

-df = properties_df[
-    [
-        "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
-        "current_epc_rating",
-        "current_sap_points", "total_floor_area", "number_of_rooms",
+df = (
+    properties_df[
+        [
+            "property_id",
+            "uprn",
+            "address",
+            "postcode",
+            "property_type",
+            "walls",
+            "roof",
+            "heating",
+            "windows",
+            "current_epc_rating",
+            "current_sap_points",
+            "total_floor_area",
+            "number_of_rooms",
+        ]
    ]
-].merge(
-    recommendations_measures_pivot, how="left", on="property_id"
-).merge(
-    post_install_sap, how="left", on="property_id"
+    .merge(recommendations_measures_pivot, how="left", on="property_id")
+    .merge(post_install_sap, how="left", on="property_id")
 )

 df = df.drop(columns=["property_id"])
@ -222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"])

 # We fill missings:
 for col in [
-    "Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation",
-    "Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation",
-    "Recommendation: Solar PV"
+    "Recommendation: Air Source Heat Pump",
+    "Recommendation: Cavity Wall Insulation",
+    "Recommendation: Double Glazing",
+    "Recommendation: Loft Insulation",
+    "Recommendation: Ventilation",
+    "Recommendation: Solar PV",
 ]:
    df[col] = df[col].fillna(False)

 for col in [
-    "Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation",
-    "Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation",
-    "Cost: Solar PV"
+    "Cost: Air Source Heat Pump",
+    "Cost: Cavity Wall Insulation",
+    "Cost: Double Glazing",
+    "Cost: Loft Insulation",
+    "Cost: Ventilation",
+    "Cost: Solar PV",
 ]:
    df[col] = df[col].fillna(0)

 # Calculate post SAP
 df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
 df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
-df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
+df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
+    sap_to_epc  # pass the mapper directly; no wrapper lambda needed
+)

 df["Recommendation: Air Source Heat Pump"].sum()
 df["Cost: Air Source Heat Pump"].sum()

-df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)
+df.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv",
+    index=False,
+)
--- a/etl/customers/mod/pilot/2.
+++ b/etl/customers/mod/pilot/2.
@ -4,7 +4,11 @@ import numpy as np
 from backend.app.utils import sap_to_epc
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
-from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    PlanModel,
+    PlanRecommendations,
+)
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel


@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids):
    session.begin()

    # Get properties and their details for a specific portfolio
-    properties_query = session.query(
-        PropertyModel,
-        PropertyDetailsEpcModel
-    ).join(
-        PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
-    ).filter(
-        PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
-    ).all()
+    properties_query = (
+        session.query(PropertyModel, PropertyDetailsEpcModel)
+        .join(
+            PropertyDetailsEpcModel,
+            PropertyModel.id == PropertyDetailsEpcModel.property_id,
+        )
+        .filter(PropertyModel.portfolio_id == portfolio_id)  # Filter by portfolio ID
+        .all()
+    )

    # Transform properties data to include all fields dynamically
    properties_data = [
-        {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
-         **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
-            PropertyDetailsEpcModel.__table__.columns}}
+        {
+            **{
+                col.name: getattr(prop.PropertyModel, col.name)
+                for col in PropertyModel.__table__.columns
+            },
+            **{
+                col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
+                for col in PropertyDetailsEpcModel.__table__.columns
+            },
+        }
        for prop in properties_query
    ]

    # Get property IDs from fetched properties

    # Get plans belonging to the requested scenarios
-    plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+    plans_query = (
+        session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
+    )

    # Transform plans data to include all fields dynamically
    plans_data = [
-        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
        for plan in plans_query
    ]

    # Extract plan IDs for filtering recommendations through PlanRecommendations
-    plan_ids = [plan['id'] for plan in plans_data]
+    plan_ids = [plan["id"] for plan in plans_data]

    # Get recommendations through PlanRecommendations for those plans and that are default
-    recommendations_query = session.query(
-        Recommendation,
-        Plan.scenario_id
-    ).join(
-        PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
-    ).join(
-        Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
-    ).filter(
-        PlanRecommendations.plan_id.in_(plan_ids),
-        Recommendation.default == True  # Filtering for default recommendations
-    ).all()
+    recommendations_query = (
+        session.query(Recommendation, PlanModel.scenario_id)
+        .join(
+            PlanRecommendations,
+            Recommendation.id == PlanRecommendations.recommendation_id,
+        )
+        # Join with Plan to access scenario_id
+        .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
+        .filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            # Filtering for default recommendations. The comparison must stay
+            # `== True` (not `is True`): it is evaluated by SQLAlchemy to build
+            # the SQL predicate, not by Python.
+            Recommendation.default == True,
+        )
+        .all()
+    )

    # Transform recommendations data to include all fields dynamically and include scenario_id
    recommendations_data = [
-        {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation')
-        else getattr(rec, col.name) for
-            col in Recommendation.__table__.columns},
-         "Scenario ID": rec.scenario_id}
+        {
+            **{
+                col.name: (
+                    getattr(rec.Recommendation, col.name)
+                    if hasattr(rec, "Recommendation")
+                    else getattr(rec, col.name)
+                )
+                for col in Recommendation.__table__.columns
+            },
+            "Scenario ID": rec.scenario_id,
+        }
        for rec in recommendations_query
    ]

@ -94,16 +121,34 @@ def app():
    )

    property_asset_data = properties_df.merge(
-        mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn"
+        mod_property_data.drop(columns=["address", "postcode", "tenure"]),
+        how="left",
+        on="uprn",
    )

-    property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False)
+    property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains(
+        "pitched", case=False
+    )
    property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970
-    property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip()
-    property_asset_data["is_insulated"] = (
-        property_asset_data["walls"].str.split(",").str[1].str.strip().isin(
-            ["filled cavity", "with external insulation", "filled cavity and external insulation"]
-        ) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"])
+    property_asset_data["wall_type"] = (
+        property_asset_data["walls"].str.split(" ").str[0].str.strip()
+    )
+    # Split the wall description on commas once and reuse the pieces for
+    # both insulation checks.
+    wall_parts = property_asset_data["walls"].str.split(",")
+    second_part_insulated = wall_parts.str[1].str.strip().isin(
+        [
+            "filled cavity",
+            "with external insulation",
+            "filled cavity and external insulation",
+        ]
+    )
+    # Insulated if the second piece names a fill/external treatment or the
+    # third piece is exactly "insulated" (after stripping whitespace).
+    property_asset_data["is_insulated"] = second_part_insulated | (
+        wall_parts.str[2]
+        .str.strip()
+        .isin(["insulated"])
    )
    property_asset_data["is_insulated"] = np.where(
        property_asset_data["is_insulated"], "Insulated", "Uninsulated"
@ -115,18 +160,26 @@ def app():
        property_asset_data["pre_1970"], "Pre 1970", "Post 1970"
    )

-    archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"]
+    archetype_variables = [
+        "property_type",
+        "wall_type",
+        "is_insulated",
+        "is_pitched",
+        "pre_1970",
+    ]

    assigned_archetypes = (
-        property_asset_data.groupby(
-            archetype_variables
-        ).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False)
+        property_asset_data.groupby(archetype_variables)
+        .size()
+        .reset_index()
+        .rename(columns={0: "n_properties"})
+        .sort_values("n_properties", ascending=False)
    )

    # Make the archetype ID a concatenation of the variables
-    assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply(
-        lambda x: "_".join(x.astype(str)), axis=1
-    )
+    assigned_archetypes["archetype_id"] = assigned_archetypes[
+        archetype_variables
+    ].apply(lambda x: "_".join(x.astype(str)), axis=1)

    # Most prominent archetypes
    prominent_archetypes = assigned_archetypes.head(6)
@ -136,7 +189,7 @@ def app():
    property_asset_data = property_asset_data.merge(
        assigned_archetypes[archetype_variables + ["archetype_id"]],
        how="left",
-        on=archetype_variables
+        on=archetype_variables,
    )

    # Create age bands:
@ -148,7 +201,7 @@ def app():
    property_asset_data["age_band"] = pd.cut(
        property_asset_data["BUILD_YEAR"],
        bins=[1959, 1969, 1979, 1989, 1999, 2022],
-        labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"]
+        labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"],
    )

    # Create floor area bands
@ -159,47 +212,59 @@ def app():
    property_asset_data["floor_area_band"] = pd.cut(
        property_asset_data["total_floor_area"],
        bins=[0, 73, 97, 199, 10000],
-        labels=["0-73", "74-97", "98-199", "200+"]
+        labels=["0-73", "74-97", "98-199", "200+"],
    )

    property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy()
    property_asset_data["archetype_group"] = np.where(
-        property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values),
+        property_asset_data["archetype_id"].isin(
+            other_archetypes["archetype_id"].values
+        ),
        "other",
-        property_asset_data["archetype_group"]
+        property_asset_data["archetype_group"],
    )

    # For colour
    wall_types = (
-        property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename(
-            columns={"wall_type": "Wall Type"}
-        )
+        property_asset_data[["wall_type"]]
+        .value_counts()
+        .to_frame()
+        .reset_index()
+        .rename(columns={"wall_type": "Wall Type"})
    )
    # Group into age bands
    ages = (
-        property_asset_data[["age_band"]].value_counts()
+        property_asset_data[["age_band"]]
+        .value_counts()
        .to_frame()
-        .reset_index().sort_values("age_band", ascending=True)
+        .reset_index()
+        .sort_values("age_band", ascending=True)
        .rename(columns={"age_band": "Age Band"})
    )
    floor_area_bands = (
-        property_asset_data[["floor_area_band"]].value_counts()
+        property_asset_data[["floor_area_band"]]
+        .value_counts()
        .to_frame()
-        .reset_index().sort_values("floor_area_band", ascending=True)
+        .reset_index()
+        .sort_values("floor_area_band", ascending=True)
        .rename(columns={"floor_area_band": "Floor Area Band"})
    )
    archetype_counts = (
-        property_asset_data[["archetype_group"]].
-        value_counts().
-        to_frame().
-        reset_index()
+        property_asset_data[["archetype_group"]]
+        .value_counts()
+        .to_frame()
+        .reset_index()
        .rename(columns={"archetype_group": "Archetype"})
    )
    property_types = (
-        (property_asset_data["property_type"] + ": " + property_asset_data["built_form"]).
-        value_counts().
-        to_frame().
-        reset_index()
+        (
+            property_asset_data["property_type"]
+            + ": "
+            + property_asset_data["built_form"]
+        )
+        .value_counts()
+        .to_frame()
+        .reset_index()
        .rename(columns={"index": "Property Type", 0: "Count"})
    )

@ -217,18 +282,24 @@ def app():
    totals = property_asset_data[
        [
            "Total_household_members",
-            "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
-            "heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
-            "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
+            "co2_emissions",
+            "current_energy_demand",
+            "current_energy_demand_heating_hotwater",
+            "heating_cost_current",
+            "hot_water_cost_current",
+            "lighting_cost_current",
+            "appliances_cost_current",
+            "gas_standing_charge",
+            "electricity_standing_charge",
        ]
    ].copy()
    totals["total_cost"] = (
-        totals["heating_cost_current"] +
-        totals["hot_water_cost_current"] +
-        totals["lighting_cost_current"] +
-        totals["appliances_cost_current"] +
-        totals["gas_standing_charge"] +
-        totals["electricity_standing_charge"]
+        totals["heating_cost_current"]
+        + totals["hot_water_cost_current"]
+        + totals["lighting_cost_current"]
+        + totals["appliances_cost_current"]
+        + totals["gas_standing_charge"]
+        + totals["electricity_standing_charge"]
    )
    print(
        totals[
@ -259,38 +330,59 @@ def app():

        scenario_recommendations_df = recommendations_df[
            recommendations_df["Scenario ID"] == scenario
-            ].copy()
+        ].copy()

-        scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"]
+        scenario_recommendations_df["contingency"] = (
+            contingency * scenario_recommendations_df["estimated_cost"]
+        )
        scenario_recommendations_df["total_cost"] = (
-            scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"]
+            scenario_recommendations_df["estimated_cost"]
+            + scenario_recommendations_df["contingency"]
        )

        recommended_measures_df = scenario_recommendations_df[
            ["property_id", "measure_type", "estimated_cost", "default"]
        ]

-        recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
+        recommended_measures_df = recommended_measures_df[
+            recommended_measures_df["default"]
+        ]
        recommended_measures_df = recommended_measures_df.drop(columns=["default"])

        # Metrics by property ID
        aggregated_metrics = scenario_recommendations_df[
            [
-                "property_id", "type", "default", "sap_points",
-                "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency",
-                "total_cost"
+                "property_id",
+                "type",
+                "default",
+                "sap_points",
+                "energy_cost_savings",
+                "kwh_savings",
+                "co2_equivalent_savings",
+                "estimated_cost",
+                "contingency",
+                "total_cost",
            ]
        ]
        aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
-        aggregated_metrics = aggregated_metrics.groupby("property_id")[
-            ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
-             "total_cost", "contingency"]
-        ].sum().reset_index()
+        aggregated_metrics = (
+            aggregated_metrics.groupby("property_id")[
+                [
+                    "sap_points",
+                    "co2_equivalent_savings",
+                    "energy_cost_savings",
+                    "kwh_savings",
+                    "estimated_cost",
+                    "total_cost",
+                    "contingency",
+                ]
+            ]
+            .sum()
+            .reset_index()
+        )

        recommendations_measures_pivot = recommended_measures_df.pivot(
-            index='property_id',
-            columns='measure_type',
-            values='estimated_cost'
+            index="property_id", columns="measure_type", values="estimated_cost"
        )
        recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
        recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -299,30 +391,58 @@ def app():
        for c in recommendations_measures_pivot.columns:
            if c == "property_id":
                continue
-            recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0
+            recommendations_measures_pivot["Recommendation: " + c] = (
+                recommendations_measures_pivot[c] > 0
+            )

        # We now create a final output
-        df = properties_df[
-            [
-                "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
-                "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
-                "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
-                "heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
-                "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
+        df = (
+            properties_df[
+                [
+                    "property_id",
+                    "uprn",
+                    "address",
+                    "postcode",
+                    "property_type",
+                    "walls",
+                    "roof",
+                    "heating",
+                    "windows",
+                    "current_epc_rating",
+                    "current_sap_points",
+                    "total_floor_area",
+                    "number_of_rooms",
+                    "co2_emissions",
+                    "current_energy_demand",
+                    "current_energy_demand_heating_hotwater",
+                    "heating_cost_current",
+                    "hot_water_cost_current",
+                    "lighting_cost_current",
+                    "appliances_cost_current",
+                    "gas_standing_charge",
+                    "electricity_standing_charge",
+                ]
            ]
-        ].merge(
-            recommendations_measures_pivot, how="left", on="property_id"
-        ).merge(
-            aggregated_metrics, how="left", on="property_id"
+            .merge(recommendations_measures_pivot, how="left", on="property_id")
+            .merge(aggregated_metrics, how="left", on="property_id")
        )

        df["bills_total_cost"] = (
-            df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] +
-            df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"]
+            df["heating_cost_current"]
+            + df["hot_water_cost_current"]
+            + df["lighting_cost_current"]
+            + df["appliances_cost_current"]
+            + df["gas_standing_charge"]
+            + df["electricity_standing_charge"]
        )

        df = df.drop(columns=["property_id"])
-        for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
+        for c in [
+            "sap_points",
+            "co2_equivalent_savings",
+            "energy_cost_savings",
+            "kwh_savings",
+        ]:
            df[c] = df[c].fillna(0)

        df = df.rename(
@ -345,16 +465,23 @@ def app():
        # Calculate post SAP
        df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
        df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
-        df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
+        df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
+            sap_to_epc  # pass the mapper directly; no wrapper lambda needed
+        )

        # Calculate the relative savings on carbon, kwh, and bills
-        df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"]
+        df["relative_carbon_savings"] = (
+            df["co2_equivalent_savings"] / df["co2_emissions"]
+        )
        df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"]
        df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"]

        # Add on the archetype
        df = df.merge(
-            property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn"
+            property_asset_data[["uprn", "archetype_group"]],
+            how="left",
+            left_on="UPRN",
+            right_on="uprn",
        )

        # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it
@ -387,7 +514,9 @@ def app():

    printing_scenario_id = scenario_ids[0]
    # EPC breakdown
-    print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts())
+    print(
+        scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts()
+    )
    # Cost
    # Total cost
    print(scenario_data[printing_scenario_id]["total_cost"].sum())
@ -408,16 +537,24 @@ def app():
    measure_details = {}
    for scenario in scenario_ids:
        measure_details[scenario] = {}
-        recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
-        measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict()
+        recommendation_cols = [
+            c for c in scenario_data[scenario].columns if "Recommendation:" in c
+        ]
+        measure_details[scenario]["count"] = (
+            scenario_data[scenario][recommendation_cols].sum().to_dict()
+        )
        # Get average cost per measure
        measure_columns = [
-            c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c
+            c.split("Recommendation: ")[1]
+            for c in scenario_data[scenario].columns
+            if "Recommendation:" in c
        ]
        # Take the mean, drop zero columns
        measure_costs = {}
        for m in measure_columns:
-            measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean())
+            measure_costs[m] = float(
+                scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()
+            )
        measure_details[scenario]["cost_per_measure"] = measure_costs

    pprint(measure_details[scenario_ids[0]]["count"])
@ -452,12 +589,27 @@ def app():
    for scenario in scenario_ids:
        df = scenario_data[scenario].copy()

-        avg_savings = df[
-            ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
-             "total_cost", "contingency"]
-        ].mean().to_dict()
-        avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"]
-        avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
+        avg_savings = (
+            df[
+                [
+                    "sap_points",
+                    "co2_equivalent_savings",
+                    "energy_cost_savings",
+                    "kwh_savings",
+                    "estimated_cost",
+                    "total_cost",
+                    "contingency",
+                ]
+            ]
+            .mean()
+            .to_dict()
+        )
+        avg_savings["cost_per_sap_point"] = (
+            avg_savings["total_cost"] / avg_savings["sap_points"]
+        )
+        avg_savings["cost_per_carbon"] = (
+            avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
+        )
        scenario_metrics[scenario] = avg_savings

    pprint(scenario_metrics[scenario_ids[0]])
@ -465,11 +617,11 @@ def app():

    scenario_data[scenario_ids[0]]["loft_insulation"][
        scenario_data[scenario_ids[0]]["loft_insulation"] > 0
-        ].mean()
+    ].mean()

    scenario_data[scenario_ids[0]]["cavity_wall_insulation"][
        scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0
-        ].mean()
+    ].mean()

    # Testing: checking flood risk

@ -477,11 +629,7 @@ def app():

    def get_flood_risk(lat, lon, radius_km=1):
        url = "https://environment.data.gov.uk/flood-monitoring/id/floods"
-        params = {
-            'lat': lat,
-            'long': lon,
-            'dist': radius_km  # search radius in km
-        }
+        params = {"lat": lat, "long": lon, "dist": radius_km}  # search radius in km

        response = requests.get(url, params=params)
        response.raise_for_status()
@ -495,20 +643,19 @@ def app():
            print(f"{len(flood_warnings)} warning(s) found near the location:")
            for warning in flood_warnings:
                print(f"- Area: {warning.get('description')}")
-                print(f"  Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})")
+                print(
+                    f"  Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})"
+                )
                print(f"  Message changed at: {warning.get('timeMessageChanged')}")
                print()

        return flood_warnings

    from shapely.geometry import shape, Point
+
    def get_flood_areas_near_point(lat, lon, radius_km=2):
        url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas"
-        params = {
-            'lat': lat,
-            'long': lon,
-            'dist': radius_km
-        }
+        params = {"lat": lat, "long": lon, "dist": radius_km}

        response = requests.get(url, params=params)
        response.raise_for_status()
@ -531,7 +678,7 @@ def app():
            if not features:
                continue

-            flood_polygon = shape(features[0]['geometry'])
+            flood_polygon = shape(features[0]["geometry"])

            try:
                is_inside = flood_polygon.contains(point)
@ -539,12 +686,17 @@ def app():
                is_inside = False

            if is_inside:
-                print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})")
+                print(
+                    f"📍 Point is inside flood area: {area['label']} ({area['notation']})"
+                )
                return area

    from tqdm import tqdm
+
    floor_warnings_data = []
-    for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)):
+    for _, property in tqdm(
+        property_asset_data.iterrows(), total=len(property_asset_data)
+    ):
        # warnings = floor_warnings_data.extend(
        #     get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1)
        # )
@ -556,7 +708,7 @@ def app():
                    "uprn": property["uprn"],
                    "address": property["address"],
                    "postcode": property["postcode"],
-                    "area": resp
+                    "area": resp,
                }
            )
            continue
@ -570,7 +722,7 @@ def app():
        "House_Cavity_Uninsulated_Pitched roof_Post 1970",
        "other",
        "House_System_Uninsulated_Pitched roof_Pre 1970",
-        "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970"
+        "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970",
    ]

    values = [62, 36, 21, 16, 16, 4, 2]
@ -582,36 +734,39 @@ def app():
        "Cavity wall insulation, ventilation",
        "Bespoke retrofit measures",
        "External wall insulation, roof insulation",
-        "Flat roof insulation, internal wall insulation"
+        "Flat roof insulation, internal wall insulation",
    ]

-    fig = go.Figure(go.Treemap(
-        labels=labels,
-        parents=[""] * len(labels),  # No root
-        values=values,
-        hovertext=hovertext,
-        hoverinfo="text",
-        textinfo="none",
-        marker=dict(
-            line=dict(color="white", width=4),
-            colors=values,
-            colorscale="Blues"
+    fig = go.Figure(
+        go.Treemap(
+            labels=labels,
+            parents=[""] * len(labels),  # No root
+            values=values,
+            hovertext=hovertext,
+            hoverinfo="text",
+            textinfo="none",
+            marker=dict(
+                line=dict(color="white", width=4), colors=values, colorscale="Blues"
+            ),
        )
-    ))
+    )

    fig.update_layout(
-        margin=dict(t=10, l=10, r=10, b=10),
-        plot_bgcolor="white",
-        paper_bgcolor="white"
+        margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white"
    )

    fig.show()

    # Get the recommended measures by scenario id
-    recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c]
-    measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[
-        recommendation_cols
-    ].sum().reset_index()
+    recommendation_cols = [
+        c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c
+    ]
+    measure_counts_by_scenario = (
+        scenario_data[scenario_ids[1]]
+        .groupby("archetype_group")[recommendation_cols]
+        .sum()
+        .reset_index()
+    )

    measure_counts_by_scenario.to_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv"
@ -630,15 +785,13 @@ def app():

        to_append = {"uprn": uprn}
        for _id in scenario_ids:
-            scenario = scenario_data[_id][
-                scenario_data[_id]["uprn"] == uprn
-                ].squeeze()
+            scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze()

            val = PropertyValuation.estimate_valuation_improvement(
                current_value=x["valuation"],
                current_epc=scenario["Current EPC Rating"].value,
                target_epc=scenario["Predicted Post Works EPC"],
-                total_cost=None
+                total_cost=None,
            )

            to_append[_id] = val["average_increase"]
--- a/etl/customers/newhaven/slides.py
+++ b/etl/customers/newhaven/slides.py
--- a/Project/d_restart_failed_subtasks.py
+++ b/Project/d_restart_failed_subtasks.py
@ -10,6 +10,7 @@ Additionally, we will find the problematic records and remove them
 Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan
 or recommendations in case something went wrong
 """
+
 import pandas as pd
 from sqlalchemy.orm import Session
 from backend.app.db.models.portfolio import PropertyModel
@ -19,8 +20,7 @@ from backend.app.db.connection import db_session
 def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]:
    return [
        uprn
-        for (uprn,) in
-        session.query(PropertyModel.uprn)
+        for (uprn,) in session.query(PropertyModel.uprn)
        .filter(PropertyModel.portfolio_id == portfolio_id)
        .all()
        if uprn is not None
@ -34,7 +34,7 @@ with db_session() as session:
 sal = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
    "data.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)]
@ -44,7 +44,7 @@ missed_properties.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
    "d_failed_properties_to_restart_20260102.xlsx",
    sheet_name="Standardised Asset List",
-    index=False
+    index=False,
 )

 # Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios:
@ -52,14 +52,14 @@ scenario_id = None

 from sqlalchemy import select, func
 from sqlalchemy.orm import Session
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel


 def count_plans_for_scenario(session: Session, scenario_id: int) -> int:
    return session.execute(
        select(func.count())
-        .select_from(Plan)
-        .where(Plan.scenario_id == scenario_id)
+        .select_from(PlanModel)
+        .where(PlanModel.scenario_id == scenario_id)
    ).scalar_one()


@ -69,8 +69,7 @@ with db_session() as session:

 def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]:
    result = session.execute(
-        select(Plan.id)
-        .where(Plan.scenario_id == scenario_id)
+        select(PlanModel.id).where(PlanModel.scenario_id == scenario_id)
    )
    return [row.id for row in result]

@ -84,7 +83,7 @@ from sqlalchemy.orm import Session

 def chunked(iterable, size):
    for i in range(0, len(iterable), size):
-        yield iterable[i:i + size]
+        yield iterable[i : i + size]


 from sqlalchemy import text
@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # recommendation_materials
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation_materials rm
            USING plan_recommendations pr
            WHERE rm.recommendation_id = pr.recommendation_id
              AND pr.plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -116,10 +117,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plan_recommendations
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan_recommendations
            WHERE plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
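    # recommendations (only those used by these plans). NOTE(review): the plan_recommendations rows for these plan_ids were already deleted by the previous statement, so this subquery may match nothing - confirm the delete order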
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation r
            WHERE r.id IN (
                SELECT DISTINCT recommendation_id
                FROM plan_recommendations
                WHERE plan_id = ANY(:plan_ids)
            )
-        """),
+        """
+        ),
        params,
    )

@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plans LAST
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan
            WHERE id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -5,6 +5,7 @@ This includes:
 # EPC C, there should be a plan
 2) If the plan is fabric first, make sure they are actually fabric first
 """
+
 import pandas as pd

 scenario_names = {
@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items():
    )

    # find properties that are below the scenario sap target, but have no recommended measures
-    df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id]
+    df["below_scenario_target"] = (
+        df["current_sap_points"] < scenario_sap_targets[scenario_id]
+    )
    df["no_recommended_measures"] = df["sap_points"] == 0
    df["zero_cost"] = df["total_retrofit_cost"] == 0
    df["sap_points_above_zero"] = df["sap_points"] > 0
@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items():
    ].copy()

    if scenario_sap_targets[scenario_id] == 81:
-        problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"]
+        problematic_properties = problematic_properties[
+            problematic_properties["property_type"] != "Flat"
+        ]

    zero_cost_above_zero_sap = df[
        (df["sap_points_above_zero"] & df["zero_cost"])
@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items():
    # pd.set_option('display.width', 1000)
    # problematic_properties.head(len(problematic_properties))

-    print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
-    print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")
+    print(
+        f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})"
+    )
+    print(
+        f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})"
+    )

    problems.append(problematic_properties)
    problems.append(zero_cost_above_zero_sap)
@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"])
 sal = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
    "data.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )
 sal2 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
    "UPRNS.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 sal = pd.concat([sal, sal2])
@ -114,7 +123,7 @@ retry.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
    "d_problematic_properties_to_review_20260106.xlsx",
    sheet_name="Standardised Asset List",
-    index=False
+    index=False,
 )

 # Delete associated plans
@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist()
 from sqlalchemy.orm import Session
 from backend.app.db.models.portfolio import PropertyModel
 from backend.app.db.connection import db_session
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel
 from sqlalchemy import select, delete
 from sqlalchemy.exc import NoResultFound
 from sqlalchemy.orm import sessionmaker


-def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]:
+def get_property_ids_for_uprns(
+    session: Session, portfolio_id: int, uprns: list[int]
+) -> list[int]:
    return [
        property.id
        for property in session.query(PropertyModel)
        .filter(
-            PropertyModel.portfolio_id == portfolio_id,
-            PropertyModel.uprn.in_(uprns)
+            PropertyModel.portfolio_id == portfolio_id, PropertyModel.uprn.in_(uprns)
        )
        .all()
    ]
@ -149,15 +159,21 @@ with db_session() as session:


 # Get all and delete plans for these property IDs
-def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]:
-    return session.query(Plan).filter(Plan.property_id.in_(property_ids)).all()
+def get_all_plans_for_property_ids(
+    session: Session, property_ids: list[int]
+) -> list[PlanModel]:
+    return (
+        session.query(PlanModel).filter(PlanModel.property_id.in_(property_ids)).all()
+    )


-def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]:
+def get_ids_of_plans_for_deletion(
+    session: Session, property_ids: list[int]
+) -> list[int]:
    return [
        plan.id
-        for plan in session.query(Plan)
-        .filter(Plan.property_id.in_(property_ids))
+        for plan in session.query(PlanModel)
+        .filter(PlanModel.property_id.in_(property_ids))
        .all()
    ]

@ -168,7 +184,7 @@ with db_session() as session:

 def chunked(iterable, size):
    for i in range(0, len(iterable), size):
-        yield iterable[i:i + size]
+        yield iterable[i : i + size]


 from sqlalchemy import text
@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # recommendation_materials
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation_materials rm
            USING plan_recommendations pr
            WHERE rm.recommendation_id = pr.recommendation_id
              AND pr.plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plan_recommendations
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan_recommendations
            WHERE plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
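    # recommendations (only those used by these plans). NOTE(review): the plan_recommendations rows for these plan_ids were already deleted by the previous statement, so this subquery may match nothing - confirm the delete order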
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation r
            WHERE r.id IN (
                SELECT DISTINCT recommendation_id
                FROM plan_recommendations
                WHERE plan_id = ANY(:plan_ids)
            )
-        """),
+        """
+        ),
        params,
    )

@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plans LAST
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan
            WHERE id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

--- a/Project/g_rebaselining_installed_measrues.py
+++ b/Project/g_rebaselining_installed_measrues.py
--- a/Project/h_reset_estimated_epcs.py
+++ b/Project/h_reset_estimated_epcs.py
@ -3,31 +3,41 @@ from sqlalchemy.orm import Session
 from sqlalchemy import text, select
 from backend.app.db.connection import db_read_session
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel

 PORTFOLIO_ID = 435

 with db_read_session() as session:
    # Get all PropertyDetailsEpcModel rows for properties in portfolio PORTFOLIO_ID (the "estimated == True" filter below is currently commented out)
-    estimated_epcs = session.query(PropertyDetailsEpcModel).filter(
-        # PropertyDetailsEpcModel.estimated == True,
-        PropertyDetailsEpcModel.property_id.in_(
-            session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID)
+    estimated_epcs = (
+        session.query(PropertyDetailsEpcModel)
+        .filter(
+            # PropertyDetailsEpcModel.estimated == True,
+            PropertyDetailsEpcModel.property_id.in_(
+                session.query(PropertyModel.id).filter(
+                    PropertyModel.portfolio_id == PORTFOLIO_ID
+                )
+            )
        )
-    ).all()
+        .all()
+    )

    # Get the ids
    estimated_epc_ids = [epc.property_id for epc in estimated_epcs]

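 # Look up the UPRNs for these properties from the property model. NOTE(review): estimated_epc_ids holds property_id values but is matched against PropertyDetailsEpcModel.id below - confirm this is intended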
 with db_read_session() as session:
-    estimated_uprns = session.query(PropertyModel.uprn).filter(
-        PropertyModel.id.in_(
-            session.query(PropertyDetailsEpcModel.property_id).filter(
-                PropertyDetailsEpcModel.id.in_(estimated_epc_ids)
+    estimated_uprns = (
+        session.query(PropertyModel.uprn)
+        .filter(
+            PropertyModel.id.in_(
+                session.query(PropertyDetailsEpcModel.property_id).filter(
+                    PropertyDetailsEpcModel.id.in_(estimated_epc_ids)
+                )
            )
        )
-    ).all()
+        .all()
+    )

    estimated_uprns_list = [uprn for (uprn,) in estimated_uprns]

@ -35,16 +45,16 @@ with db_read_session() as session:
 sal_1 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
    "data.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )
 sal_2 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
    "UPRNS.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 sal = pd.concat([sal_1, sal_2])
-sal = sal.drop_duplicates(subset=['epc_os_uprn'])
+sal = sal.drop_duplicates(subset=["epc_os_uprn"])

 estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy()

@ -55,20 +65,24 @@ SCENARIOS = [
    # 861,  # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
    # 859,  # EPC C - no solid floor, ashp 3.0
    # 885,  # EPC B - fabric first, no solid floor, ashp 3.0
-    908, 909, 910
+    908,
+    909,
+    910,
 ]

 # Get all plans, associated to these properties - the property IDs are in estimated_epc_ids
 with db_read_session() as session:
    result = session.execute(
-        select(Plan.id, Plan.property_id)
-        .where(Plan.property_id.in_(estimated_epc_ids))
+        select(PlanModel.id, PlanModel.property_id).where(
+            PlanModel.property_id.in_(estimated_epc_ids)
+        )
    )
    plans = [
        {
            "plan_id": row.id,
            "property_id": row.property_id,
-        } for row in result
+        }
+        for row in result
    ]

 df = pd.DataFrame(plans)
@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # recommendation_materials
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation_materials rm
            USING plan_recommendations pr
            WHERE rm.recommendation_id = pr.recommendation_id
              AND pr.plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plan_recommendations
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan_recommendations
            WHERE plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
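    # recommendations (only those used by these plans). NOTE(review): the plan_recommendations rows for these plan_ids were already deleted by the previous statement, so this subquery may match nothing - confirm the delete order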
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation r
            WHERE r.id IN (
                SELECT DISTINCT recommendation_id
                FROM plan_recommendations
                WHERE plan_id = ANY(:plan_ids)
            )
-        """),
+        """
+        ),
        params,
    )

@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plans LAST
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan
            WHERE id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )


 # Store the SAL
-filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
-            "sal.xlsx")
+filename = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
+    "sal.xlsx"
+)

 with pd.ExcelWriter(filename) as writer:
    sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer:
 b1 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 1"
+    sheet_name="batch 1",
 )
 b2 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 2"
+    sheet_name="batch 2",
 )
 b3 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 3"
+    sheet_name="batch 3",
 )
 b4 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 4"
+    sheet_name="batch 4",
 )
 b5 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 5"
+    sheet_name="batch 5",
 )
 # Batch 6 should be the remaining
 total = pd.concat([b1, b2, b3, b4, b5])
 remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)]
 # Create new output
-filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
-            "20260107 corrected batch 6 sal.xlsx")
+filename = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
+    "20260107 corrected batch 6 sal.xlsx"
+)

 with pd.ExcelWriter(filename) as writer:
    sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer:
    b5.to_excel(writer, sheet_name="batch 5", index=False)
    remaining.to_excel(writer, sheet_name="batch 6", index=False)

-all_together = pd.concat(
-    [b1, b2, b3, b4, b5, remaining]
-)
+all_together = pd.concat([b1, b2, b3, b4, b5, remaining])
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -110,14 +110,17 @@ import pandas as pd
 # Solar PV savings - we need the amount of solar PV bill savings
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
-from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    PlanModel,
+    PlanRecommendations,
+    RecommendationMaterials,
+)
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from collections import defaultdict

 PORTFOLIO_ID = 485  # Peabody
-SCENARIOS = [
-    970
-]
+SCENARIOS = [970]
 scenario_names = {
    970: "EPC C - no solid floor, ashp 3.0",
 }
@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Properties
    # --------------------
-    properties_query = session.query(
-        PropertyModel,
-        PropertyDetailsEpcModel
-    ).join(
-        PropertyDetailsEpcModel,
-        PropertyModel.id == PropertyDetailsEpcModel.property_id
-    ).filter(
-        PropertyModel.portfolio_id == portfolio_id
-    ).all()
+    properties_query = (
+        session.query(PropertyModel, PropertyDetailsEpcModel)
+        .join(
+            PropertyDetailsEpcModel,
+            PropertyModel.id == PropertyDetailsEpcModel.property_id,
+        )
+        .filter(PropertyModel.portfolio_id == portfolio_id)
+        .all()
+    )

    properties_data = [
        {
-            **{col.name: getattr(p.PropertyModel, col.name)
-               for col in PropertyModel.__table__.columns},
-            **{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
-               for col in PropertyDetailsEpcModel.__table__.columns},
+            **{
+                col.name: getattr(p.PropertyModel, col.name)
+                for col in PropertyModel.__table__.columns
+            },
+            **{
+                col.name: getattr(p.PropertyDetailsEpcModel, col.name)
+                for col in PropertyDetailsEpcModel.__table__.columns
+            },
        }
        for p in properties_query
    ]
@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Plans
    # --------------------
-    plans_query = session.query(Plan).filter(
-        Plan.scenario_id.in_(scenario_ids)
-    ).all()
+    plans_query = (
+        session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
+    )

    plans_data = [
-        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
        for plan in plans_query
    ]

@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Recommendations (NO materials yet)
    # --------------------
-    recommendations_query = session.query(
-        Recommendation,
-        Plan.scenario_id
-    ).join(
-        PlanRecommendations,
-        Recommendation.id == PlanRecommendations.recommendation_id
-    ).join(
-        Plan,
-        Plan.id == PlanRecommendations.plan_id
-    ).filter(
-        PlanRecommendations.plan_id.in_(plan_ids),
-        Recommendation.default.is_(True),
-        Recommendation.already_installed.is_(False)
-    ).all()
+    recommendations_query = (
+        session.query(Recommendation, PlanModel.scenario_id)
+        .join(
+            PlanRecommendations,
+            Recommendation.id == PlanRecommendations.recommendation_id,
+        )
+        .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
+        .filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            Recommendation.default.is_(True),
+            Recommendation.already_installed.is_(False),
+        )
+        .all()
+    )

    recommendations_data = [
        {
-            **{col.name: getattr(r.Recommendation, col.name)
-               for col in Recommendation.__table__.columns},
+            **{
+                col.name: getattr(r.Recommendation, col.name)
+                for col in Recommendation.__table__.columns
+            },
            "scenario_id": r.scenario_id,
-            "materials": []  # placeholder
+            "materials": [],  # placeholder
        }
        for r in recommendations_query
    ]
@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Recommendation materials (SEPARATE QUERY)
    # --------------------
-    materials_query = session.query(
-        RecommendationMaterials
-    ).filter(
-        RecommendationMaterials.recommendation_id.in_(recommendation_ids)
-    ).all()
+    materials_query = (
+        session.query(RecommendationMaterials)
+        .filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
+        .all()
+    )

    # Group materials by recommendation_id
    materials_by_recommendation = defaultdict(list)

    for m in materials_query:
-        materials_by_recommendation[m.recommendation_id].append({
-            "material_id": m.material_id,
-            "depth": m.depth,
-            "quantity": m.quantity,
-            "quantity_unit": m.quantity_unit,
-            "estimated_cost": m.estimated_cost,
-        })
+        materials_by_recommendation[m.recommendation_id].append(
+            {
+                "material_id": m.material_id,
+                "depth": m.depth,
+                "quantity": m.quantity,
+                "quantity_unit": m.quantity_unit,
+                "estimated_cost": m.estimated_cost,
+            }
+        )

    # Attach materials safely (no filtering side effects)
    for r in recommendations_data:
@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
    recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
    properties_df.to_excel(writer, sheet_name="properties", index=False)

-    
+
 # solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
 # average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()


-
 # # Check tenures
 # initial_asset_data = pd.read_excel(
 #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
--- a/Project/m_reduced_sample_revised.py
+++ b/Project/m_reduced_sample_revised.py
@ -4,7 +4,7 @@ import pandas as pd
 full_sal = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
    "SAL/Depracated/20260107 corrected batch 6 sal.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 # ------Pull in the reduced sample ------
@ -12,7 +12,7 @@ full_sal = pd.read_excel(
 reduced_sal = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
    "ownership filtered sal.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 # ------ Pull in the confirmed ownership column from Peabody ------
@ -20,18 +20,20 @@ new_asset_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
    "- Peabody "
    "- Data Extracts for Domna v2.xlsx",
-    sheet_name="Properties"
+    sheet_name="Properties",
 )

 correct_sample = new_asset_data[
    ~new_asset_data["AH Tenure"].isin(
-        ["Commercial",
-         "Freeholder",
-         "HOMEBUY / EQUITY LOAN",
-         "Leaseholder",
-         "Outright Sale",
-         "SHARED EQUITY",
-         "Shared Ownership"]
+        [
+            "Commercial",
+            "Freeholder",
+            "HOMEBUY / EQUITY LOAN",
+            "Leaseholder",
+            "Outright Sale",
+            "SHARED EQUITY",
+            "Shared Ownership",
+        ]
    )
 ].copy()

@ -41,9 +43,7 @@ stuff_to_add = correct_sample[
    ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values)
 ]["UPRN"].values

-sal_to_add = full_sal[
-    full_sal["domna_property_id"].isin(stuff_to_add)
-].copy()
+sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy()

 # ------- Stuff to remove -------
 stuff_to_remove = reduced_sal[
@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel
 from backend.app.db.connection import db_session, db_read_session
 from sqlalchemy import select, func
 from sqlalchemy.orm import Session
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel

 uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist()

--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@ -7,7 +7,7 @@ from sqlalchemy.sql import true
 from backend.app.db.utils import row2dict
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from backend.app.db.models.recommendations import Recommendation
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel
 from backend.app.utils import sap_to_epc

 EPC_COLOURS = {
@ -17,7 +17,7 @@ EPC_COLOURS = {
    "D": "#fdd401",
    "E": "#fdab67",
    "F": "#ee8023",
-    "G": "#e71437"
+    "G": "#e71437",
 }


@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id:
             its associated default recommendations if any.
    """
    # Adjust the join to correctly filter recommendations while including all properties
-    query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation,
-                                                                   (Recommendation.property_id == PropertyModel.id) & (
-                                                                       Recommendation.default == true())) \
-        .filter(PropertyModel.portfolio_id == portfolio_id) \
+    query = (
+        session.query(PropertyModel, Recommendation)
+        .outerjoin(
+            Recommendation,
+            (Recommendation.property_id == PropertyModel.id)
+            & (Recommendation.default == true()),
+        )
+        .filter(PropertyModel.portfolio_id == portfolio_id)
        .all()
+    )

    properties = {}
    for property, recommendation in query:
        # Ensure the property is added once with an empty list of recommendations initially
        if property.id not in properties:
            properties[property.id] = row2dict(property)
-            properties[property.id]['recommendations'] = []
+            properties[property.id]["recommendations"] = []

        # Append recommendations if they exist and meet the criteria (already filtered by the query)
        if recommendation and recommendation.default:
-            properties[property.id]['recommendations'].append(row2dict(recommendation))
+            properties[property.id]["recommendations"].append(row2dict(recommendation))

    return list(properties.values())

@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
    :return: A list of dictionaries, where each dictionary represents a property's details.
             Returns an empty list if no property details are found.
    """
-    property_details = session.query(PropertyDetailsEpcModel).filter(
-        PropertyDetailsEpcModel.portfolio_id == portfolio_id).all()
+    property_details = (
+        session.query(PropertyDetailsEpcModel)
+        .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id)
+        .all()
+    )

    # Convert the SQLAlchemy objects to dictionaries
-    property_details_dict = [row2dict(pd) for pd in property_details] if property_details else []
+    property_details_dict = (
+        [row2dict(pd) for pd in property_details] if property_details else []
+    )

    return property_details_dict

@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
    :return: A list of dictionaries, where each dictionary represents a plan.
             Returns an empty list if no plans are found.
    """
-    plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all()
+    plans = (
+        session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all()
+    )

    # Convert the SQLAlchemy objects to dictionaries
    plans_dict = [row2dict(plan) for plan in plans] if plans else []
@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
    return plans_dict


-def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
+def plot_epc_distribution(
+    df,
+    customer_key,
+    title="Your Units",
+    background_color="white",
+    bar_height=0.4,
+    font_size=15,
+):
    """
    Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
    Allows setting the plot background color and dynamically adjusts text size and bar spacing.
@ -100,75 +119,113 @@ def plot_epc_distribution(df, customer_key, title='Your Units', background_color
    :param font_size: Base font size for text annotations (default 15)
    """
    # Calculate dynamic figure size or adjust based on preferences
-    square_size = max(6, len(df) * 0.6)  # Ensure minimum size and adjust based on number of entries
+    square_size = max(
+        6, len(df) * 0.6
+    )  # Ensure minimum size and adjust based on number of entries
    fig, ax = plt.subplots(figsize=(square_size, square_size))
    fig.patch.set_facecolor(background_color)  # Set figure background color
    ax.set_facecolor(background_color)  # Set axes background color

-    df['percentage'] = df['percentage'].round(1)  # Round the percentage values to 1 decimal place
-    df_sorted = df.sort_values('percentage', ascending=True)
+    df["percentage"] = df["percentage"].round(
+        1
+    )  # Round the percentage values to 1 decimal place
+    df_sorted = df.sort_values("percentage", ascending=True)

    # Plot bars with specified height for adjustable thickness
-    bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
-                   color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)
+    bars = ax.barh(
+        df_sorted["current_epc_rating"],
+        df_sorted["percentage"],
+        color=df_sorted["current_epc_rating"].map(EPC_COLOURS),
+        edgecolor="none",
+        height=bar_height,
+    )

-    epc_rating_font_size = font_size * 2  # EPC rating font size larger than base font size
-    count_percentage_font_size = font_size  # Count (percentage) font size as base font size
+    epc_rating_font_size = (
+        font_size * 2
+    )  # EPC rating font size larger than base font size
+    count_percentage_font_size = (
+        font_size  # Count (percentage) font size as base font size
+    )

    # Annotate bars with EPC ratings inside and count with percentage values outside
    for index, bar in enumerate(bars):
        width = bar.get_width()
-        epc_rating = df_sorted.iloc[index]['current_epc_rating']
-        count = df_sorted.iloc[index]['count']
-        percentage = df_sorted.iloc[index]['percentage']
+        epc_rating = df_sorted.iloc[index]["current_epc_rating"]
+        count = df_sorted.iloc[index]["count"]
+        percentage = df_sorted.iloc[index]["percentage"]

        # EPC rating inside the bar with increased font size
-        ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2,
-                f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size)
+        ax.text(
+            width - (width * 0.05),
+            bar.get_y() + bar.get_height() / 2,
+            f"{epc_rating}",
+            va="center",
+            ha="right",
+            color="white",
+            fontsize=epc_rating_font_size,
+        )

        # Count and percentage outside the bar, original font size
-        ax.text(width + 1, bar.get_y() + bar.get_height() / 2,
-                f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size)
+        ax.text(
+            width + 1,
+            bar.get_y() + bar.get_height() / 2,
+            f"{count} ({percentage}%)",
+            va="center",
+            color="black",
+            fontsize=count_percentage_font_size,
+        )

-    ax.set_title(title, fontsize=font_size * 1.2)  # Adjust title font size proportionally
-    ax.tick_params(axis='x', which='both', bottom=False, top=False,
-                   labelbottom=False)  # Remove x-axis tick marks and values
-    ax.tick_params(axis='y', which='both', left=False, right=False,
-                   labelleft=False)  # Remove y-axis tick marks and labels
-    ax.spines['top'].set_visible(False)  # Remove top spine
-    ax.spines['right'].set_visible(False)  # Remove right spine
-    ax.spines['left'].set_visible(False)  # Remove left spine
-    ax.spines['bottom'].set_visible(False)  # Remove bottom spine
+    ax.set_title(
+        title, fontsize=font_size * 1.2
+    )  # Adjust title font size proportionally
+    ax.tick_params(
+        axis="x", which="both", bottom=False, top=False, labelbottom=False
+    )  # Remove x-axis tick marks and values
+    ax.tick_params(
+        axis="y", which="both", left=False, right=False, labelleft=False
+    )  # Remove y-axis tick marks and labels
+    ax.spines["top"].set_visible(False)  # Remove top spine
+    ax.spines["right"].set_visible(False)  # Remove right spine
+    ax.spines["left"].set_visible(False)  # Remove left spine
+    ax.spines["bottom"].set_visible(False)  # Remove bottom spine

    plt.tight_layout()  # Adjust layout
    plt.show()

    # Save the figure as an image
-    figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
-    fig.savefig(figure_path, bbox_inches='tight')
+    figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png"
+    fig.savefig(figure_path, bbox_inches="tight")
    plt.close(fig)  # Close the figure to free memory

    return fig, figure_path


-def save_plot_to_image(figure, path='plot.png'):
+def save_plot_to_image(figure, path="plot.png"):
    """
    Saves a matplotlib figure to an image file for insertion into PowerPoint.
    """
-    figure.savefig(path, bbox_inches='tight')
+    figure.savefig(path, bbox_inches="tight")
    plt.close(figure)


-def save_figure_as_image(figure, filename='temp_plot.png'):
+def save_figure_as_image(figure, filename="temp_plot.png"):
    """
    Saves a matplotlib figure to an image file.
    """
    figure.savefig(filename, dpi=300)
-    plt.close(figure)  # Close the figure to prevent it from displaying in notebooks or Python environments
+    plt.close(
+        figure
+    )  # Close the figure to prevent it from displaying in notebooks or Python environments


-def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8),
-                                height_inches=Inches(2)):
+def add_commentary_with_bullets(
+    slide,
+    commentary,
+    top_inches,
+    left_inches=Inches(1),
+    width_inches=Inches(8),
+    height_inches=Inches(2),
+):
    """
    Adds commentary with bullet points to a slide.

@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
    :param width_inches: The width of the commentary text box.
    :param height_inches: The height of the commentary text box.
    """
-    txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches)
+    txBox = slide.shapes.add_textbox(
+        left_inches, top_inches, width_inches, height_inches
+    )
    tf = txBox.text_frame

    # Configure text frame
@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche

    for i, section in enumerate(sections):
        if i > 0:
-            p = tf.add_paragraph()  # Add a new paragraph for each section after the first
+            p = (
+                tf.add_paragraph()
+            )  # Add a new paragraph for each section after the first
        else:
            p = tf.paragraphs[0]  # Use the first paragraph for the first section
        p.text = section
@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None):
    # Determine the position of the commentary text box based on whether an image is included
    if img_path:
        # Add the image
-        slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
+        slide.shapes.add_picture(
+            img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)
+        )
        # Position for commentary when image is present
        commentary_top = Inches(6)
    else:
@ -237,16 +300,18 @@ def create_powerpoint(data, save_location):
    prs = Presentation()

    for slide, slide_data in data.items():
-        slide_figure_path = data[slide].get('image_path')
-        text = data[slide].get('text')
-        title = data[slide].get('title', "")
+        slide_figure_path = data[slide].get("image_path")
+        text = data[slide].get("text")
+        title = data[slide].get("title", "")
        add_slide_with_image(prs, title, slide_figure_path, text)

    # Save the presentation
    prs.save(save_location)


-def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
+def create_recommendations_summary(
+    recommendations_df, properties_df, property_details_df, sap_target
+):
    # Aggregate the impact of the recommendations
    # We want:
    # Total number of sap points
@ -254,40 +319,52 @@ def create_recommendations_summary(recommendations_df, properties_df, property_d
    # total bill savings
    # total cost
    # Total Co2 impact
-    recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
-        total_sap_points=("sap_points", "sum"),
-        total_valuation_impact=("property_valuation_increase", "sum"),
-        total_bill_savings=("energy_cost_savings", "sum"),
-        total_cost=("estimated_cost", "sum"),
-        total_carbon=("co2_equivalent_savings", "sum"),
-        adjusted_heat_demand=("adjusted_heat_demand", "sum")
-    ).reset_index()
+    recommendations_summary = (
+        recommendations_df.groupby(["property_id"])
+        .agg(
+            total_sap_points=("sap_points", "sum"),
+            total_valuation_impact=("property_valuation_increase", "sum"),
+            total_bill_savings=("energy_cost_savings", "sum"),
+            total_cost=("estimated_cost", "sum"),
+            total_carbon=("co2_equivalent_savings", "sum"),
+            adjusted_heat_demand=("adjusted_heat_demand", "sum"),
+        )
+        .reset_index()
+    )
    # Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill
    recommendations_summary = recommendations_summary.merge(
-        properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
-        how="left"
+        properties_df[["id", "uprn", "current_sap_points"]].rename(
+            columns={"id": "property_id"}
+        ),
+        on="property_id",
+        how="left",
    )

    recommendations_summary["expected_sap_points"] = (
-        recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
+        recommendations_summary["current_sap_points"]
+        + recommendations_summary["total_sap_points"]
    )
-    recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
-        lambda x: sap_to_epc(x)
+    recommendations_summary["expected_epc_rating"] = recommendations_summary[
+        "expected_sap_points"
+    ].apply(lambda x: sap_to_epc(x))
+    recommendations_summary["sap_difference"] = (
+        sap_target - recommendations_summary["expected_sap_points"]
    )
-    recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]

    if property_details_df is not None:
        recommendations_summary = recommendations_summary.merge(
-            property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
+            property_details_df[
+                ["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]
+            ].rename(
                columns={
                    "id": "property_id",
                    "co2_emissions": "current_co2",
                    "adjusted_energy_consumption": "current_energy",
-                    "energy_bill": "current_energy_bill"
+                    "energy_bill": "current_energy_bill",
                }
            ),
            on="uprn",
-            how="left"
+            how="left",
        )

    return recommendations_summary
--- a/infrastructure/terraform/lambda/_template/README.md
+++ b/infrastructure/terraform/lambda/_template/README.md
@ -3,7 +3,7 @@
 ### 1. Create the Lambda scaffold
 - Copy the template:

-  cp -r lambda/_template lambda/<lambda_name>
+  `cp -r lambda/_template lambda/<lambda_name>`

 ---

@ -12,8 +12,7 @@

  infrastructure/terraform/shared/main.tf

- Apply the shared stack
-    - This requires commenting 'if env.stage == "prod"' in .github/workflows/deploy_terraform.yml
+- Create a PR to merge this change into main, then dev, so that the shared stack is deployed

 - Verify the ECR repository exists in AWS

--- a/infrastructure/terraform/lambda/_template/main.tf
+++ b/infrastructure/terraform/lambda/_template/main.tf
@ -1,3 +1,30 @@
+# ==============================================================================
+# TEMPLATE: Lambda Configuration with Optional S3 IAM Policy
+# ==============================================================================
+# Instructions:
+# 1. Replace "REPLACE ME" with your lambda name (e.g., "my-lambda-name")
+# 2. Add any additional environment variables as needed
+# 3. To attach S3 IAM policies from shared state:
+#    - Uncomment the S3 policy attachment section below
+#    - Update the policy_arn to match the output from shared/main.tf
+#    - Available shared outputs (examples):
+#      - data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn
+#      - data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
+# 4. To create a NEW S3 policy:
+#    - Add a new module "lambda_s3_policy" in shared/main.tf using the
+#      s3_iam_policy module (see examples in shared/main.tf)
+#    - Then reference it here using data.terraform_remote_state.shared.outputs
+# ==============================================================================
+
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+
 module "lambda" {
  source = "../modules/lambda_with_sqs"

@ -6,9 +33,35 @@ module "lambda" {

  image_uri = local.image_uri

+  # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
+  maximum_concurrency = var.maximum_concurrency
+
+  batch_size = var.batch_size

  environment = {
    STAGE = var.stage
    LOG_LEVEL = "info"
  }
 }
+
+# ======================================================================
+# OPTIONAL: Attach S3 IAM policy to Lambda execution role
+# ======================================================================
+# Uncomment and configure the resource below to attach S3 permissions
+#
+# Example 1: Attach existing policy from shared state
+# resource "aws_iam_role_policy_attachment" "lambda_s3_policy" {
+#   role       = module.lambda.role_name
+#   policy_arn = data.terraform_remote_state.shared.outputs.YOUR_POLICY_OUTPUT_NAME_arn
+# }
+#
+# Example 2: Attach multiple policies
+# resource "aws_iam_role_policy_attachment" "lambda_read_policy" {
+#   role       = module.lambda.role_name
+#   policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
+# }
+#
+# resource "aws_iam_role_policy_attachment" "lambda_write_policy" {
+#   role       = module.lambda.role_name
+#   policy_arn = data.terraform_remote_state.shared.outputs.another_policy_arn
+# }
--- a/infrastructure/terraform/lambda/_template/variables.tf
+++ b/infrastructure/terraform/lambda/_template/variables.tf
@ -17,6 +17,16 @@ variable "image_digest" {
  description = "Image digest (sha256:...)"
 }

+variable "maximum_concurrency" {
+  type        = number
+  default     = null
+  description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
+}
+
+variable "batch_size" {
+  type    = number
+  default = 1
+}

 locals {
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
--- a/infrastructure/terraform/lambda/address2UPRN/main.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/main.tf
@ -1,3 +1,19 @@
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+data "aws_secretsmanager_secret_version" "db_credentials" {
+  secret_id = "${var.stage}/assessment_model/db_credentials"
+}
+
+locals {
+  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
+}
+
 module "address2uprn" {
  source = "../modules/lambda_with_sqs"

@ -6,9 +22,37 @@ module "address2uprn" {

  image_uri = local.image_uri

+  timeout = 900

-  environment = {
-    STAGE     = var.stage
-    LOG_LEVEL = "info"
-  }
+  # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
+  maximum_concurrency = var.maximum_concurrency
+
+  environment = merge(
+    {
+      STAGE     = var.stage
+      LOG_LEVEL = "info"
+      DB_USERNAME = local.db_credentials.db_assessment_model_username
+      DB_PASSWORD = local.db_credentials.db_assessment_model_password
+      GOOGLE_SOLAR_API_KEY = "test"
+      SAP_PREDICTIONS_BUCKET = "test"
+      CARBON_PREDICTIONS_BUCKET = "test"
+      HEAT_PREDICTIONS_BUCKET = "test"
+      HEATING_KWH_PREDICTIONS_BUCKET = "test"
+      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
+      API_KEY = "test"
+      ENVIRONMENT = "test"
+      SECRET_KEY = "test"
+      PLAN_TRIGGER_BUCKET = "test"
+      DATA_BUCKET = "test"
+      ENGINE_SQS_URL = "test"
+      ENERGY_ASSESSMENTS_BUCKET = "test"
+      S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
+    },
+  )
 }
+
+# Attach the S3 read-and-write policy to the Lambda execution role
+resource "aws_iam_role_policy_attachment" "address2uprn_read_and_write" {
+  role       = module.address2uprn.role_name
+  policy_arn = data.terraform_remote_state.shared.outputs.address_2_uprn_s3_read_and_write_arn
+}
--- a/Show more
+++ b/Show more