fixed merge conflicts with main

2026-06-08 11:17:27 +00:00 · 2026-02-07 21:19:40 +00:00 · 2026-02-07 21:19:40 +00:00 · 7bb7972549
commit 7bb7972549
parent 7cf33e4f33 e8abe6b25b
84 changed files with 3370 additions and 1119 deletions
--- a/.devcontainer/asset_list/Dockerfile
+++ b/.devcontainer/asset_list/Dockerfile
@ -0,0 +1,41 @
+FROM python:3.11.10-bullseye
+
+ARG USER=vscode
+ARG DEBIAN_FRONTEND=noninteractive
+
+# 1) Toolchain + utilities for building libpostal
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    sudo jq vim curl git ca-certificates \
+    build-essential pkg-config automake autoconf libtool \
+ && rm -rf /var/lib/apt/lists/*
+
+# 2) Build and install libpostal from source
+RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
+ && cd /tmp/libpostal \
+ && ./bootstrap.sh \
+ && ./configure --datadir=/usr/local/share/libpostal \
+ && make -j"$(nproc)" \
+ && make install \
+ && ldconfig \
+ && rm -rf /tmp/libpostal
+
+# 3) Create the user and grant sudo privileges
+RUN useradd -m -s /usr/bin/bash ${USER} \
+ && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
+ && chmod 0440 /etc/sudoers.d/${USER}
+
+# 4) Python deps - needed if you want to run the asset list
+ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
+# NOTE(review): docker-compose builds with the repo root as context, so this
+# copies <repo>/asset_list/requirements.txt, not the file under
+# .devcontainer/asset_list/ - confirm which one is intended.
+COPY asset_list/requirements.txt requirements.txt
+RUN pip install -r requirements.txt
+
+# 5) Workdir
+WORKDIR /workspaces/model
+
+# 6) Make Python find your package
+# Add project root to PYTHONPATH for all processes. PYTHONPATH is not set
+# earlier in this file, so appending ${PYTHONPATH} would only add an empty entry.
+ENV PYTHONPATH=/workspaces/model
--- a/.devcontainer/asset_list/devcontainer.json
+++ b/.devcontainer/asset_list/devcontainer.json
@ -1,7 +1,7 @
 {
-  "name": "Basic Python",
+  "name": "SAL ENV",
  "dockerComposeFile": "docker-compose.yml",
-  "service": "model",
+  "service": "model-sal",
  "remoteUser": "vscode",
  "workspaceFolder": "/workspaces/model",
-  "postStartCommand": "bash .devcontainer/post-install.sh",
+  "postStartCommand": "bash .devcontainer/asset_list/post-install.sh",
--- a/.devcontainer/asset_list/docker-compose.yml
+++ b/.devcontainer/asset_list/docker-compose.yml
@ -1,14 +1,14 @@
 version: '3.8'

 services:
-  model:
+  model-sal:
    user: "${UID}:${GID}"
    build:
-      context: ..
-      dockerfile: .devcontainer/Dockerfile
+      context: ../..
+      dockerfile: .devcontainer/asset_list/Dockerfile
    command: sleep infinity
    volumes:
-      - ..:/workspaces/model
+      - ../../:/workspaces/model
    networks:
      - model-net

--- a/.devcontainer/asset_list/post-install.sh
+++ b/.devcontainer/asset_list/post-install.sh
--- a/.devcontainer/asset_list/requirements.txt
+++ b/.devcontainer/asset_list/requirements.txt
@ -0,0 +1,24 @@
+fastapi==0.115.2
+sqlalchemy==2.0.36
+psycopg2-binary==2.9.10
+python-jose==3.3.0
+cryptography==43.0.3
+mangum==0.19.0
+# AWS
+boto3==1.35.44
+# Data
+openpyxl==3.1.2
+# Basic
+pytz
+uvicorn[standard]
+# Testing
+pytest==9.0.2
+pytest-cov==7.0.0
+ipykernel>=6.25,<7
+pydantic-settings<2
+pyyaml>=6.0.1
+pydantic>=1.10.7,<2
+sqlmodel
+# Formatting
+black==26.1.0
+python-dotenv
--- a/.devcontainer/backend/Dockerfile
+++ b/.devcontainer/backend/Dockerfile
@ -34,7 +34,7 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
 ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
 ADD backend/engine/requirements.txt requirements1.txt
 ADD backend/app/requirements/requirements.txt requirements2.txt
-ADD .devcontainer/requirements.txt requirements3.txt
+ADD .devcontainer/backend/requirements.txt requirements3.txt
 RUN cat requirements1.txt requirements2.txt requirements3.txt > requirements.txt
 RUN pip install -r requirements.txt

--- a/.devcontainer/backend/devcontainer.json
+++ b/.devcontainer/backend/devcontainer.json
@ -0,0 +1,40 @@
+{
+  "name": "Backend Model Env",
+  "dockerComposeFile": "docker-compose.yml",
+  "service": "model-backend",
+  "remoteUser": "vscode",
+  "workspaceFolder": "/workspaces/model",
+  "postStartCommand": "bash .devcontainer/backend/post-install.sh",
+  "mounts": [
+    "source=${localEnv:HOME},target=/workspaces/home,type=bind"
+  ],
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "ms-python.python",
+        "ms-toolsai.jupyter",
+        "mechatroner.rainbow-csv",
+        "ms-toolsai.datawrangler",
+        "lindacong.vscode-book-reader",
+        "4ops.terraform",
+        "fabiospampinato.vscode-todo-plus",
+        "jgclark.vscode-todo-highlight",
+        "corentinartaud.pdfpreview",
+        "ms-python.vscode-python-envs",
+        "ms-python.black-formatter",
+        "waderyan.gitblame"
+      ],
+      "settings": {
+        "files.defaultWorkspace": "/workspaces/model",
+        "[python]": {
+          "editor.defaultFormatter": "ms-python.black-formatter",
+          "editor.formatOnSave": true
+        },
+        "python.formatting.provider": "none"
+      }
+    }
+  },
+  "containerEnv": {
+    "PYTHONFLAGS": "-Xfrozen_modules=off"
+  }
+}
--- a/.devcontainer/backend/docker-compose.yml
+++ b/.devcontainer/backend/docker-compose.yml
@ -0,0 +1,28 @@
+version: '3.8'
+
+services:
+  model-backend:
+    user: "${UID}:${GID}"
+    build:
+      context: ../..
+      dockerfile: .devcontainer/backend/Dockerfile
+    command: sleep infinity
+    volumes:
+      - ../../:/workspaces/model
+
+
+  db:
+    image: postgres:17.4
+    restart: unless-stopped
+    ports:
+      - 5432:5432
+    environment:
+      - PGDATABASE=tech_team_local_db
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=makingwarmerhomes
+    volumes:
+      - postgres-data-two:/var/lib/postgresql/data
+
+
+volumes:
+  postgres-data-two:
--- a/.devcontainer/backend/post-install.sh
+++ b/.devcontainer/backend/post-install.sh
@ -0,0 +1,14 @@
+mkdir -p ~/.ipython/profile_default/startup
+
+cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
+from dotenv import load_dotenv
+import os
+
+# Adjust path as needed
+env_path = "/workspaces/model/backend/.env"
+if os.path.exists(env_path):
+    load_dotenv(env_path)
+    print("✔ Loaded .env into Jupyter kernel")
+else:
+    print("⚠ No .env file found to load")
+EOF
--- a/.devcontainer/backend/requirements.txt
+++ b/.devcontainer/backend/requirements.txt
@ -1,4 +1,4 @@
-# fastapi
+
 fastapi==0.115.2
 sqlalchemy==2.0.36
 pydantic-settings==2.6.0
--- a/.github/workflows/_build_image.yml
+++ b/.github/workflows/_build_image.yml
@ -0,0 +1,78 @@
+name: Build Docker image
+
+on:
+  workflow_call:
+    inputs:
+      ecr_repo:
+        required: true
+        type: string
+      dockerfile_path:
+        required: true
+        type: string
+      build_context:
+        required: false
+        default: "."
+        type: string
+
+    outputs:
+      image_digest:
+        description: "Pushed image digest"
+        value: ${{ jobs.build.outputs.image_digest }}
+      ecr_repo_url:
+        description: "ECR repository URL"
+        value: ${{ jobs.build.outputs.ecr_repo_url }}
+
+    secrets:
+      AWS_ACCESS_KEY_ID:
+        required: true
+      AWS_SECRET_ACCESS_KEY:
+        required: true
+      AWS_REGION:
+        required: true
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    outputs:
+      image_digest: ${{ steps.digest.outputs.image_digest }}
+      ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Resolve ECR repo URL
+        id: repo
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
+
+          ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${{ inputs.ecr_repo }}"
+
+          echo "Resolved ECR repo URL (local var):"
+          echo "$ECR_REPO_URL"
+
+          echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"
+
+      - name: Build & push image
+        run: |
+          IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}"
+          docker build -f "${{ inputs.dockerfile_path }}" -t "$IMAGE_URI" "${{ inputs.build_context }}"
+          docker push "$IMAGE_URI"
+
+      - name: Resolve image digest
+        id: digest
+        run: |
+          DIGEST=$(aws ecr describe-images \
+            --repository-name "${{ inputs.ecr_repo }}" \
+            --image-ids imageTag="${GITHUB_SHA}" \
+            --query 'imageDetails[0].imageDigest' \
+            --output text)
+          echo "image_digest=$DIGEST" >> "$GITHUB_OUTPUT"
--- a/.github/workflows/_deploy_lambda.yml
+++ b/.github/workflows/_deploy_lambda.yml
@ -0,0 +1,91 @@
+name: Deploy Lambda (Terraform)
+
+on:
+  workflow_call:
+    inputs:
+      lambda_name:
+        required: true
+        type: string
+
+      lambda_path:
+        required: true
+        type: string
+
+      stage:
+        required: true
+        type: string
+
+      ecr_repo:
+        required: true
+        type: string
+
+      image_digest:
+        required: true
+        type: string
+
+    secrets:
+      AWS_ACCESS_KEY_ID:
+        required: true
+      AWS_SECRET_ACCESS_KEY:
+        required: true
+      AWS_REGION:
+        required: true
+
+jobs:
+  deploy:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Debug inputs
+        run: |
+          echo "lambda_name=${{ inputs.lambda_name }}"
+          echo "lambda_path=${{ inputs.lambda_path }}"
+          echo "stage=${{ inputs.stage }}"
+          echo "ecr_repo=${{ inputs.ecr_repo }}"
+          echo "image_digest=${{ inputs.image_digest }}"
+
+
+      - uses: aws-actions/configure-aws-credentials@v4
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: ${{ secrets.AWS_REGION }}
+
+      - uses: hashicorp/setup-terraform@v3
+
+      - uses: aws-actions/amazon-ecr-login@v2
+
+      - name: Resolve ECR repo URL
+        id: repo
+        env:
+          AWS_REGION: ${{ secrets.AWS_REGION }}
+        run: |
+          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
+          ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${{ inputs.ecr_repo }}"
+          echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"
+
+      - name: Terraform Init
+        working-directory: ${{ inputs.lambda_path }}
+        run: terraform init -reconfigure
+
+      - name: Terraform Workspace
+        working-directory: ${{ inputs.lambda_path }}
+        run: |
+          terraform workspace select ${{ inputs.stage }} \
+            || terraform workspace new ${{ inputs.stage }}
+
+      - name: Terraform Plan
+        working-directory: ${{ inputs.lambda_path }}
+        run: |
+          terraform plan \
+            -var="stage=${{ inputs.stage }}" \
+            -var="lambda_name=${{ inputs.lambda_name }}" \
+            -var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
+            -var="image_digest=${{ inputs.image_digest }}" \
+            -out=lambdaplan
+
+      - name: Terraform Apply
+        working-directory: ${{ inputs.lambda_path }}
+        run: terraform apply -auto-approve lambdaplan
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@ -1,80 +1,98 @@
-name: Deploy terraform stack
+name: Deploy infrastructure

 on:
  push:
    branches:
-      - dev
-      - prod
+      - "**"

 jobs:
-  deploy:
+  determine_stage:
    runs-on: ubuntu-latest
+    outputs:
+      stage: ${{ steps.set-stage.outputs.stage }}
+
    steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-
-      - name: Setup AWS credentials file
+      - name: Determine stage from branch
+        id: set-stage
+        shell: bash
        run: |
-          mkdir -p ~/.aws
-          echo "[DevAdmin]" > ~/.aws/credentials
-          echo "aws_access_key_id = ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
-          echo "aws_secret_access_key = ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
-          echo "[ProdAdmin]" >> ~/.aws/credentials
-          echo "aws_access_key_id = ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
-          echo "aws_secret_access_key = ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
+          BRANCH="${GITHUB_REF_NAME}"

-      - name: Setup AWS config file
-        run: |
-          echo "[profile DevAdmin]" > ~/.aws/config
-          echo "region = eu-west-2" >> ~/.aws/config
-          echo "[profile ProdAdmin]" >> ~/.aws/config
-          echo "region = eu-west-2" >> ~/.aws/config
+          if [[ "$BRANCH" == "prod" ]]; then
+            echo "stage=prod" >> "$GITHUB_OUTPUT"

-      - name: Setup Terraform
-        uses: hashicorp/setup-terraform@v1
-        with:
-          terraform_version: 1.5.2
+          elif [[ "$BRANCH" == "dev" ]]; then
+            echo "stage=dev" >> "$GITHUB_OUTPUT"

-      - name: Configure AWS credentials (DevAdmin)
-        uses: aws-actions/configure-aws-credentials@v1
+          else
+            echo "stage=dev" >> "$GITHUB_OUTPUT"
+          fi
+
+  # ============================================================
+  # 1️⃣ Shared Terraform (infra)
+  # ============================================================
+  shared_terraform:
+    needs: determine_stage
+    runs-on: ubuntu-latest
+    env:
+      STAGE: ${{ needs.determine_stage.outputs.stage }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
-          aws-region: eu-west-2
-        env:
-          AWS_PROFILE: "DevAdmin"
+          aws-region: ${{ secrets.DEV_AWS_REGION }}
+
+      - uses: hashicorp/setup-terraform@v3

      - name: Terraform Init
-        run: cd infrastructure/terraform && terraform init
+        working-directory: infrastructure/terraform/shared
+        run: terraform init -reconfigure

      - name: Terraform Workspace
-        run: |
-          BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
-          cd infrastructure/terraform
-          terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
+        working-directory: infrastructure/terraform/shared
+        run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}

      - name: Terraform Plan
-        run: |
-          BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
-          cd infrastructure/terraform && terraform plan -var-file=${BRANCH_NAME}.tfvars
+        working-directory: infrastructure/terraform/shared
+        run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan

-      - name: Deploy to Dev
-        if: github.ref == 'refs/heads/dev'
-        run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
-        env:
-          name: dev
+      - name: Terraform Apply
+        if: env.STAGE == 'prod'
+        working-directory: infrastructure/terraform/shared
+        run: terraform apply -auto-approve tfplan

-      - name: Configure AWS credentials (ProdAdmin)
-        uses: aws-actions/configure-aws-credentials@v1
+  # ============================================================
+  # 2️⃣ Build Address 2 UPRN image and Push
+  # ============================================================
+  address2uprn_image:
+    needs: [determine_stage, shared_terraform]
+    uses: ./.github/workflows/_build_image.yml
    with:
-          aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}
-          aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
-          aws-region: eu-west-2
-        env:
-          AWS_PROFILE: "ProdAdmin"
+      ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
+      dockerfile_path: backend/address2UPRN/Dockerfile
+      build_context: backend/address2UPRN
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

-      - name: Deploy to Prod
-        if: github.ref == 'refs/heads/prod'
-        run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
-        env:
-          name: prod
+  # ============================================================
+  # 3️⃣ Deploy Address 2 UPRN Lambda
+  # ============================================================
+  address2uprn_lambda:
+    needs: [address2uprn_image, determine_stage]
+    uses: ./.github/workflows/_deploy_lambda.yml
+    with:
+      lambda_name: address2uprn
+      lambda_path: infrastructure/terraform/lambda/address2UPRN
+      stage: ${{ needs.determine_stage.outputs.stage }}
+      ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
+      image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@ -2,6 +2,12 @@ name: Run unit tests

 on:
  pull_request:
+    branches:
+      - "**"
+  push:
+    branches:
+      - "**"
+

 jobs:
  test:
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -9,9 +9,9 @
            "path": "/bin/bash"
        }
    },
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true,
    "python.testing.pytestArgs": ["-s", "-q", "--no-cov"]

    // Hot reload setting that needs to be in user settings
    // "jupyter.runStartupCommands": [
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@ -34,7 +34,8 @@ from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
 logger = setup_logger()

 # OpenAI API Key (set this in your environment variables for security)
-OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+


 class DataRemapper:
@ -1160,12 +1161,16 @@ class AssetList:
            axis=1
        )
        
+        col = self.EPC_API_DATA_NAMES["roof-description"]
+
        self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply(
-            lambda x: RoofAttributes(description=x[self.EPC_API_DATA_NAMES["roof-description"]]).process()[
+            lambda x: RoofAttributes(description=x[col]).process()[
                "insulation_thickness"] if not pd.isnull(
-                x[self.EPC_API_DATA_NAMES["roof-description"]]) else None,
+                x[col]) else None,
            axis=1
        )
+
+
        self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = (
            self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace("+", "")
        )
--- a/asset_list/DataMapper.py
+++ b/asset_list/DataMapper.py
@ -1,5 +1,5 @@
 # OpenAI API Key (set this in your environment variables for security)
-OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")


 class DataRemapper:
--- a/asset_list/init.py
+++ b/asset_list/init.py
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -14,22 +14,32 @@ from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc

 load_dotenv(dotenv_path="backend/.env")
-EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+EPC_AUTH_TOKEN = os.getenv(
+    "EPC_AUTH_TOKEN",
+)


-def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
+def extract_address1(
+    asset_list, full_address_col, postcode_col, method="first_two_words"
+):
    if method == "first_two_words":
-        asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
+        asset_list["address1_extracted"] = (
+            asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
+        )
        return asset_list

    if method == "first_word":
-        asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
+        asset_list["address1_extracted"] = (
+            asset_list[full_address_col].str.split(" ").str[0]
+        )
        return asset_list

    if method == "house_number_extraction":
        asset_list["address1_extracted"] = asset_list.apply(
-            lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]),
-            axis=1
+            lambda x: SearchEpc.get_house_number(
+                address=x[full_address_col], postcode=x[postcode_col]
+            ),
+            axis=1,
        )
        return asset_list

@ -59,21 +69,20 @@ def app():
    Property UPRN
    """

-    # Fairhive
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Fairhive"
-    data_filename = "Fairhive Asset list.xlsx"
-    sheet_name = "Sheet1"
-    postcode_column = 'POSTCODE'
-    address1_column = "ADDRESS"
+    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
+    data_filename = "Domna SHF Wave 3 (3).xlsx"
+    sheet_name = "Domna Wave 3"
+    postcode_column = "Postcode"
+    address1_column = "Address 1"
    address1_method = None
-    fulladdress_column = 'ADDRESS'
-    address_cols_to_concat = []
+    fulladdress_column = None
+    address_cols_to_concat = ["Address 1"]
    missing_postcodes_method = None
-    landlord_year_built = None
-    landlord_os_uprn = None
-    landlord_property_type = "PROPERTY TYPE"
-    landlord_built_form = None
-    landlord_wall_construction = None
+    landlord_year_built = "Construction Years"
+    landlord_os_uprn = "UPRN"
+    landlord_property_type = "Type"
+    landlord_built_form = "Attachment"
+    landlord_wall_construction = "Wall type"
    landlord_roof_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
@ -93,93 +102,28 @@ def app():
    asset_list_header = 0
    landlord_block_reference = None

-    # Hyde
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hyde/Minor Works"
-    data_filename = "Hyde Group - Domna Minor Works Programme List.xlsx"
-    sheet_name = "Sheet1"
-    postcode_column = 'Postcode'
-    address1_column = None
-    address1_method = "house_number_extraction"
-    fulladdress_column = 'Address'
-    address_cols_to_concat = []
-    missing_postcodes_method = None
-    landlord_year_built = "Age"
-    landlord_os_uprn = None
-    landlord_property_type = "Property Type"
-    landlord_built_form = "Property Type"
-    landlord_wall_construction = "Walls"
-    landlord_roof_construction = "Roofs"
-    landlord_heating_system = "Heating"
-    landlord_existing_pv = "Renewables"
-    landlord_property_id = "Organisation Reference"
-    landlord_sap = "SAP (10)"
-    outcomes_filename = None
-    outcomes_sheetname = None
-    outcomes_postcode = None
-    outcomes_houseno = None
-    outcomes_id = None
-    outcomes_address = None
-    master_filepaths = []
-    master_id_colnames = []
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
-    asset_list_header = 0
-    landlord_block_reference = None
-
-    data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA/20260129 SAL"
-    data_filename = "NCHA ASSET LIST 1.xlsx"
-    sheet_name = "NCHA ASSET LIST"
-    postcode_column = 'POSTCODE'
-    address1_column = None
-    address1_method = "house_number_extraction"
-    fulladdress_column = 'ADDRESS'
-    address_cols_to_concat = []
-    missing_postcodes_method = None
-    landlord_year_built = None
-    landlord_os_uprn = None
-    landlord_property_type = "PROPERTY TYPE"
-    landlord_built_form = "BUILD FORM"
-    landlord_wall_construction = "wall combined"
-    landlord_roof_construction = None
-    landlord_heating_system = None
-    landlord_existing_pv = None
-    landlord_property_id = "UPRN"
-    landlord_sap = None
-    outcomes_filename = None
-    outcomes_sheetname = None
-    outcomes_postcode = None
-    outcomes_houseno = None
-    outcomes_id = None
-    outcomes_address = None
-    master_filepaths = []
-    master_id_colnames = []
-    master_to_asset_list_filepath = None
-    phase = False
-    ecosurv_landlords = None
-    asset_list_header = 0
-    landlord_block_reference = None
-
    # Peabody data for cleaning
-    data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-                   "Project/data_validation")
+    data_folder = (
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
+        "Project/data_validation"
+    )
    data_filename = "to_standardise_uprns.xlsx"
    sheet_name = "Sheet1"
-    postcode_column = 'Postcode'
-    address1_column = "Address 1"
-    address1_method = None
-    fulladdress_column = None
-    address_cols_to_concat = ["Address 1", "Address 2", "Address 3"]
+    postcode_column = "Postcode"
+    address1_column = None
+    address1_method = "house_number_extraction"
+    fulladdress_column = "Address"
+    address_cols_to_concat = None
    missing_postcodes_method = None
    landlord_year_built = None
    landlord_os_uprn = None
-    landlord_property_type = "Type"
-    landlord_built_form = "Attachment"
+    landlord_property_type = None
+    landlord_built_form = None
    landlord_wall_construction = None
    landlord_roof_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
-    landlord_property_id = "Org Ref"
+    landlord_property_id = "LLUPRN"
    landlord_sap = None
    outcomes_filename = None
    outcomes_sheetname = None
@ -195,40 +139,6 @@ def app():
    asset_list_header = 0
    landlord_block_reference = None

-    # Lambeth:
-    # data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lambeth/December 10th"
-    # data_filename = "lambeth_sw2_leigham court estate.xlsx"
-    # sheet_name = "Sheet1"
-    # postcode_column = 'Postcode'
-    # address1_column = "Address"
-    # address1_method = None
-    # fulladdress_column = None
-    # address_cols_to_concat = ["Address"]
-    # missing_postcodes_method = None
-    # landlord_year_built = None
-    # landlord_os_uprn = None
-    # landlord_property_type = None
-    # landlord_built_form = None
-    # landlord_wall_construction = None
-    # landlord_roof_construction = None
-    # landlord_heating_system = None
-    # landlord_existing_pv = None
-    # landlord_property_id = "row_id"
-    # landlord_sap = None
-    # outcomes_filename = None
-    # outcomes_sheetname = None
-    # outcomes_postcode = None
-    # outcomes_houseno = None
-    # outcomes_id = None
-    # outcomes_address = None
-    # master_filepaths = []
-    # master_id_colnames = []
-    # master_to_asset_list_filepath = None
-    # phase = False
-    # ecosurv_landlords = None
-    # asset_list_header = 0
-    # landlord_block_reference = None
-
    # Maps addresses to uprn in problematic cases
    manual_uprn_map = {}

@ -253,49 +163,62 @@ def app():
        landlord_existing_pv=landlord_existing_pv,
        landlord_sap=landlord_sap,
        landlord_block_reference=landlord_block_reference,
-        phase=phase
+        phase=phase,
    )
    asset_list.init_standardise()

    # We produce the new maps, which can be saved for future useage
    new_property_type_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_property_type] if
-            asset_list.landlord_property_type else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_property_type]
+            if asset_list.landlord_property_type
+            else {}
        ).items()
        if k not in PROPERTY_MAPPING
    }
    new_built_form_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_built_form] if
-            asset_list.landlord_built_form else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_built_form]
+            if asset_list.landlord_built_form
+            else {}
        ).items()
        if k not in BUILT_FORM_MAPPINGS
    }
    new_wall_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_wall_construction] if
-            asset_list.landlord_wall_construction else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_wall_construction]
+            if asset_list.landlord_wall_construction
+            else {}
        ).items()
        if k not in WALL_CONSTRUCTION_MAPPINGS
    }
    new_heating_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_heating_system] if
-            asset_list.landlord_heating_system else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_heating_system]
+            if asset_list.landlord_heating_system
+            else {}
        ).items()
        if k not in HEATING_MAPPINGS
    }
    new_existing_pv_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_existing_pv]
+            if asset_list.landlord_existing_pv
+            else {}
        ).items()
        if k not in EXISTING_PV_MAPPINGS
    }
    new_roof_construction_map = {
-        k: v for k, v in (
-            asset_list.variable_mappings[asset_list.landlord_roof_construction] if
-            asset_list.landlord_roof_construction else {}
+        k: v
+        for k, v in (
+            asset_list.variable_mappings[asset_list.landlord_roof_construction]
+            if asset_list.landlord_roof_construction
+            else {}
        ).items()
        if k not in ROOF_CONSTRUCTION_MAPPINGS
    }
@ -309,7 +232,7 @@ def app():
        outcomes_address=outcomes_address,
        outcomes_postcode=outcomes_postcode,
        outcomes_houseno=outcomes_houseno,
-        outcomes_id=outcomes_id
+        outcomes_id=outcomes_id,
    )

    asset_list.flag_survey_master(
@ -343,14 +266,16 @@ def app():
        skip = max(chunk_indexes)

    if any(x in folder_contents for x in downloaded_files):
-        skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
+        skip = max(
+            [i for i in chunk_indexes if filename.format(i=i) in folder_contents]
+        )

    for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
        print(f"Processing chunk {i} to {i + chunk_size}")
        if skip is not None and not force_retrieve_data:
            if i <= skip:
                continue
-        chunk = asset_list.standardised_asset_list[i:i + chunk_size]
+        chunk = asset_list.standardised_asset_list[i : i + chunk_size]
        epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
            df=chunk,
            row_id_name=asset_list.DOMNA_PROPERTY_ID,
@ -362,7 +287,7 @@ def app():
            built_form_column=AssetList.STANDARD_BUILT_FORM,
            manual_uprn_map=manual_uprn_map,
            epc_api_only=epc_api_only,
-            epc_auth_token=EPC_AUTH_TOKEN
+            epc_auth_token=EPC_AUTH_TOKEN,
        )

        # We now retrieve any failed properties
@ -385,7 +310,9 @@ def app():

        # Append the failed data to the main data
        # Store the chunk locally as a csv
-        pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
+        pd.DataFrame(epc_data_chunk).to_csv(
+            os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False
+        )
        # Store the errors and no-data locally
        with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
            json.dump(errors_chunk, f)
@ -416,7 +343,9 @@ def app():

    unique_recommendations = set()
    for _, row in recommendations_df.iterrows():
-        unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
+        unique_recommendations.update(
+            [rec["improvement-summary-text"] for rec in row["recommendations"]]
+        )

    columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
    transformed_data = []
@ -436,20 +365,24 @@ def app():
    transformed_df = pd.DataFrame(transformed_data)
    for col in [
        "Floor insulation (solid floor)",
-        "Floor insulation", "Floor insulation (suspended floor)"
+        "Floor insulation",
+        "Floor insulation (suspended floor)",
    ]:
        if col not in transformed_df.columns:
            transformed_df[col] = False
    transformed_df = transformed_df[
        [
-            asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
-            "Floor insulation", "Floor insulation (suspended floor)"
+            asset_list.DOMNA_PROPERTY_ID,
+            "Floor insulation (solid floor)",
+            "Floor insulation",
+            "Floor insulation (suspended floor)",
        ]
    ]

    transformed_df["epc_has_floor_recommendation"] = (
-        transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
-        transformed_df["Floor insulation (suspended floor)"]
+        transformed_df["Floor insulation (solid floor)"]
+        | transformed_df["Floor insulation"]
+        | transformed_df["Floor insulation (suspended floor)"]
    )

    # Get the find my epc data
@ -462,21 +395,20 @@ def app():
            find_my_epc_data.append(
                {
                    asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
-                    **x["find_my_epc_data"]
+                    **x["find_my_epc_data"],
                }
            )
        else:
            find_my_epc_data.append(
-                {
-                    asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
-                }
+                {asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]}
            )

    find_my_epc_data = pd.DataFrame(find_my_epc_data)

    find_my_epc_data = find_my_epc_data.merge(
        transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
-        how="left", on=asset_list.DOMNA_PROPERTY_ID
+        how="left",
+        on=asset_list.DOMNA_PROPERTY_ID,
    )

    # We check if we get the solar pv column:
@ -486,27 +418,33 @@ def app():
    # Retrieve just the data we need
    epc_df = epc_df[
        [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
-        ].rename(
-        columns=asset_list.EPC_API_DATA_NAMES
-    )
+    ].rename(columns=asset_list.EPC_API_DATA_NAMES)

    # Look for columns not in the find my EPC data, which will have happened if we didn't
    # retrieve it in the first place
-    missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
+    missed_find_epc_cols = [
+        c
+        for c in list(asset_list.FIND_EPC_DATA_NAMES.keys())
+        if c not in find_my_epc_data.columns
+    ]
    if missed_find_epc_cols:
        for c in missed_find_epc_cols:
            find_my_epc_data[c] = None

    epc_df = epc_df.merge(
        find_my_epc_data[
-            [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
-            ]
-        .rename(columns=asset_list.FIND_EPC_DATA_NAMES),
+            [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
+            + list(asset_list.FIND_EPC_DATA_NAMES.keys())
+        ].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
        how="left",
-        on=asset_list.DOMNA_PROPERTY_ID
+        on=asset_list.DOMNA_PROPERTY_ID,
    )

    asset_list.merge_data(epc_df)
+    # asset_list.standardised_asset_list = asset_list.standardised_asset_list[
+    #     asset_list.standardised_asset_list["domna_full_address"]
+    #     != "120 Airdrie Crescent, Burnley, Lancashire"
+    # ]
    asset_list.extract_attributes()
    asset_list.identify_worktypes()

@ -516,7 +454,10 @@ def app():
    asset_list.get_work_figures()

    # Store as an excel
-    filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
+    filename = (
+        os.path.join(data_folder, ".".join(data_filename.split(".")[:-1]))
+        + " - Standardised.xlsx"
+    )
    # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data

    # Determine inspections priority
@ -540,26 +481,42 @@ def app():
    # )

    with pd.ExcelWriter(filename) as writer:
-        asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
+        asset_list.standardised_asset_list.to_excel(
+            writer, sheet_name="Standardised Asset List", index=False
+        )
        if asset_list.block_analysis_df is not None:
-            asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
+            asset_list.block_analysis_df.to_excel(
+                writer, sheet_name="Block Analysis", index=False
+            )
        # If we have outcomes, we add a tab with the outcomes
        if not asset_list.outcomes_for_output.empty:
-            asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
+            asset_list.outcomes_for_output.to_excel(
+                writer, sheet_name="Outcomes", index=False
+            )

        if not asset_list.unmatched_submissions.empty:
-            asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
+            asset_list.unmatched_submissions.to_excel(
+                writer, sheet_name="Unmatched Submissions", index=False
+            )

        if not asset_list.outcomes_no_match.empty:
-            asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
+            asset_list.outcomes_no_match.to_excel(
+                writer, sheet_name="Unmatched Outcomes", index=False
+            )

        if not asset_list.ecosurv_no_match.empty:
-            asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
+            asset_list.ecosurv_no_match.to_excel(
+                writer, sheet_name="Unmatched Ecosurv", index=False
+            )

        if not asset_list.geographical_areas.empty:
-            asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
+            asset_list.geographical_areas.to_excel(
+                writer, sheet_name="Geographical Areas", index=False
+            )

        # Store dupes
        if asset_list.duplicated_addresses is not None:
            if not asset_list.duplicated_addresses.empty:
-                asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False)
+                asset_list.duplicated_addresses.to_excel(
+                    writer, sheet_name="Duplicate Properties", index=False
+                )
--- a/asset_list/requirements.txt
+++ b/asset_list/requirements.txt
@ -1,7 +1,6 @@
 postal
 pandas
 usaddress
-pydantic-settings==2.6.0
 epc-api-python==1.0.2
 thefuzz
 boto3
@ -10,6 +9,5 @@ openai>=1.3.5
 tiktoken
 msgpack
 beautifulsoup4
-pydantic>=1.10.7
 typing-extensions>=4.5.0
 requests>=2.28.2
--- a/backend/.env.local
+++ b/backend/.env.local
@ -0,0 +1,22 @@
+DB_HOST=db
+DB_PORT=5432
+DB_NAME=tech_team_local_db
+DB_USERNAME=postgres
+DB_PASSWORD=makingwarmerhomes
+
+
+#not used
+GOOGLE_SOLAR_API_KEY="test"
+SAP_PREDICTIONS_BUCKET="test"
+CARBON_PREDICTIONS_BUCKET="test"
+HEAT_PREDICTIONS_BUCKET="test"
+HEATING_KWH_PREDICTIONS_BUCKET="test"
+HOTWATER_KWH_PREDICTIONS_BUCKET="test"
+API_KEY="test"
+ENVIRONMENT="test"
+SECRET_KEY="test"
+PLAN_TRIGGER_BUCKET="test"
+DATA_BUCKET="test"
+EPC_AUTH_TOKEN="test"
+ENGINE_SQS_URL="test"
+ENERGY_ASSESSMENTS_BUCKET="test"
--- a/backend/address2UPRN/Dockerfile
+++ b/backend/address2UPRN/Dockerfile
@ -0,0 +1,7 @@
+# AWS Lambda Python 3.10 base image.
+FROM public.ecr.aws/lambda/python:3.10
+
+# Copy function code into the Lambda task root explicitly instead of relying
+# on the base image's working directory.
+# NOTE(review): main.py imports epc_api, pandas, tqdm and utils.logger, and
+# raises at import time when EPC_AUTH_TOKEN is unset; nothing here installs or
+# copies those dependencies — confirm how they are meant to reach the image.
+COPY main.py ${LAMBDA_TASK_ROOT}/
+
+# Set the handler (module "main", function "handler")
+CMD ["main.handler"]
--- a/backend/address2UPRN/README.md
+++ b/backend/address2UPRN/README.md
@ -0,0 +1,20 @@
+We have a list of addresses as input.
+
+The addresses arrive in batches that share the same postcode, and we want to convert each address into its UPRN.
+
+If this lambda/function can do that, we'll be speeding ahead.
+
+
+Energy Performance Information: https://epc.opendatacommunities.org/
+
+guidance page: https://epc.opendatacommunities.org/docs/guidance#field_domestic_LMK_KEY
+
+Example of Khalim's earlier code, which he wrote some tests for: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/tests/test_search_epc.py#L11
+
+
+Example of EPC search: https://github.com/Hestia-Homes/Model/blob/941be42b83a590e838fd3ee475bfd1ff31438789/backend/SearchEpc.py#L118
+
+
+
+Khalim has made a python package to help scrape data: https://github.com/KhalimCK/epc-api-python
+
--- a/backend/address2UPRN/init.py
+++ b/backend/address2UPRN/init.py
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@ -0,0 +1,567 @@
+from epc_api.client import EpcClient
+import os
+from urllib.parse import urlencode
+import pandas as pd
+from difflib import SequenceMatcher
+from tqdm import tqdm
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+import re
+
+# EPC API auth token, read from the environment once at import time.
+EPC_AUTH_TOKEN = os.getenv(
+    "EPC_AUTH_TOKEN",
+)
+
+# Fail fast: importing this module without the variable set raises immediately.
+if EPC_AUTH_TOKEN is None:
+    raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
+
+import re
+from difflib import SequenceMatcher
+from typing import Set
+
+
+def levenshtein(a: str, b: str) -> float:
+    """
+    Address similarity score in [0, 1].
+
+    Strategy:
+    - Normalise
+    - Strongly penalise mismatched house/flat numbers
+    - Combine token overlap + character similarity
+    """
+
+    def extract_number_sequence(s: str) -> list[str]:
+        return re.findall(r"\d+[a-z]?", s)
+
+    def extract_numbers(s: str) -> Set[str]:
+        return set(extract_number_sequence(s))
+
+    def tokenise(s: str) -> Set[str]:
+        return set(s.split())
+
+    def extract_building_number(s: str) -> str | None:
+        """
+        Extract the main building number (NOT flat/unit).
+        Assumes formats like:
+        - '42 moreton road'
+        - 'flat 3 42 moreton road'
+        """
+        tokens = s.split()
+
+        # remove flat/unit context
+        cleaned = []
+        skip_next = False
+        for t in tokens:
+            if t in ("flat", "apt", "apartment", "unit"):
+                skip_next = True
+                continue
+            if skip_next:
+                skip_next = False
+                continue
+            cleaned.append(t)
+
+        # first remaining number is building number
+        for t in cleaned:
+            if re.fullmatch(r"\d+[a-z]?", t):
+                return t
+
+        return None
+
+    a_norm = normalise_address(a)
+    b_norm = normalise_address(b)
+
+    # --- hard signal: numbers ---
+    nums_a = extract_numbers(a_norm)
+    nums_b = extract_numbers(b_norm)
+
+    if nums_a and not nums_b:
+        return 0.0
+
+    # No shared numbers at all → impossible match
+    if nums_a and nums_b and nums_a.isdisjoint(nums_b):
+        return 0.0
+
+    # 🔒 HARD GUARD: building number must match
+    bld_a = extract_building_number(a_norm)
+    bld_b = extract_building_number(b_norm)
+
+    if bld_a and bld_b and bld_a != bld_b:
+        return 0.0
+
+    # --- order-sensitive flat/building guard ---
+    seq_a = extract_number_sequence(a_norm)
+    seq_b = extract_number_sequence(b_norm)
+
+    has_flat_token_user = any(
+        tok in a_norm for tok in ("flat", "apt", "apartment", "unit")
+    )
+    has_flat_token_epc = "flat" in b_norm
+
+    if (
+        len(seq_a) == 2
+        and len(seq_b) >= 2
+        and has_flat_token_epc
+        and not has_flat_token_user
+        and seq_a != seq_b[:2]
+    ):
+        return 0.0
+
+    # --- token similarity (order-independent) ---
+    toks_a = tokenise(a_norm)
+    toks_b = tokenise(b_norm)
+
+    if not toks_a or not toks_b:
+        token_score = 0.0
+    else:
+        token_score = len(toks_a & toks_b) / len(toks_a | toks_b)
+
+    # --- character similarity (soft signal) ---
+    char_score = SequenceMatcher(None, a_norm, b_norm).ratio()
+
+    # --- weighted blend ---
+    return round(
+        0.65 * token_score + 0.35 * char_score,
+        4,
+    )
+
+
+def normalise_address(s: str) -> str:
+    """
+    Canonical UK-focused address normalisation.
+
+    - Lowercases
+    - Removes punctuation (keeps / for flats)
+    - Normalises whitespace
+    - Applies synonym compression at token level
+    """
+
+    if not s:
+        return ""
+
+    ADDRESS_SYNONYMS = {
+        # street types
+        "rd": "road",
+        "rd.": "road",
+        "st": "street",
+        "st.": "street",
+        "ave": "avenue",
+        "ave.": "avenue",
+        "ln": "lane",
+        "ln.": "lane",
+        "cres": "crescent",
+        "ct": "court",
+        "dr": "drive",
+        # flats / units
+        "apt": "flat",
+        "apartment": "flat",
+        "unit": "flat",
+        "ste": "suite",
+        # numbering noise
+        "no": "",
+        "no.": "",
+    }
+    # 1. lowercase
+    s = s.lower()
+
+    # 1.5 split digit-letter suffixes
+    s = re.sub(r"(\d+)([a-z])\b", r"\1 \2", s)
+
+    # 2. remove punctuation except /
+    s = re.sub(r"[^\w\s/]", " ", s)
+
+    # 3. normalise whitespace
+    s = re.sub(r"\s+", " ", s).strip()
+
+    # 4. tokenise + synonym normalisation
+    tokens = []
+    for tok in s.split():
+        replacement = ADDRESS_SYNONYMS.get(tok, tok)
+        if replacement:
+            tokens.append(replacement)
+
+    return " ".join(tokens)
+
+
+def score_addresses(
+    df: pd.DataFrame,
+    user_address: str,
+    column: str = "address",
+) -> pd.Series:
+    if column not in df.columns:
+        raise ValueError(f"Missing column: {column}")
+
+    return df[column].apply(lambda x: levenshtein(user_address, x))
+
+
+def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
+    """
+    Recursively fetch EPC data by postcode.
+    If results hit the size limit, retry with double size up to max_attempts.
+    """
+    client = EpcClient(auth_token=EPC_AUTH_TOKEN)
+
+    url = os.path.join(client.domestic.host, "search")
+
+    if size:
+        url += "?" + urlencode({"size": size})
+
+    search_resp = client.domestic.call(
+        url=url,
+        method="get",
+        params={"postcode": postcode},
+    )
+
+    results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])
+
+    row_count = len(results_df)
+
+    # If we hit the size limit, there *may* be more results
+    if row_count == size:
+        print(
+            f"⚠️ Warning: hit size limit ({size}) for postcode '{postcode}'. "
+            f"Attempt {attempt}/{max_attempts}."
+        )
+
+        if attempt < max_attempts:
+            print(f"🔁 Retrying with size={size * 2}")
+            return get_epc_data_with_postcode(
+                postcode=postcode,
+                size=size * 2,
+                attempt=attempt + 1,
+                max_attempts=max_attempts,
+            )
+        else:
+            print(
+                "🚨 Max attempts reached. Results may be truncated. "
+                "(Please do a manual review by the tech team.)"
+            )
+
+    return results_df
+
+
+def df_has_single_uprn(df: pd.DataFrame, uprn: str, column: str = "uprn") -> bool:
+    """
+    Returns True if all non-null UPRNs in df match the given uprn.
+    Returns False otherwise.
+    """
+
+    if column not in df.columns:
+        return False
+
+    # Drop nulls and normalise to string
+    uprns = df[column].dropna().astype(str).str.strip().unique()
+
+    # No valid UPRNs to compare
+    if len(uprns) == 0:
+        return False
+
+    # Exactly one unique UPRN and it matches
+    return len(uprns) == 1 and uprns[0] == str(uprn)
+
+
+def get_uprn_candidates(
+    df: pd.DataFrame,
+    user_address: str,
+    address_column: str = "address",
+    uprn_column: str = "uprn",
+) -> pd.DataFrame:
+    """
+    Annotate EPC results with lexicographical similarity scores and ranks.
+
+    Returns a DataFrame sorted by descending lexiscore.
+    DOES NOT choose or return a UPRN.
+    """
+
+    if address_column not in df.columns:
+        raise ValueError(f"Missing column: {address_column}")
+
+    if uprn_column not in df.columns:
+        raise ValueError(f"Missing column: {uprn_column}")
+
+    out = df.copy()
+
+    user_norm = normalise_address(user_address)
+
+    out["lexiscore"] = out[address_column].apply(lambda x: levenshtein(user_norm, x))
+
+    # Normalise UPRN to string
+    out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
+
+    # Rank: 1 = best match
+    out["lexirank"] = out["lexiscore"].rank(method="dense", ascending=False).astype(int)
+
+    return out.sort_values(
+        ["lexirank", "lexiscore"],
+        ascending=[True, False],
+    )
+
+
+def get_uprn(user_inputed_address: str, postcode: str):
+    """
+    Return uprn (str)
+    Return False if failed to find a sensible matching epc
+    Return Nons when epc found but no UPRN
+    """
+    df = get_epc_data_with_postcode(postcode=postcode)
+
+    if df.empty:
+        return None
+
+    scored_df = get_uprn_candidates(
+        df,
+        user_address=user_inputed_address,
+    )
+
+    # Best score
+    best_score = scored_df.iloc[0]["lexiscore"]
+
+    if best_score <= 0:
+        return None
+
+    # All rank-1 rows (possible draw)
+    top_rank_df = scored_df[scored_df["lexirank"] == 1]
+
+    # If rank-1 rows do not agree on a single UPRN → ambiguous
+    if not df_has_single_uprn(top_rank_df, uprn=top_rank_df.iloc[0]["uprn"]):
+        return None
+
+    address = top_rank_df["address"].values[0]
+    lexiscore = float(top_rank_df["lexiscore"].values[0])
+
+    logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
+    # Safe to return the agreed UPRN
+    found_uprn = top_rank_df.iloc[0]["uprn"]
+
+    if found_uprn == "":
+        return None
+
+    return found_uprn
+
+
+def resolve_uprns_for_postcode_group(
+    group_df: pd.DataFrame,
+    epc_df: pd.DataFrame,
+    address_col: str = "Address 1",
+) -> pd.DataFrame:
+    """
+    Given:
+      - group_df: rows sharing the same postcode
+      - epc_df: EPC search results for that postcode
+
+    Returns:
+      group_df + found_uprn + diagnostics
+    """
+
+    results = []
+
+    for _, row in group_df.iterrows():
+        user_address = str(row[address_col]).strip()
+
+        scored_df = get_uprn_candidates(
+            epc_df,
+            user_address=user_address,
+        )
+
+        if scored_df.empty:
+            results.append(
+                {
+                    "found_uprn": None,
+                    "best_match_uprn": None,
+                    "best_match_address": None,
+                    "best_match_lexiscore": None,
+                    "status": "no_epc_candidates",
+                }
+            )
+            continue
+
+        best_score = scored_df.iloc[0]["lexiscore"]
+
+        if best_score <= 0:
+            results.append(
+                {
+                    "found_uprn": None,
+                    "best_match_uprn": None,
+                    "best_match_address": None,
+                    "best_match_lexiscore": best_score,
+                    "status": "zero_score",
+                }
+            )
+            continue
+
+        top_rank_df = scored_df[scored_df["lexirank"] == 1]
+
+        if not df_has_single_uprn(top_rank_df, top_rank_df.iloc[0]["uprn"]):
+            results.append(
+                {
+                    "found_uprn": None,
+                    "best_match_uprn": top_rank_df.iloc[0]["uprn"],
+                    "best_match_address": top_rank_df.iloc[0]["address"],
+                    "best_match_lexiscore": best_score,
+                    "status": "ambiguous",
+                }
+            )
+            continue
+
+        results.append(
+            {
+                "found_uprn": str(top_rank_df.iloc[0]["uprn"]),
+                "best_match_uprn": str(top_rank_df.iloc[0]["uprn"]),
+                "best_match_address": top_rank_df.iloc[0]["address"],
+                "best_match_lexiscore": best_score,
+                "status": "matched",
+            }
+        )
+
+    return pd.concat(
+        [group_df.reset_index(drop=True), pd.DataFrame(results)],
+        axis=1,
+    )
+
+
+def test(a, b):
+    assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
+
+
+def run_all_test():
+    # Basic usage with different post codes styles
+    test(get_epc_data_with_postcode("b93 8sy").shape[0], 63)
+    test(get_epc_data_with_postcode("B938sy").shape[0], 63)
+    test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
+    test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
+
+    test(get_uprn("68", "b93 8sy"), "100070989938")
+    test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938")
+    test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633")
+    test(get_uprn("28 A", "se6 4tf"), "100023278633")
+    test(get_uprn("28A", "se6 4tf"), "100023278633")
+    test(get_uprn("6 Aitken Close", "E8 4SQ"), False)
+
+    # unique case
+    test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198")
+    test(get_uprn("5 ,  1 Semley Gate", "e9 5nh"), "10008238198")
+    test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198")
+    test(get_uprn("1, 5 Semley Gate", "e9 5nh"), False)
+    test(
+        get_uprn("1 Semley Gate", "e9 5nh"), "10008238188"
+    )  # this one return "flat 1, in 1 semley gate"
+    test(
+        get_uprn("48 Oswald Street", "E5 0BT"), False
+    )  # this one return "flat 1, in 1 semley gate"
+    test(
+        get_uprn("42 Oswald Street", "E5 0BT"), False
+    )  # this one return "flat 1, in 1 semley gate"
+    test(
+        get_uprn("46 Oswald Street", "E5 0BT"), False
+    )  # this one return "flat 1, in 1 semley gate"
+    get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
+    get_uprn_candidates(
+        get_epc_data_with_postcode("Cr2 7dl"),
+        "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
+    )
+
+
+# Script entry point: audit every row of INPUT_FILE by resolving its UPRN and
+# comparing it with the UPRN recorded in the sheet; rows that fail are written
+# to "hackney_uprn_failures.xlsx".
+if __name__ == "__main__":
+    INPUT_FILE = "hackney.xlsx"
+
+    # Column names expected in INPUT_FILE
+    ADDRESS_COL = "Address 1"
+    POSTCODE_COL = "Postcode"
+    UPRN_COL = "UPRN"
+
+    df = pd.read_excel(INPUT_FILE)
+
+    # One dict per failing row: the original row plus diagnostics
+    failures = []
+
+    for _, row in tqdm(
+        df.iterrows(),
+        total=len(df),
+        desc="Auditing UPRNs",
+    ):
+        input_address = str(row[ADDRESS_COL]).strip()
+        postcode = str(row[POSTCODE_COL]).strip()
+
+        # A missing UPRN in the sheet means "no match expected"; otherwise it is
+        # compared as an integer-formatted string.
+        expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL]))
+
+        try:
+            epc_df = get_epc_data_with_postcode(postcode)
+
+            # No EPC rows for the postcode is always recorded as a failure,
+            # regardless of expected_uprn.
+            if epc_df.empty:
+                failures.append(
+                    {
+                        **row.to_dict(),
+                        "found_uprn": None,
+                        "best_match_uprn": None,
+                        "best_match_address": None,
+                        "best_match_lexiscore": None,
+                        "status": "no_epc_results",
+                    }
+                )
+                continue
+
+            scored_df = get_uprn_candidates(
+                epc_df,
+                user_address=input_address,
+            )
+
+            # Best candidate (rows are sorted best-first), kept for diagnostics
+            best_row = scored_df.iloc[0]
+
+            best_match_uprn = str(best_row["uprn"])
+            best_match_address = best_row["address"]
+            best_match_lexiscore = round(float(best_row["lexiscore"]), 4)
+
+            # NOTE(review): get_uprn fetches the EPC data for the postcode
+            # again, so each row triggers two API searches.
+            found_uprn = get_uprn(input_address, postcode)
+
+        except Exception as e:
+            # Any error for this row is recorded and the audit moves on
+            failures.append(
+                {
+                    **row.to_dict(),
+                    "found_uprn": None,
+                    "best_match_uprn": None,
+                    "best_match_address": None,
+                    "best_match_lexiscore": None,
+                    "status": "exception",
+                    "error": str(e),
+                }
+            )
+            continue
+
+        # Any falsy result counts as "nothing found"
+        found_uprn_norm = None if not found_uprn else str(found_uprn)
+
+        if found_uprn_norm != expected_uprn:
+            failures.append(
+                {
+                    **row.to_dict(),
+                    "found_uprn": found_uprn_norm,
+                    "best_match_uprn": best_match_uprn,
+                    "best_match_address": best_match_address,
+                    "best_match_lexiscore": best_match_lexiscore,
+                    "status": ("no_match" if found_uprn_norm is None else "mismatch"),
+                }
+            )
+
+    failures_df = pd.DataFrame(failures)
+
+    print("===================================")
+    print(f"Total rows : {len(df)}")
+    print(f"Failures   : {len(failures_df)}")
+    print("===================================")
+
+    failures_df.to_excel(
+        "hackney_uprn_failures.xlsx",
+        index=False,
+    )
+
+
+def handler(event, context):
+    print("hello world")
+    return {"statusCode": 200, "body": "hello world"}
+
+
+# TODO: function dispatcher.
+
+# TODO: get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"), ...) for
+# "Flat 1, 5 Semley Gate" and "Flat 5, 1 Semley Gate" - fix that.
+# TODO: look again at flat 1.
+# TODO: pandas reader, then the separate postcode_splitter.
+# TODO: dump into S3.
--- a/backend/address2UPRN/script.py
+++ b/backend/address2UPRN/script.py
@ -0,0 +1,17 @@
+import pandas as pd
+
+
+# use Address 1
+junte_df = pd.read_excel("hackney_uprn_failures.xlsx")
+
+
+# use domna_address_1
+khalim_df = pd.read_excel("khalim_standard.xlsx")
+
+
+combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1')
+
+# Find the row in khalim_df that does not app
+
+result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])]
+
--- a/backend/address2UPRN/tests/test_csv.py
+++ b/backend/address2UPRN/tests/test_csv.py
@ -0,0 +1,40 @@
+# tests/test_address_to_uprn_csv.py
+
+import csv
+import pytest
+from pathlib import Path
+from backend.address2UPRN.main import get_uprn
+
+FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
+
+
+def load_test_cases():
+    with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
+        reader = csv.DictReader(f)
+        return [
+            pytest.param(
+                row["User Input"],
+                row["Postcode"],
+                row["Manual UPRN Code"],
+                id=f'{row["User Input"]} [{row["Postcode"]}]',
+            )
+            for row in reader
+        ]
+
+
+@pytest.mark.parametrize(
+    "user_input,postcode,expected_uprn",
+    load_test_cases(),
+)
+def test_uprn_resolution_matches_manual(
+    user_input: str,
+    postcode: str,
+    expected_uprn: str,
+):
+    from utils.logger import setup_logger
+
+    uprn = get_uprn(user_input, postcode)
+    if uprn:
+        assert uprn == expected_uprn
+    else:
+        assert str(uprn) == expected_uprn
--- a/backend/address2UPRN/tests/test_data.csv
+++ b/backend/address2UPRN/tests/test_data.csv
@ -0,0 +1,366 @@
+User Input,Postcode,Manual UPRN Code
+47 The Fairway,OX16 0RR,100120771697
+11 REGENT COURT,SL1 3LG,100081041562
+3/137a Windmill Road,TW8 9NH,100021516998
+Flat 33,SW18 4BE,100023328943
+FLAT 1 Brendon Grove,N2 8JE,200013412
+Flat 15,KT8 2NE,100062123759
+FLAT 5 Stonehill Road,W4 3AH,100021589829
+10 Douglas Court,SL7 1UQ,100081278099
+1 Windmill Road,HP17 8JA,766034606
+31 Denewood,HP13 7LH,100081095964
+"10, Greenways Drive",TW4 5DD,10091597009
+Flat 10,W4 3AH,"100021589834"
+Flat 11,TW4 5DD,10091597010
+Flat 11,W4 3AH,100021589835
+"12, Greenways Drive",TW4 5DD,10091597011
+"Flat 12, Forbes House",W4 3AH,100021589836
+FLAT 1 Goodstone Court,HA1 4FL,10070269053
+Flat 13,TW4 5DD,10091597012
+Flat 13,W4 3AH,100021589837
+Flat 14,TW4 5DD,10091597013
+Flat 14,W4 3AH,100021589838
+Flat 15,TW4 5DD,10091597014
+Flat 15,W4 3AH,100021589839
+Flat 16,TW4 5DD,"10091597015"
+Flat 16,W4 3AH,100021589840
+Flat 17,TW4 5DD,10091597016
+Flat 17,W4 3AH,100021589841
+Flat 18,TW4 5DD,10091597017
+Flat 19,W4 3AH,100021589843
+Flat 20,W4 3AH,100021589844
+Flat 21,W4 3AH,100021589845
+Flat 22,W4 3AH,100021589846
+FLAT 2 Goodstone Court,HA1 4FL,10070269054
+Flat 23,W4 3AH,100021589847
+Flat 24,W4 3AH,100021589848
+"30c, Bosanquet Close",UB8 3PE,100021475316
+"30e, Bosanquet Close",UB8 3PE,100021475318
+FLAT 3 Goodstone Court,HA1 4FL,10070269055
+FLAT 4 Goodstone Court,HA1 4FL,10070269056
+FLAT 5 Goodstone Court,HA1 4FL,10070269057
+FLAT 6 Goodstone Court,HA1 4FL,10070269058
+FLAT 7 Goodstone Court,HA1 4FL,10070269059
+FLAT 8 Goodstone Court,HA1 4FL,10070269060
+FLAT 9 Goodstone Court,HA1 4FL,10070269061
+FLAT 10 Goodstone Court,HA1 4FL,10070269062
+FLAT 11 Goodstone Court,HA1 4FL,10070269063
+FLAT 12 Goodstone Court,HA1 4FL,10070269064
+FLAT 13 Goodstone Court,HA1 4FL,10070269065
+FLAT 14 Goodstone Court,HA1 4FL,10070269066
+FLAT 15 Goodstone Court,HA1 4FL,10070269067
+FLAT 16 Goodstone Court,HA1 4FL,10070269068
+FLAT 17 Goodstone Court,HA1 4FL,10070269069
+FLAT 18 Goodstone Court,HA1 4FL,10070269070
+FLAT 19 Goodstone Court,HA1 4FL,10070269071
+FLAT 20 Goodstone Court,HA1 4FL,10070269072
+FLAT 21 Goodstone Court,HA1 4FL,10070269073
+FLAT 22 Goodstone Court,HA1 4FL,10070269074
+FLAT 23 Goodstone Court,HA1 4FL,10070269075
+FLAT 24 Goodstone Court,HA1 4FL,10070269076
+FLAT 25 Goodstone Court,HA1 4FL,10070269077
+FLAT 26 Goodstone Court,HA1 4FL,10070269078
+FLAT 27 Goodstone Court,HA1 4FL,10070269079
+FLAT 28 Goodstone Court,HA1 4FL,10070269080
+FLAT 29 Goodstone Court,HA1 4FL,10070269081
+FLAT 30 Goodstone Court,HA1 4FL,10070269082
+FLAT 31 Goodstone Court,HA1 4FL,10070269083
+FLAT 32 Goodstone Court,HA1 4FL,10070269084
+FLAT 33 Goodstone Court,HA1 4FL,10070269085
+FLAT 34 Goodstone Court,HA1 4FL,10070269086
+FLAT 35 Goodstone Court,HA1 4FL,10070269087
+FLAT 36 Goodstone Court,HA1 4FL,10070269088
+FLAT 37 Goodstone Court,HA1 4FL,10070269089
+FLAT 38 Goodstone Court,HA1 4FL,10070269090
+FLAT 39 Goodstone Court,HA1 4FL,10070269091
+FLAT 40 Goodstone Court,HA1 4FL,10070269092
+FLAT 41 Goodstone Court,HA1 4FL,10070269093
+FLAT 42 Goodstone Court,HA1 4FL,10070269094
+FLAT 43 Goodstone Court,HA1 4FL,10070269095
+"13 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778260
+"14 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778259
+"15 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778258
+"16 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778263
+"17 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778262
+"18 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778261
+"19 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778266
+"20 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778265
+"21 Stubwick Court, Old Saw Mill Place",HP6 6FF,10013778264
+90a Murray Road,W5 4DA,12135293
+"Flat 1, 6 Wolverton Gardens",W5 3LJ,"12119972"
+"1, Monsted House",UB1 1FG,12189944
+"10, Monsted House",UB1 1FG,12189953
+"20, Monsted House",UB1 1FG,12189963
+"2, Monsted House",UB1 1FG,12189945
+"3, Monsted House",UB1 1FG,12189946
+"4, Monsted House",UB1 1FG,12189947
+"5, Monsted House",UB1 1FG,12189948
+"6, Monsted House",UB1 1FG,12189949
+"7, Monsted House",UB1 1FG,12189950
+"8, Monsted House",UB1 1FG,12189951
+"9, Monsted House",UB1 1FG,12189952
+"1 Cullis House, 1, Accolade Avenue",UB1 1FH,12189904
+"2 Cullis House, 1, Accolade Avenue",UB1 1FH,12189905
+"3 Cullis House, 1, Accolade Avenue",UB1 1FH,12189906
+"4 Cullis House, 1, Accolade Avenue",UB1 1FH,12189907
+"5 Cullis House, 1, Accolade Avenue",UB1 1FH,12189908
+"6 Cullis House, 1, Accolade Avenue",UB1 1FH,12189909
+1 Genteel House Samara Drive,UB1 1FJ,12189835
+2 Genteel House Samara Drive,UB1 1FJ,12189836
+3 Genteel House Samara Drive,UB1 1FJ,12189837
+4 Genteel House Samara Drive,UB1 1FJ,12189838
+5 Genteel House Samara Drive,UB1 1FJ,12189839
+6 Genteel House Samara Drive,UB1 1FJ,12189840
+7 Genteel House Samara Drive,UB1 1FJ,12189841
+8 Genteel House Samara Drive,UB1 1FJ,12189842
+9 Genteel House Samara Drive,UB1 1FJ,12189843
+10 Genteel House Samara Drive,UB1 1FJ,12189844
+1 ASH TREE HOUSE,SE5 0TE,None
+"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979
+3 ASH TREE HOUSE,SE5 0TE,None
+Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981
+5 ASH TREE HOUSE,SE5 0TE,None
+Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983
+Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986
+8 ASH TREE HOUSE,SE5 0TE,None
+Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990
+12 ASH TREE HOUSE,SE5 0TE,None
+FLAT 1 599 HARROW ROAD,W10 4RA,217113930
+FLAT 2 599 HARROW ROAD,W10 4RA,217113931
+FLAT 3 599 HARROW ROAD,W10 4RA,None
+FLAT 4 599 HARROW ROAD,W10 4RA,None
+FLAT 5 599 HARROW ROAD,W10 4RA,217113934
+FLAT 6 599 HARROW ROAD,W10 4RA,None
+FLAT 7 599 HARROW ROAD,W10 4RA,None
+FLAT 8 599 HARROW ROAD,W10 4RA,None
+"Flat 1, Ohio Building",SE13 7RX,10023226256
+"Flat 2, Ohio Building",SE13 7RX,10023226257
+"Apartment 1 Block B, 105, Benwell Road",N7 7BW,10012792307
+"Apartment 2 Block B, 105, Benwell Road",N7 7BW,10012792308
+"Apartment 3 Block B, 105, Benwell Road",N7 7BW,10012792309
+"Apartment 4 Block B, 105, Benwell Road",N7 7BW,10012792310
+"Apartment 5 Block B, 105, Benwell Road",N7 7BW,10012792311
+"Apartment 6 Block B, 105, Benwell Road",N7 7BW,10012792312
+"Apartment 7 Block B, 105, Benwell Road",N7 7BW,10012792313
+"Apartment 8 Block B, 105, Benwell Road",N7 7BW,10012792314
+"Apartment 9 Block B, 105, Benwell Road",N7 7BW,10012792315
+"Apartment 10 Block B, 105, Benwell Road",N7 7BW,10012792316
+"Apartment 11 Block B, 105, Benwell Road",N7 7BW,10012792317
+"Apartment 12 Block B, 105, Benwell Road",N7 7BW,10012792318
+"Apartment 13 Block B, 105, Benwell Road",N7 7BW,10012792319
+"Apartment 1 Block  D, 32, Hornsey Road",N7 7AT,10012792366
+"Apartment 2 Block  D, 32, Hornsey Road",N7 7AT,10012792367
+"Apartment 3 Block  D, 32, Hornsey Road",N7 7AT,10012792368
+"Apartment 4 Block  D, 32, Hornsey Road",N7 7AT,10012792369
+"Apartment 5 Block  D, 32, Hornsey Road",N7 7AT,10012792370
+"Apartment 6 Block  D, 32, Hornsey Road",N7 7AT,"10012792371"
+"Apartment 7 Block  D, 32, Hornsey Road",N7 7AT,10012792372
+"Apartment 8 Block  D, 32, Hornsey Road",N7 7AT,10012792373
+"Apartment 9 Block  D, 32, Hornsey Road",N7 7AT,10012792374
+"Apartment 10 Block  D, 32, Hornsey Road",N7 7AT,10012792375
+"Apartment 11 Block  D, 32, Hornsey Road",N7 7AT,10012792376
+"Apartment 12 Block  D, 32, Hornsey Road",N7 7AT,10012792377
+"Apartment 13 Block  D, 32, Hornsey Road",N7 7AT,10012792378
+"Apartment 14 Block  D, 32, Hornsey Road",N7 7AT,10012792379
+"Apartment 15 Block  D, 32, Hornsey Road",N7 7AT,10012792380
+"Apartment 16 Block  D, 32, Hornsey Road",N7 7AT,"10012792381"
+"Apartment 17Block  D, 32, Hornsey Road",N7 7AT,10012792382
+"Apartment 18 Block  D, 32, Hornsey Road",N7 7AT,10012792383
+24b Honley Road,SE6 2HZ,None
+FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
+2 COLLEGE HOUSE,CM7 1JS,100091449870
+3 COLLEGE HOUSE,CM7 1JS,100091449871
+1 Anita Street,M4 5DU,None
+2 Anita Street,M4 5DU,77123061
+5 Anita Street,M4 5DU,77123081
+6 Anita Street,M4 5DU,77123082
+8 Anita Street,M4 5DU,None
+9 Anita Street,M4 5DU,None
+10 Anita Street,M4 5DU,77123051
+12 Anita Street,M4 5DU,77123053
+19 Anita Street,M4 5DU,None
+22 Anita Street,M4 5DU,None
+26 Anita Street,M4 5DU,77123068
+28 Anita Street,M4 5DU,None
+30 Anita Street,M4 5DU,None
+32 Anita Street,M4 5DU,None
+33 Anita Street,M4 5DU,77123076
+34 Anita Street,M4 5DU,None
+35 Anita Street,M4 5DU,77123078
+36 Anita Street,M4 5DU,77123079
+23 George Leigh Street,M4 5DR,77123171
+25 George Leigh Street,M4 5DR,None
+35 George Leigh Street,M4 5DR,77123177
+39 George Leigh Street,M4 5DR,77123179
+41 George Leigh Street,M4 5DR,None
+43 George Leigh Street,M4 5DR,None
+49 George Leigh Street,M4 5DR,None
+51 George Leigh Street,M4 5DR,77123185
+55 George Leigh Street,M4 5DR,None
+57 George Leigh Street,M4 5DR,None
+"1a, Victoria Square",M4 5DX,77211153
+2a Victoria Square ,M4 5DX,None
+"4a, Victoria Square",M4 5DX,77211155
+5a Victoria Square,M4 5DX,77211156
+ 6a Victoria Square,M4 5DX,77211157
+7a Victoria Square,M4 5DX,77211158
+8a Victoria Square,M4 5DX,77211159
+9a Victoria Square,M4 5DX,77211160
+10a Victoria Square,M4 5DX,77211161
+11a Victoria Square,M4 5DX,77211162
+12a Victoria Square,M4 5DX,77211163
+13a Victoria Square,M4 5DX,77211164
+14a Victoria Square,M4 5DX,77211165
+15a Victoria Square,M4 5DX,77211166
+16a Victoria Square,M4 5DX,77211167
+17a Victoria Square,M4 5DX,77211168
+18a Victoria Square,M4 5DX,77211169
+19a Victoria Square,M4 5DX,77211170
+20a Victoria Square,M4 5DX,77211171
+21a Victoria Square,M4 5DY,77211172
+22a Victoria Square,M4 5DY,None
+23a Victoria Square,M4 5DY,77211174
+24a Victoria Square,M4 5DY,77211175
+25a Victoria Square,M4 5DY,77211176
+26a Victoria Square,M4 5DY,77211177
+27a Victoria Square,M4 5DY,77211178
+28a Victoria Square,M4 5DY,None
+29a Victoria Square,M4 5DY,77211180
+30a Victoria Square,M4 5DY,77211181
+31a Victoria Square,M4 5DY,77211182
+32a Victoria Square,M4 5DY,77211183
+33a Victoria Square,M4 5DY,77211184
+34a Victoria Square,M4 5DY,77211185
+35a Victoria Square,M4 5DY,None
+36a Victoria Square,M4 5DY,77211187
+37a Victoria Square,M4 5DY,77211188
+38a Victoria Square,M4 5DY,77211189
+39a Victoria Square,M4 5DY,77211190
+40a Victoria Square,M4 5DY,None
+41a Victoria Square,M4 5DY,77211192
+42a Victoria Square,M4 5DY,77211193
+43a Victoria Square,M4 5DY,77211194
+44a Victoria Square,M4 5DY,77211195
+45a Victoria Square,M4 5DY,77211196
+46a Victoria Square,M4 5DY,77211197
+47a Victoria Square,M4 5DY,77211198
+48a Victoria Square,M4 5DY,77211199
+49a Victoria Square,M4 5DY,77211200
+50a Victoria Square,M4 5DY,77211201
+51a Victoria Square,M4 5DY,77211202
+52a Victoria Square,M4 5DY,77211203
+53a Victoria Square,M4 5DY,77211204
+54a Victoria Square,M4 5DY,77211205
+55a Victoria Square,M4 5DY,77211206
+56a Victoria Square,M4 5DZ,77211207
+57a Victoria Square,M4 5DZ,None
+58a Victoria Square,M4 5DZ,77211209
+59a Victoria Square,M4 5DZ,77211210
+60a Victoria Square,M4 5DZ,77211211
+61a Victoria Square,M4 5DZ,77211212
+62a Victoria Square,M4 5DZ,77211213
+63a Victoria Square,M4 5DZ,None
+64a Victoria Square,M4 5DZ,77211215
+65a Victoria Square,M4 5DZ,77211216
+66a Victoria Square,M4 5DZ,None
+67a Victoria Square,M4 5DZ,None
+68a Victoria Square,M4 5DZ,77211219
+69a Victoria Square,M4 5DZ,77211220
+70a Victoria Square,M4 5DZ,77211221
+71a Victoria Square,M4 5DZ,77211222
+72a Victoria Square,M4 5DZ,77211223
+73a Victoria Square,M4 5DZ,77211224
+74a Victoria Square,M4 5DZ,None
+75a Victoria Square,M4 5DZ,77211226
+76a Victoria Square,M4 5DZ,77211227
+77a Victoria Square,M4 5DZ,None
+78a Victoria Square,M4 5DZ,77211229
+79a Victoria Square,M4 5DZ,77211230
+80a Victoria Square,M4 5DZ,77211231
+81a Victoria Square,M4 5DZ,77211232
+82 Victoria Square,M4 5DZ,None
+83a Victoria Square,M4 5DZ,77211234
+84a Victoria Square,M4 5DZ,None
+85a Victoria Square,M4 5DZ,77211236
+86a Victoria Square,M4 5DZ,77211237
+87a Victoria Square,M4 5DZ,77211238
+88a Victoria Square,M4 5DZ,None
+89a Victoria Square,M4 5DZ,77211240
+90a Victoria Square,M4 5DZ,77211241
+91a Victoria Square,M4 5DZ,77211242
+92a Victoria Square,M4 5DZ,77211243
+93a Victoria Square,M4 5EA,77211244
+94a Victoria Square,M4 5EA,None
+95a Victoria Square,M4 5EA,77211246
+96a Victoria Square,M4 5EA,77211247
+97a Victoria Square,M4 5EA,77211248
+98a Victoria Square,M4 5EA,77211249
+99a Victoria Square,M4 5EA,77211250
+100a Victoria Square,M4 5EA,77211251
+101a Victoria Square,M4 5EA,None
+102a Victoria Square,M4 5EA,None
+103a Victoria Square,M4 5EA,77211254
+104a Victoria Square,M4 5EA,77211255
+105a Victoria Square,M4 5EA,None
+106a Victoria Square,M4 5EA,77211257
+107a Victoria Square,M4 5EA,77211258
+108a Victoria Square,M4 5EA,77211259
+109a Victoria Square,M4 5EA,77211260
+110a Victoria Square,M4 5EA,77211261
+111a Victoria Square,M4 5EA,77211262
+112a Victoria Square,M4 5EA,None
+113a Victoria Square,M4 5EA,77211264
+114a Victoria Square,M4 5EA,77211265
+115a Victoria Square,M4 5EA,77211266
+116a Victoria Square,M4 5EA,77211267
+117a Victoria Square,M4 5EA,None
+118a Victoria Square,M4 5EA,None
+119a Victoria Square,M4 5EA,77211270
+120a Victoria Square,M4 5EA,77211271
+121a Victoria Square,M4 5EA,77211272
+122a Victoria Square,M4 5EA,77211273
+123a Victoria Square,M4 5EA,77211274
+124a Victoria Square,M4 5EA,None
+125a Victoria Square,M4 5EA,77211276
+126a Victoria Square,M4 5EA,77211277
+127a Victoria Square,M4 5EA,77211278
+128a Victoria Square,M4 5EA,77211279
+129a Victoria Square,M4 5EA,77211280
+130a Victoria Square,M4 5FA,77211281
+131a Victoria Square,M4 5FA,77211282
+132a Victoria Square,M4 5FA,77211283
+133a Victoria Square,M4 5FA,None
+134a Victoria Square,M4 5FA,77211285
+135a Victoria Square,M4 5FA,77211286
+136a Victoria Square,M4 5FA,77211287
+137a Victoria Square,M4 5FA,77211288
+138a Victoria Square,M4 5FA,77211289
+139a Victoria Square,M4 5FA,77211290
+140a Victoria Square,M4 5FA,77211291
+141a Victoria Square,M4 5FA,77211292
+142a Victoria Square,M4 5FA,77211293
+143a Victoria Square,M4 5FA,77211294
+144a Victoria Square,M4 5FA,77211295
+145a Victoria Square,M4 5FA,None
+146a Victoria Square,M4 5FA,77211297
+147a Victoria Square,M4 5FA,77211298
+148a Victoria Square,M4 5FA,77211299
+149a Victoria Square,M4 5FA,77211300
+150a Victoria Square,M4 5FA,77211301
+151a Victoria Square,M4 5FA,None
+152a Victoria Square,M4 5FA,77211303
+153a Victoria Square,M4 5FA,None
+154a Victoria Square,M4 5FA,77211305
+155a Victoria Square,M4 5FA,None
+156a Victoria Square,M4 5FA,77211307
+157a Victoria Square,M4 5FA,77211308
+158a Victoria Square,M4 5FA,77211309
+159a Victoria Square,M4 5FA,None
+160a Victoria Square,M4 5FA,77211311
+161a Victoria Square,M4 5FA,None
+162a Victoria Square,M4 5FA,None
+163a Victoria Square,M4 5FA,77211314
+164a Victoria Square,M4 5FA,77211315
+165a Victoria Square,M4 5FA,77211316
+166a Victoria Square,M4 5FA,None
+"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -42,7 +42,7 @@ class Settings(BaseSettings):
    AWS_DEFAULT_REGION: Optional[str] = None

    class Config:
-        env_file = "backend/.env"
+        env_file = "backend/.env.local"


@lru_cache()
--- a/backend/app/db/connection.py
+++ b/backend/app/db/connection.py
@ -3,7 +3,9 @@ from contextlib import contextmanager
 from backend.app.config import get_settings
 from sqlmodel import Session

-connection_string = "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
+connection_string = (
+    "postgresql+{drivername}://{username}:{password}@{server}:{port}/{dbname}"
+)
 db_string = connection_string.format(
    drivername="psycopg2",  # You'll need to use psycopg2 driver for PostgreSQL
    username=get_settings().DB_USERNAME,
@ -28,7 +30,9 @@ db_engine = create_engine(

 def get_db_session():
    if db_engine is None:
-        raise RuntimeError("Database is not configured. Set DATABASE_URL in environment variables.")
+        raise RuntimeError(
+            "Database is not configured. Set DATABASE_URL in environment variables."
+        )
    return Session(db_engine)


--- a/backend/app/db/functions/condition_functions.py
+++ b/backend/app/db/functions/condition_functions.py
@ -0,0 +1,12 @@
+from typing import List
+from sqlalchemy import insert, delete
+from sqlalchemy.orm import Session
+
+from backend.app.db.connection import db_session, db_read_session
+from backend.app.db.models.condition import PropertyConditionSurveyModel
+
+
+def bulk_insert_property_surveys(
+    session: Session, surveys: List[PropertyConditionSurveyModel]
+) -> None:
+    """Placeholder for bulk-inserting survey models; not implemented yet.
+
+    Raises:
+        NotImplementedError: always, regardless of the arguments.
+    """
+    raise NotImplementedError
--- a/backend/app/db/models/condition.py
+++ b/backend/app/db/models/condition.py
@ -0,0 +1,97 @@
+from sqlalchemy import (
+    BigInteger,
+    Column,
+    Date,
+    ForeignKey,
+    Integer,
+    String,
+    Enum as SqlEnum,
+)
+from sqlalchemy.orm import declarative_base, relationship
+
+from backend.condition.domain.aspect_type import AspectType
+from backend.condition.domain.element_type import ElementType
+
+# Declarative base class that the ORM models in this module inherit from.
+Base = declarative_base()
+
+# Column type for ElementType, declared as a native database enum named
+# "element_type". values_callable supplies each member's .value as the
+# persisted label.
+ElementTypeDb = SqlEnum(
+    ElementType,
+    name="element_type",
+    native_enum=True,
+    values_callable=lambda enum: [e.value for e in enum],
+)
+
+# Column type for AspectType, declared as a native database enum named
+# "aspect_type". values_callable supplies each member's .value as the
+# persisted label.
+AspectTypeDb = SqlEnum(
+    AspectType,
+    name="aspect_type",
+    native_enum=True,
+    values_callable=lambda enum: [a.value for a in enum],
+)
+
+
+class PropertyConditionSurveyModel(Base):
+    """ORM model for the "property_condition_survey" table.
+
+    A survey row owns a collection of ElementModel rows through `elements`.
+    """
+
+    __tablename__ = "property_condition_survey"
+
+    # Auto-incrementing surrogate primary key.
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    # Required; stored as a big integer.
+    uprn = Column(BigInteger, nullable=False)
+
+    # Both required.
+    date = Column(Date, nullable=False)
+    source = Column(String, nullable=False)
+
+    # Child elements, paired with ElementModel.survey. The cascade setting
+    # propagates session operations to the children and deletes elements
+    # that are removed from this collection.
+    elements = relationship(
+        "ElementModel",
+        back_populates="survey",
+        cascade="all, delete-orphan",
+    )
+
+
+class ElementModel(Base):
+    """ORM model for the "element" table.
+
+    Each element belongs to one PropertyConditionSurveyModel (`survey`) and
+    owns a collection of AspectConditionModel rows (`aspect_conditions`).
+    """
+
+    __tablename__ = "element"  # TODO: rename to survey_element?
+
+    # Auto-incrementing surrogate primary key.
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+
+    # Required foreign key to property_condition_survey.id.
+    survey_id = Column(
+        BigInteger,
+        ForeignKey("property_condition_survey.id"),
+        nullable=False,
+    )
+
+    # Required; element_type uses the ElementTypeDb enum column type.
+    element_type = Column(ElementTypeDb, nullable=False)
+    element_instance = Column(BigInteger, nullable=False)
+
+    # Parent survey, paired with PropertyConditionSurveyModel.elements.
+    survey = relationship(
+        "PropertyConditionSurveyModel",
+        back_populates="elements",
+    )
+
+    # Child aspect conditions, paired with AspectConditionModel.element. The
+    # cascade setting deletes aspect conditions removed from this collection.
+    aspect_conditions = relationship(
+        "AspectConditionModel",
+        back_populates="element",
+        cascade="all, delete-orphan",
+    )
+
+
+class AspectConditionModel(Base):
+    """ORM model for the "aspect_condition" table.
+
+    Each aspect condition belongs to one ElementModel (`element`).
+    """
+
+    __tablename__ = "aspect_condition"  # TODO: rename to survey_aspect?
+
+    # Auto-incrementing surrogate primary key.
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+
+    # Required foreign key to element.id.
+    element_id = Column(
+        BigInteger,
+        ForeignKey("element.id"),
+        nullable=False,
+    )
+
+    # Required; aspect_type uses the AspectTypeDb enum column type.
+    aspect_type = Column(AspectTypeDb, nullable=False)
+    aspect_instance = Column(BigInteger, nullable=False)
+
+    # The remaining columns set no nullable=False, so each may be left unset.
+    value = Column(String)
+    quantity = Column(Integer)
+    install_date = Column(Date)
+    renewal_year = Column(Integer)
+    comments = Column(String)
+
+    # Parent element, paired with ElementModel.aspect_conditions.
+    element = relationship(
+        "ElementModel",
+        back_populates="aspect_conditions",
+    )
--- a/backend/app/requirements/requirements.txt
+++ b/backend/app/requirements/requirements.txt
@ -1,3 +1,4 @@
+
 # fastapi
 fastapi==0.115.2
 sqlalchemy==2.0.36
@ -13,4 +14,3 @@ openpyxl==3.1.2
 # Basic
 pytz
 sqlmodel
-
--- a/backend/condition/README.md
+++ b/backend/condition/README.md
@ -20,7 +20,7 @@ The processor currently supports file formats provided by **Peabody** and **LBWF

 The `local_runner` script allows the processor to be executed in a local environment.

-1. Copy a sample input file into the `sample_data/` directory.
+1. Copy sample input file(s) into the `sample_data/` directory. If working with Peabody data, you'll need the Landlord Reference / UPRN lookup file as well. 
 2. Update `local_runner.py` as required, specifically the definitions of:
   - `lbwf_path`
   - `peabody_path`
--- a/backend/condition/local_runner.py
+++ b/backend/condition/local_runner.py
@ -21,6 +21,8 @@ def main() -> None:
        / "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D  Lower.xlsx"
    )
    filepaths = [lbwf_path, peabody_path]
+    # filepaths = [lbwf_path]
+    # filepaths = [peabody_path]

    for fp in filepaths:
        with fp.open("rb") as f:
--- a/backend/condition/parsing/lbwf_parser.py
+++ b/backend/condition/parsing/lbwf_parser.py
@ -1,4 +1,4 @@
-from typing import BinaryIO, Any, Dict, Iterator, List, Tuple
+from typing import BinaryIO, Any, Dict, Iterator, List, Optional, Tuple
 from openpyxl import Workbook, load_workbook
 from collections import defaultdict

@ -15,7 +15,11 @@ logger = setup_logger()

 class LbwfParser(Parser):

-    def parse(self, file_stream: BinaryIO) -> Any:
+    def parse(
+        self,
+        file_stream: BinaryIO,
+        location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
+    ) -> Any:
        wb: Workbook = load_workbook(file_stream)
        address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(
            wb
--- a/backend/condition/parsing/parser.py
+++ b/backend/condition/parsing/parser.py
@ -1,8 +1,13 @@
 from abc import ABC, abstractmethod
-from typing import BinaryIO, Any
+from typing import BinaryIO, Any, Dict, Optional
+

 class Parser(ABC):

    @abstractmethod
-    def parse(self, file_stream: BinaryIO) -> Any:
+    def parse(
+        self,
+        file_stream: BinaryIO,
+        location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
+    ) -> Any:
        pass
--- a/backend/condition/parsing/peabody_parser.py
+++ b/backend/condition/parsing/peabody_parser.py
@ -1,26 +1,55 @@
-from typing import Any, BinaryIO, Dict, Iterator, List, Tuple, DefaultDict
+import csv
+from pathlib import Path
+from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
 from openpyxl import Workbook, load_workbook
 from collections import defaultdict

 from backend.condition.parsing.parser import Parser
-from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
+from backend.condition.parsing.records.peabody.peabody_asset_condition import (
+    PeabodyAssetCondition,
+)
 from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty
 from utils.logger import setup_logger

 logger = setup_logger()

+
 class PeabodyParser(Parser):
-    def parse(self, file_stream: BinaryIO) -> Any:
+    def parse(
+        self,
+        file_stream: BinaryIO,
+        location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
+    ) -> Any:
        wb: Workbook = load_workbook(file_stream)
-        address_to_uprn_map: Dict[str, int] = PeabodyParser._generate_address_to_uprn_dict(wb)

-        assets = self._parse_assets(wb)
-
-        return self._group_assets_into_properties(
-            assets=assets,
-            address_to_uprn_map=address_to_uprn_map,
+        if location_ref_to_uprn_map is None:
+            location_ref_to_uprn_map: Dict[str, int] = (
+                PeabodyParser._build_location_ref_to_uprn_map()
            )

+        assets = PeabodyParser._parse_assets(wb)
+
+        return PeabodyParser._group_assets_into_properties(
+            assets=assets,
+            location_ref_to_uprn_map=location_ref_to_uprn_map,
+        )
+
+    @staticmethod
+    def _build_location_ref_to_uprn_map() -> Dict[str, int]:
+        """Load the location-reference -> UPRN lookup from the sample-data CSV.
+
+        The file is read from
+        ``<package>/sample_data/peabody/PeabodyPropertymatched_Dec25_propref_UPRN.csv``
+        and must have ``reference`` and ``out_uprn`` columns.
+
+        Returns:
+            Mapping of each ``reference`` value to its ``out_uprn`` as an int.
+            Rows whose ``out_uprn`` is blank or not an integer are skipped
+            with a warning instead of aborting the whole load.
+
+        Raises:
+            FileNotFoundError: if the lookup CSV is not present.
+            KeyError: if a required column is missing from the file.
+        """
+        location_ref_to_uprn_filepath: Path = (
+            Path(__file__).resolve().parents[1]
+            / "sample_data"
+            / "peabody"
+            / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
+        )
+        location_ref_to_uprn_map: Dict[str, int] = {}
+
+        # utf-8-sig drops a leading byte-order mark (common in spreadsheet
+        # exports) so the first header is read as "reference" rather than
+        # "\ufeffreference"; plain UTF-8 files are read unchanged.
+        with location_ref_to_uprn_filepath.open(
+            newline="", encoding="utf-8-sig"
+        ) as f:
+            for row in csv.DictReader(f):
+                reference = row["reference"]
+                raw_uprn = row["out_uprn"]
+                try:
+                    location_ref_to_uprn_map[reference] = int(raw_uprn)
+                except (TypeError, ValueError):
+                    # One unusable row should not prevent every other
+                    # reference from resolving.
+                    logger.warning(
+                        f"Skipping lookup row with invalid UPRN {raw_uprn!r} "
+                        f"for Location Reference: {reference}"
+                    )
+
+        return location_ref_to_uprn_map

    @staticmethod
    def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
@ -33,7 +62,9 @@ class PeabodyParser(Parser):
        assets: List[PeabodyAssetCondition] = []
        for row in asset_rows:
            try:
-                asset = PeabodyParser._map_row_to_asset_record(row, asset_header_indexes)
+                asset = PeabodyParser._map_row_to_asset_record(
+                    row, asset_header_indexes
+                )
                if not asset.is_block_level:
                    # Block-level condition surveys are out of scope for now
                    # until we have a wider think on how to handle block
@ -48,24 +79,26 @@ class PeabodyParser(Parser):
    @staticmethod
    def _group_assets_into_properties(
        assets: List[PeabodyAssetCondition],
-        address_to_uprn_map: Dict[str, int],
+        location_ref_to_uprn_map: Dict[str, int],
    ) -> List[PeabodyProperty]:
-        assets_by_address: DefaultDict[str, List[PeabodyAssetCondition]] = defaultdict(list)
+        assets_by_location_reference: DefaultDict[str, List[PeabodyAssetCondition]] = (
+            defaultdict(list)
+        )

        for asset in assets:
-            if asset.full_address is None:
+            if asset.lo_reference is None:
                continue

-            address = asset.full_address.strip()
-            assets_by_address[address].append(asset)
+            assets_by_location_reference[asset.lo_reference].append(asset)

        properties: List[PeabodyProperty] = []

-        for address, grouped_assets in assets_by_address.items():
-            uprn = address_to_uprn_map.get(address)
+        for location_ref, grouped_assets in assets_by_location_reference.items():
+
+            uprn = location_ref_to_uprn_map.get(location_ref)

            if uprn is None:
-                logger.warning(f"No UPRN found for address: {address}")
+                logger.warning(f"No UPRN found for Location Reference: {location_ref}")
                continue

            properties.append(
@ -77,7 +110,6 @@ class PeabodyParser(Parser):

        return properties

-                
    @staticmethod
    def _map_row_to_asset_record(
        row: Any | Tuple[object | None, ...],
@ -102,39 +134,9 @@ class PeabodyParser(Parser):
            condition_survey_date=row[header_indexes["condition_survey_date"]],
        )

-    @staticmethod
-    def _generate_address_to_uprn_dict(wb: Workbook) -> Dict[str, int | None]:
-        sheet = wb["Survey Records - D & Lower"]
-        rows: Iterator[Tuple[object | None, ...]] = sheet.iter_rows(values_only=True)
-
-        headers = next(rows)
-        header_indexes: Dict[str, int] = PeabodyParser._get_column_indexes_by_name(headers)
-
-        address_idx = header_indexes["full_address"]
-
-        
-        address_to_uprn: Dict[str, int] = {}
-        # Generate random UPRNs for now
-        next_uprn = 1 # TODO: get real UPRNs
-
-        for row in rows:
-            address = row[address_idx]
-
-            if address is None:
-                continue
-
-            address = address.strip()
-
-            if address not in address_to_uprn:
-                address_to_uprn[address] = next_uprn
-                next_uprn += 1
-
-        return address_to_uprn
-
-    
    @staticmethod
    def _get_column_indexes_by_name(
-        headers: Tuple[object | None, ...]
+        headers: Tuple[object | None, ...],
    ) -> Dict[str, int]:
        index: Dict[str, int] = {}

--- a/backend/condition/persistence/condition_postgres.py
+++ b/backend/condition/persistence/condition_postgres.py
@ -0,0 +1,86 @@
+import time
+from typing import List, Optional
+from sqlmodel import Session
+
+from utils.logger import setup_logger
+from backend.app.db.models.condition import (
+    AspectConditionModel,
+    ElementModel,
+    PropertyConditionSurveyModel,
+)
+from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
+from backend.app.db.connection import db_session
+
+logger = setup_logger()
+
+
+class ConditionPostgres:
+    """Persists property condition surveys to Postgres via the ORM models."""
+
+    def bulk_insert_surveys(
+        self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100
+    ) -> None:
+        """Map domain surveys to ORM models and insert them in batches.
+
+        Each batch is added to the session and committed separately, so a
+        failure part-way through leaves earlier batches committed.
+
+        Args:
+            surveys: Domain surveys to persist.
+            batch_size: Number of surveys per commit. ``None`` writes all
+                surveys in a single batch.
+
+        Raises:
+            ValueError: if ``batch_size`` is zero or negative.
+        """
+        # Reject bad sizes before doing any work: a negative step would make
+        # the range below empty and silently insert nothing.
+        if batch_size is not None and batch_size <= 0:
+            raise ValueError(
+                f"batch_size must be a positive integer or None, got {batch_size}"
+            )
+
+        logger.info(
+            f"Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
+        )
+        survey_models: List[PropertyConditionSurveyModel] = [
+            ConditionPostgres.map_survey_to_model(s) for s in surveys
+        ]
+        total: int = len(survey_models)
+
+        if batch_size is None:
+            # One batch covering everything; max() keeps the range step
+            # non-zero when there are no surveys at all.
+            batch_size = max(total, 1)
+
+        logger.info(
+            f"Finished mapping {total} surveys. Writing to database in batches of {batch_size}..."
+        )
+
+        with db_session() as session:
+            for start in range(0, total, batch_size):
+                end = min(start + batch_size, total)
+                batch = survey_models[start:end]
+
+                t0: float = time.perf_counter()
+                ConditionPostgres._insert_surveys_batch(batch, session)
+                elapsed: float = time.perf_counter() - t0
+
+                logger.info(
+                    f"Inserted batch {start} - {end} ({len(batch)} surveys) in {elapsed} seconds",
+                )
+
+    @staticmethod
+    def map_survey_to_model(
+        survey: PropertyConditionSurvey,
+    ) -> PropertyConditionSurveyModel:
+        """Convert a domain survey into its ORM model tree.
+
+        Builds one ElementModel per element of the survey and one
+        AspectConditionModel per aspect condition of each element, attaching
+        them through the relationship collections in the same order.
+
+        Args:
+            survey: Domain survey to convert.
+
+        Returns:
+            A new, unsaved PropertyConditionSurveyModel with its children attached.
+        """
+        survey_model = PropertyConditionSurveyModel(
+            uprn=survey.uprn,
+            date=survey.date,
+            source=survey.source,
+            elements=[],
+        )
+
+        for element in survey.elements:
+            element_model = ElementModel(
+                element_type=element.element_type,
+                element_instance=element.element_instance,
+                aspect_conditions=[],
+            )
+
+            for aspect in element.aspect_conditions:
+                aspect_model = AspectConditionModel(
+                    aspect_type=aspect.aspect_type,
+                    aspect_instance=aspect.aspect_instance,
+                    value=aspect.value,
+                    quantity=aspect.quantity,
+                    install_date=aspect.install_date,
+                    renewal_year=aspect.renewal_year,
+                    comments=aspect.comments,
+                )
+
+                element_model.aspect_conditions.append(aspect_model)
+
+            survey_model.elements.append(element_model)
+
+        return survey_model
+
+    @staticmethod
+    def _insert_surveys_batch(
+        surveys: List[PropertyConditionSurveyModel], session: Session
+    ) -> None:
+        """Add one batch of survey models to the session and commit it."""
+        session.add_all(surveys)
+        session.commit()
--- a/backend/condition/processor.py
+++ b/backend/condition/processor.py
@ -1,25 +1,33 @@
 from typing import Any, BinaryIO, List
 from datetime import datetime

+from utils.logger import setup_logger
 from backend.condition.domain.mapping.mapper import Mapper
 from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
 from backend.condition.parsing.parser import Parser
-from utils.logger import setup_logger
+from backend.condition.persistence.condition_postgres import ConditionPostgres
 from backend.condition.file_type import FileType, detect_file_type
 from backend.condition.parsing.factory import select_parser, select_mapper

+logger = setup_logger()
+

 def process_file(file_stream: BinaryIO, source_key: str) -> None:
-    print(f"[processor] Received file: {source_key}")
+    logger.info(f"[processor] Received file: {source_key}")

    # Instantiation
    file_type: FileType = detect_file_type(source_key)
    parser: Parser = select_parser(file_type)
    mapper: Mapper = select_mapper(file_type)
+    persistence = ConditionPostgres()

    # Orchestration
    raw_properties: List[Any] = parser.parse(file_stream)

+    logger.info(
+        f"[processor] Finished loading customer survey data for {len(raw_properties)} properties. Mapping..."
+    )
+
    survey_year = datetime.now().year  # TODO: get this from filepath or elsewhere

    property_condition_surveys: List[PropertyConditionSurvey] = []
@ -29,4 +37,10 @@ def process_file(file_stream: BinaryIO, source_key: str) -> None:
            mapper.map_asset_conditions_for_property(p, survey_year)
        )

-    print("done")  # temp
+    logger.info(
+        f"[processor] Finished mapping {len(property_condition_surveys)} properties. Writing to database..."
+    )
+
+    persistence.bulk_insert_surveys(property_condition_surveys)
+
+    logger.info(f"[processor] Finished loading surveys to database")
--- a/backend/condition/tests/custom_asserts.py
+++ b/backend/condition/tests/custom_asserts.py
@ -1,3 +1,4 @@
+from backend.app.db.models.condition import PropertyConditionSurveyModel
 from backend.condition.domain.property_condition_survey import PropertyConditionSurvey


@ -72,3 +73,41 @@ class CustomAsserts:
                    f"{actual_aspect.comments} != {expected_aspect.comments}"
                )
        return True
+
+    @staticmethod
+    def assert_property_condition_survey_model_matches_expected(
+        actual_model: PropertyConditionSurveyModel,
+        expected: dict,
+    ) -> None:
+        """Assert that a survey model matches a plain-dict description of it.
+
+        Declared as a static method because it takes no ``self``; without the
+        decorator it could only be called on the class, never on an instance.
+
+        Args:
+            actual_model: Model under test. Its ``uprn``, ``date``, ``source``
+                and ``elements`` attributes are read; each element's
+                ``element_type``, ``element_instance`` and
+                ``aspect_conditions`` are read in turn.
+            expected: Dict with keys ``"uprn"``, ``"date"``, ``"source"`` and
+                ``"elements"``. Each element is a dict with ``"element_type"``,
+                ``"element_instance"`` and ``"aspects"``. Each aspect is a dict
+                of attribute name -> expected value; only the keys present in
+                that dict are compared, so an omitted key is not checked.
+
+        Raises:
+            AssertionError: On the first mismatch, with a message locating it.
+        """
+        assert actual_model.uprn == expected["uprn"], "UPRN differs"
+        assert actual_model.date == expected["date"], "Date differs"
+        assert actual_model.source == expected["source"], "Source differs"
+
+        # Lengths are checked before each zip() so that zip's silent truncation
+        # cannot hide a missing or extra element/aspect.
+        assert len(actual_model.elements) == len(expected["elements"]), (
+            f"Expected {len(expected['elements'])} elements, "
+            f"got {len(actual_model.elements)}"
+        )
+
+        for i, (actual_element, expected_element) in enumerate(
+            zip(actual_model.elements, expected["elements"])
+        ):
+            assert (
+                actual_element.element_type == expected_element["element_type"]
+            ), f"Element[{i}].element_type differs"
+            assert (
+                actual_element.element_instance == expected_element["element_instance"]
+            ), f"Element[{i}].element_instance differs"
+
+            assert len(actual_element.aspect_conditions) == len(
+                expected_element["aspects"]
+            ), f"Element[{i}] aspect count differs"
+
+            for j, (actual_aspect, expected_aspect) in enumerate(
+                zip(actual_element.aspect_conditions, expected_element["aspects"])
+            ):
+                prefix = f"Element[{i}].Aspect[{j}]"
+
+                # Compare attribute-by-attribute, driven by the expected keys.
+                for key, value in expected_aspect.items():
+                    assert getattr(actual_aspect, key) == value, (
+                        f"{prefix}.{key} differs: "
+                        f"{getattr(actual_aspect, key)} != {value}"
+                    )
--- a/backend/condition/tests/parsing/test_peabody_parser.py
+++ b/backend/condition/tests/parsing/test_peabody_parser.py
@ -1,19 +1,23 @@
 import pytest
-from typing import Any
+from typing import Any, Dict
 from io import BytesIO
 from openpyxl import Workbook
 from datetime import datetime

 from backend.condition.parsing.peabody_parser import PeabodyParser
-from backend.condition.parsing.records.peabody.peabody_asset_condition import PeabodyAssetCondition
+from backend.condition.parsing.records.peabody.peabody_asset_condition import (
+    PeabodyAssetCondition,
+)
 from backend.condition.parsing.records.peabody.peabody_property import PeabodyProperty

+
@pytest.fixture
 def peabody_assets_xlsx_bytes() -> BytesIO:
    wb = Workbook()
    survey_records_d_and_lower = wb.active
    survey_records_d_and_lower.title = "Survey Records - D & Lower"
-    survey_records_d_and_lower.append([
+    survey_records_d_and_lower.append(
+        [
            "Lo_Reference",
            "full_address",
            "location_type_code",
@ -30,8 +34,10 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
            "cloned",
            "lo_type_code",
            "condition_survey_date",
-    ])
-    survey_records_d_and_lower.append([
+        ]
+    )
+    survey_records_d_and_lower.append(
+        [
            "B000RAND",
            "1 RANDOM HOUSE LONDON",
            3,
@ -47,9 +53,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
            330,
            "N",
            3,
-        datetime(2025,12,4,9,17,0)
-    ])
-    survey_records_d_and_lower.append([
+            datetime(2025, 12, 4, 9, 17, 0),
+        ]
+    )
+    survey_records_d_and_lower.append(
+        [
            "B000BLOCK",
            "1100 BLOCK",
            3,
@ -65,9 +73,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
            330,
            "N",
            3,
-        datetime(2025,12,4,9,17,0)
-    ])
-    survey_records_d_and_lower.append([
+            datetime(2025, 12, 4, 9, 17, 0),
+        ]
+    )
+    survey_records_d_and_lower.append(
+        [
            "B000FAKE",
            "3 FAKE CLOSE LONDON",
            3,
@ -83,9 +93,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
            1500.7,
            "N",
            3,
-        datetime(2025,7,5,0,0,0)
-    ])
-    survey_records_d_and_lower.append([
+            datetime(2025, 7, 5, 0, 0, 0),
+        ]
+    )
+    survey_records_d_and_lower.append(
+        [
            "B000MIS",
            "99 MISC ROAD LONDON",
            3,
@ -101,9 +113,11 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
            None,
            "N",
            3,
-        None
-    ])
-    survey_records_d_and_lower.append([
+            None,
+        ]
+    )
+    survey_records_d_and_lower.append(
+        [
            "B000MIS",
            "99 MISC ROAD LONDON",
            3,
@ -119,9 +133,9 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
            3531,
            "N",
            3,
-        None
-    ])
-
+            None,
+        ]
+    )

    stream = BytesIO()
    wb.save(stream)
@ -129,18 +143,32 @@ def peabody_assets_xlsx_bytes() -> BytesIO:

    return stream

-def test_peabody_parser_parses_conditions(peabody_assets_xlsx_bytes):
+
+@pytest.fixture
+def location_ref_to_uprn_map() -> Dict[str, int]:
+    """Map each location reference used in the workbook fixture to a stand-in UPRN."""
+    location_refs = ("B000RAND", "B000BLOCK", "B000FAKE", "B000MIS")
+    # UPRNs are simply 1..4, assigned in the order the references are listed.
+    return {ref: uprn for uprn, ref in enumerate(location_refs, start=1)}
+
+
+def test_peabody_parser_parses_conditions(
+    peabody_assets_xlsx_bytes, location_ref_to_uprn_map
+):
    # arrange
    parser = PeabodyParser()

    # act
-    result: Any = parser.parse(peabody_assets_xlsx_bytes)
+    result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map)

    # assert
    assert len(result) == 3

    assert all(isinstance(item, PeabodyProperty) for item in result)

+
@pytest.fixture
 def asset_condition_factory():
    def _factory(full_address: str) -> PeabodyAssetCondition:
@ -165,6 +193,7 @@ def asset_condition_factory():

    return _factory

+
@pytest.mark.parametrize(
    "full_address, expected_block_level",
    [
@ -175,7 +204,7 @@ def asset_condition_factory():
        ("81A-B  GORE ROAD    LONDON", True),
        ("73 & 74 HARVEST COURT  ST. ALBANS", True),
        ("25  HAVERSHAM COURT  GREENFORD", False),
-        ("FLAT 10  SPARROW COURT  SOUTHMERE DRIVE  LONDON  SE2 9ES", False)
+        ("FLAT 10  SPARROW COURT  SOUTHMERE DRIVE  LONDON  SE2 9ES", False),
    ],
 )
 def test_peabody_asset_is_block_level(
--- a/backend/condition/tests/persistence/test_condition_postgres.py
+++ b/backend/condition/tests/persistence/test_condition_postgres.py
@ -0,0 +1,164 @@
+import pytest
+from datetime import date
+
+from backend.condition.persistence.condition_postgres import ConditionPostgres
+from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
+from backend.condition.domain.element import Element
+from backend.condition.domain.element_type import ElementType
+from backend.condition.domain.aspect_condition import AspectCondition
+from backend.condition.domain.aspect_type import AspectType
+from backend.app.db.models.condition import PropertyConditionSurveyModel
+from backend.condition.tests.custom_asserts import CustomAsserts
+
+
+def test_map_survey_to_model() -> None:
+    """Map a three-element domain survey to its model and compare every field.
+
+    The assertion helper only checks the keys present in each expected aspect
+    dict, so every expected aspect lists all seven attributes explicitly.
+    """
+    # arrange
+    survey = PropertyConditionSurvey(
+        uprn=1,
+        elements=[
+            Element(
+                element_type=ElementType.EXTERNAL_WINDOWS,
+                element_instance=1,
+                aspect_conditions=[
+                    AspectCondition(
+                        aspect_type=AspectType.MATERIAL,
+                        aspect_instance=1,
+                        value="UPVC Double Glazed",
+                        quantity=8,
+                        install_date=None,
+                        renewal_year=2036,
+                        comments=None,
+                    ),
+                ],
+            ),
+            Element(
+                element_type=ElementType.EXTERNAL_DECORATION,
+                element_instance=1,
+                aspect_conditions=[
+                    AspectCondition(
+                        aspect_type=AspectType.CONDITION,
+                        aspect_instance=1,
+                        value="Normal",
+                        quantity=1,
+                        install_date=None,
+                        renewal_year=2029,
+                        comments=None,
+                    )
+                ],
+            ),
+            Element(
+                element_type=ElementType.EXTERNAL_WALL,
+                element_instance=1,
+                aspect_conditions=[
+                    AspectCondition(
+                        aspect_type=AspectType.FINISH,
+                        aspect_instance=1,
+                        value="Pointed",
+                        quantity=65,
+                        install_date=None,
+                        renewal_year=2045,
+                        comments=None,
+                    ),
+                    AspectCondition(
+                        aspect_type=AspectType.FINISH,
+                        aspect_instance=1,
+                        value="Pointing",
+                        quantity=1,
+                        install_date=None,
+                        renewal_year=2069,
+                        comments=None,
+                    ),
+                    AspectCondition(
+                        aspect_type=AspectType.FINISH,
+                        aspect_instance=2,
+                        value="Tile Hung",
+                        quantity=8,
+                        install_date=None,
+                        renewal_year=2049,
+                        comments=None,
+                    ),
+                ],
+            ),
+        ],
+        date=date(2000, 1, 1),
+        source="Peabody",
+    )
+
+    expected = {
+        "uprn": 1,
+        "date": date(2000, 1, 1),
+        "source": "Peabody",
+        "elements": [
+            {
+                "element_type": ElementType.EXTERNAL_WINDOWS,
+                "element_instance": 1,
+                "aspects": [
+                    {
+                        "aspect_type": AspectType.MATERIAL,
+                        "aspect_instance": 1,
+                        "value": "UPVC Double Glazed",
+                        "quantity": 8,
+                        "install_date": None,
+                        "renewal_year": 2036,
+                        "comments": None,
+                    }
+                ],
+            },
+            {
+                "element_type": ElementType.EXTERNAL_DECORATION,
+                "element_instance": 1,
+                "aspects": [
+                    {
+                        "aspect_type": AspectType.CONDITION,
+                        "aspect_instance": 1,
+                        "value": "Normal",
+                        "quantity": 1,
+                        "install_date": None,
+                        "renewal_year": 2029,
+                        "comments": None,
+                    }
+                ],
+            },
+            {
+                "element_type": ElementType.EXTERNAL_WALL,
+                "element_instance": 1,
+                "aspects": [
+                    {
+                        # Previously omitted, which left this aspect's type
+                        # unchecked by the key-driven comparison.
+                        "aspect_type": AspectType.FINISH,
+                        "aspect_instance": 1,
+                        "value": "Pointed",
+                        "quantity": 65,
+                        "install_date": None,
+                        "renewal_year": 2045,
+                        "comments": None,
+                    },
+                    {
+                        "aspect_type": AspectType.FINISH,
+                        "aspect_instance": 1,
+                        "value": "Pointing",
+                        "quantity": 1,
+                        "install_date": None,
+                        "renewal_year": 2069,
+                        "comments": None,
+                    },
+                    {
+                        "aspect_type": AspectType.FINISH,
+                        "aspect_instance": 2,
+                        "value": "Tile Hung",
+                        "quantity": 8,
+                        "install_date": None,
+                        "renewal_year": 2049,
+                        "comments": None,
+                    },
+                ],
+            },
+        ],
+    }
+
+    # act
+    model: PropertyConditionSurveyModel = ConditionPostgres.map_survey_to_model(survey)
+
+    # assert (survey level)
+    CustomAsserts.assert_property_condition_survey_model_matches_expected(
+        model,
+        expected,
+    )
--- a/backend/engine/requirements.txt
+++ b/backend/engine/requirements.txt
@ -1,3 +1,4 @@
+
 # Pandas and numpy
 numpy==2.1.2
 pandas==2.2.3
--- a/backend/postcode_splitter/hackney.xlsx
+++ b/backend/postcode_splitter/hackney.xlsx
--- a/backend/postcode_splitter/main.py
+++ b/backend/postcode_splitter/main.py
@ -0,0 +1,114 @@
+import pandas as pd
+import requests
+from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode
+from tqdm import tqdm
+
+
+
+def sanitise_postcode(postcode: str) -> str | None:
+    """
+    Normalise postcode for grouping.
+
+    - Uppercase
+    - Remove all whitespace (spaces, tabs, newlines, non-breaking spaces)
+
+    Returns None when the value is missing (None / NaN).
+    """
+    if pd.isna(postcode):
+        return None
+
+    # str.split() with no argument splits on any run of Unicode whitespace, so
+    # re-joining the pieces also drops tabs and non-breaking spaces that
+    # replace(" ", "") would have left in place.
+    return "".join(postcode.upper().split())
+
+
+def is_valid_postcode(postcode_clean: str) -> bool:
+    """
+    Validate postcode using postcodes.io.
+
+    Expects a sanitised postcode (e.g. E84SQ).
+    Returns True if valid, False otherwise. Empty input, network/HTTP errors
+    and unparseable response bodies are all reported as False.
+    """
+    # Imported locally so the function carries its own dependency.
+    from urllib.parse import quote
+
+    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
+    if not postcode_clean:
+        return False
+
+    # Percent-encode the value: a cell such as "N/A" would otherwise inject an
+    # extra path segment into the request URL.
+    url = POSTCODES_IO_VALIDATE_URL.format(postcode=quote(postcode_clean, safe=""))
+
+    try:
+        resp = requests.get(url, timeout=5)
+        resp.raise_for_status()
+        # Coerce so a null or missing "result" still yields a real bool.
+        return bool(resp.json().get("result", False))
+    except (requests.RequestException, ValueError):
+        # Network issues, rate limits, etc. ValueError covers a non-JSON body
+        # on requests versions whose JSON decode error is not a
+        # RequestException.
+        return False
+
+
+def main():
+    """
+    Resolve UPRNs for the rows of the "Sustainability" sheet of hackney.xlsx.
+
+    Steps: sanitise the postcodes, validate each distinct postcode once, then
+    for every valid postcode fetch EPC data and match that postcode's rows
+    against it. Groups with no EPC data, or whose processing raises, are kept
+    in the output with a ``status`` marker instead of being dropped.
+    """
+    df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability")
+    # NOTE(review): only the first 500 rows are processed - looks like a
+    # development limit; confirm before a full run.
+    df = df.head(500)
+
+    # Sanitise postcodes
+    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)
+
+    # Validate each distinct, non-null postcode once (saves API calls)
+    unique_postcodes = df["postcode_clean"].dropna().unique()
+    postcode_validity = {
+        pc: is_valid_postcode(pc)
+        for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
+    }
+
+    # Map validity back onto the dataframe. Rows with a missing postcode are
+    # not in the lookup; they must become False rather than NaN, because a NaN
+    # in the mask makes the boolean indexing below raise.
+    df["postcode_valid"] = (
+        df["postcode_clean"]
+        .map(lambda pc: bool(postcode_validity.get(pc, False)))
+        .astype(bool)
+    )
+
+    results = []
+
+    for postcode, group_df in tqdm(
+        df[df["postcode_valid"]].groupby("postcode_clean"),
+        desc="Resolving UPRNs by postcode",
+    ):
+        try:
+            epc_df = get_epc_data_with_postcode(postcode)
+
+            if epc_df.empty:
+                tmp = group_df.copy()
+                tmp["found_uprn"] = None
+                tmp["status"] = "no_epc_results"
+                results.append(tmp)
+                continue
+
+            resolved = resolve_uprns_for_postcode_group(
+                group_df=group_df,
+                epc_df=epc_df,
+            )
+
+            results.append(resolved)
+
+        except Exception as e:
+            # Best-effort: keep the group's rows and record why it failed.
+            tmp = group_df.copy()
+            tmp["found_uprn"] = None
+            tmp["status"] = "exception"
+            tmp["error"] = str(e)
+            results.append(tmp)
+
+    if not results:
+        # No valid postcode produced a group; pd.concat([]) would raise.
+        return
+
+    final_df = pd.concat(results, ignore_index=True)
+
+    review_columns = [
+        "best_match_lexiscore", "Address 1",
+        "best_match_address", "Postcode",
+        "UPRN", "best_match_uprn",
+    ]
+    # reindex instead of [[...]]: the fallback frames built above never add the
+    # best_match_* columns (presumably resolve_uprns_for_postcode_group does),
+    # so when every group fell back they are absent and [[...]] would KeyError.
+    review_view = final_df.reindex(columns=review_columns)
+    # Rows with a positive match score only.
+    matched_view = review_view[review_view["best_match_lexiscore"] > 0]
+    # NOTE(review): neither view is written out or returned - presumably they
+    # are inspected interactively; confirm and persist them if needed.
+
+if __name__ == "__main__":
+    main()
--- a/conftest.py
+++ b/conftest.py
@ -1,5 +1,11 @@
 import os
 from backend.app.config import get_settings
+import os
+from dotenv import load_dotenv
+import os
+
+# Load .env in conftest.py directory for local development
+load_dotenv()

 DEFAULT_ENV = {
    "API_KEY": "test",
@ -8,7 +14,10 @@ DEFAULT_ENV = {
    "DATA_BUCKET": "test",
    "PLAN_TRIGGER_BUCKET": "test",
    "ENGINE_SQS_URL": "test",
-    "EPC_AUTH_TOKEN": "test",  # overridden in GitHub Actions
+    "EPC_AUTH_TOKEN": os.getenv(
+        "EPC_AUTH_TOKEN",
+        "test",
+    ),  # overridden in GitHub Actions
    "GOOGLE_SOLAR_API_KEY": "test",
    "DB_HOST": "localhost",
    "DB_USERNAME": "test",
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -1,111 +1,111 @@
 import pandas as pd

-epc_c_recommendations = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
-    "solid floor, ashp 3.0 - corrected.xlsx"
-)
-epc_b_recommendations = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
-    "solid floor, ashp 3.0 - corrected.xlsx"
-)
+# epc_c_recommendations = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+#     "solid floor, ashp 3.0 - corrected.xlsx"
+# )
+# epc_b_recommendations = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC B - no "
+#     "solid floor, ashp 3.0 - corrected.xlsx"
+# )

-epc_c_movers = epc_b_recommendations[
-    epc_b_recommendations["current_epc_rating"] == "Epc.C"
-    ]
-epc_c_movers["property_type"].value_counts()
+# epc_c_movers = epc_b_recommendations[
+#     epc_b_recommendations["current_epc_rating"] == "Epc.C"
+#     ]
+# epc_c_movers["property_type"].value_counts()

-house_epc_c_movers = epc_c_movers[
-    epc_c_movers["property_type"] == "House"
-    ]
-house_epc_c_movers_with_solar = house_epc_c_movers[
-    ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
-    ]
+# house_epc_c_movers = epc_c_movers[
+#     epc_c_movers["property_type"] == "House"
+#     ]
+# house_epc_c_movers_with_solar = house_epc_c_movers[
+#     ~pd.isnull(house_epc_c_movers["solar_pv"]) | ~pd.isnull(house_epc_c_movers["solar_pv_with_battery"])
+#     ]

-house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
-    ~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
-]
+# house_epc_c_movers_with_a_heatpump = house_epc_c_movers[
+#     ~pd.isnull(house_epc_c_movers["air_source_heat_pump"])
+# ]

-flat_epc_c_movers = epc_c_movers[
-    epc_c_movers["property_type"] == "Flat"
-    ]
+# flat_epc_c_movers = epc_c_movers[
+#     epc_c_movers["property_type"] == "Flat"
+#     ]

-epc_c_recommendations["sap_points"].mean()
-epc_c_recommendations["sap_points"].mean()
+# epc_c_recommendations["sap_points"].mean()
+# epc_c_recommendations["sap_points"].mean()

-measure_cols = [
-    "air_source_heat_pump",
-    "boiler_upgrade",
-    "cavity_wall_insulation",
-    "double_glazing",
-    "external_wall_insulation",
-    "flat_roof_insulation",
-    "high_heat_retention_storage_heaters",
-    "internal_wall_insulation",
-    "loft_insulation",
-    "low_energy_lighting",
-    "mechanical_ventilation",
-    "room_roof_insulation",
-    "roomstat_programmer_trvs",
-    "sealing_open_fireplace",
-    "secondary_glazing",
-    "secondary_heating",
-    "solar_pv",
-    "solar_pv_with_battery",
-    "suspended_floor_insulation",
-    "time_temperature_zone_control",
-]
+# measure_cols = [
+#     "air_source_heat_pump",
+#     "boiler_upgrade",
+#     "cavity_wall_insulation",
+#     "double_glazing",
+#     "external_wall_insulation",
+#     "flat_roof_insulation",
+#     "high_heat_retention_storage_heaters",
+#     "internal_wall_insulation",
+#     "loft_insulation",
+#     "low_energy_lighting",
+#     "mechanical_ventilation",
+#     "room_roof_insulation",
+#     "roomstat_programmer_trvs",
+#     "sealing_open_fireplace",
+#     "secondary_glazing",
+#     "secondary_heating",
+#     "solar_pv",
+#     "solar_pv_with_battery",
+#     "suspended_floor_insulation",
+#     "time_temperature_zone_control",
+# ]

-epc_c_melted = (
-    epc_c_recommendations
-    .melt(
-        id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
-        value_vars=measure_cols,
-        var_name="measure_type",
-        value_name="value",
-    )
-    .dropna(subset=["value"])
-)
-epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
-epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
+# epc_c_melted = (
+#     epc_c_recommendations
+#     .melt(
+#         id_vars=[c for c in epc_c_recommendations.columns if c not in measure_cols],
+#         value_vars=measure_cols,
+#         var_name="measure_type",
+#         value_name="value",
+#     )
+#     .dropna(subset=["value"])
+# )
+# epc_c_melted = epc_c_melted[epc_c_melted["value"] > 0]
+# epc_c_measures = epc_c_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()

-epc_b_melted = (
-    epc_b_recommendations
-    .melt(
-        id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
-        value_vars=measure_cols,
-        var_name="measure_type",
-        value_name="value",
-    )
-    .dropna(subset=["value"])
-)
+# epc_b_melted = (
+#     epc_b_recommendations
+#     .melt(
+#         id_vars=[c for c in epc_b_recommendations.columns if c not in measure_cols],
+#         value_vars=measure_cols,
+#         var_name="measure_type",
+#         value_name="value",
+#     )
+#     .dropna(subset=["value"])
+# )

-epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
-epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()
+# epc_b_melted = epc_b_melted[epc_b_melted["value"] > 0]
+# epc_b_measures = epc_b_melted["measure_type"].value_counts(normalize=True).to_frame().reset_index()

-measures_compared = epc_c_measures.merge(
-    epc_b_measures,
-    left_on="measure_type",
-    right_on="measure_type",
-    suffixes=("_epc_c", "_epc_b"),
-)
+# measures_compared = epc_c_measures.merge(
+#     epc_b_measures,
+#     left_on="measure_type",
+#     right_on="measure_type",
+#     suffixes=("_epc_c", "_epc_b"),
+# )

-epc_c_retrofits = epc_c_recommendations[
-    epc_c_recommendations["total_retrofit_cost"] > 0
-    ]
+# epc_c_retrofits = epc_c_recommendations[
+#     epc_c_recommendations["total_retrofit_cost"] > 0
+#     ]

-epc_b_retrofits = epc_b_recommendations[
-    epc_b_recommendations["total_retrofit_cost"] > 0
-    ]
+# epc_b_retrofits = epc_b_recommendations[
+#     epc_b_recommendations["total_retrofit_cost"] > 0
+#     ]

-epc_c_retrofits["sap_points"].mean()
-epc_b_retrofits["sap_points"].mean()
+# epc_c_retrofits["sap_points"].mean()
+# epc_b_retrofits["sap_points"].mean()

-properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))
+# properties_in_both = epc_c_retrofits.merge(epc_b_retrofits, on="uprn", suffixes=("_epc_c", "_epc_b"))

-properties_in_both["total_retrofit_cost_epc_c"].mean()
-properties_in_both["sap_points_epc_c"].mean()
-properties_in_both["total_retrofit_cost_epc_b"].mean()
-properties_in_both["sap_points_epc_b"].mean()
+# properties_in_both["total_retrofit_cost_epc_c"].mean()
+# properties_in_both["sap_points_epc_c"].mean()
+# properties_in_both["total_retrofit_cost_epc_b"].mean()
+# properties_in_both["sap_points_epc_b"].mean()

 # Solar PV savings - we need the amount of solar PV bill savings
 from sqlalchemy.orm import sessionmaker
@ -114,16 +114,12 @@ from backend.app.db.models.recommendations import Recommendation, Plan, PlanReco
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from collections import defaultdict

-PORTFOLIO_ID = 435  # Peabody
+PORTFOLIO_ID = 485  # Peabody
 SCENARIOS = [
-    908,
-    909,
-    910,
+    970
 ]
 scenario_names = {
-    908: "EPC C - no solid floor, ashp 3.0",
-    909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
-    910: "EPC B - no solid floor, no EWI, ashp 3.0"
+    970: "EPC C - no solid floor, ashp 3.0",
 }


@ -236,307 +232,266 @@ recommendations_df = pd.DataFrame(recommendations_data)
 properties_df = pd.DataFrame(properties_data)
 plans_df = pd.DataFrame(plans_data)

-s_id = 910
-ps_w_a_plan = plans_df[plans_df["scenario_id"] == s_id].copy()
-# Take the newest by scenario id
-ps_w_a_plan = ps_w_a_plan.sort_values("created_at", ascending=False).drop_duplicates(
-    subset=["property_id"]
-)
-z = ps_w_a_plan[
-    ps_w_a_plan["cost_of_works"] > 0
-    ].copy()
-z2 = properties_df[properties_df["property_id"].isin(z["property_id"].values)]
-# '', 'hot_water_cost_current',
-#        'lighting_cost_current', 'appliances_cost_current',
-#        'gas_standing_charge', 'electricity_standing_charge'
-z2["total_bills"] = z2["heating_cost_current"] + z2["hot_water_cost_current"] + z2["lighting_cost_current"] + z2[
-    "appliances_cost_current"
-] + z2["gas_standing_charge"] + z2["electricity_standing_charge"]
+# Write the recommendations and properties frames to one workbook, one sheet each.
+# The path is relative, so the file is created in the current working directory.
+# NOTE(review): PORTFOLIO_ID above is annotated "Peabody" but the output is named
+# hackney.xlsx - confirm which customer this run is for.
+with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
+    recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
+    properties_df.to_excel(writer, sheet_name="properties", index=False)

-from tqdm import tqdm
    
-# For a property ID, find a property where the no EWI/IWI approach is more expensive than the EWI approach
-pids = properties_df["property_id"].unique()
-for pid in tqdm(pids):
+# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
+# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()

-    if pid in [603272, 550550, 574493]:
-        continue

-    # get the plans
-    property_plan = plans_df[plans_df["property_id"] == int(pid)]
-    # Take the newest plan by scenario id
-    property_plan = property_plan.sort_values("created_at", ascending=False).drop_duplicates(
-        subset=["scenario_id"]
-    )
-    a = property_plan[property_plan["scenario_id"] == 909].squeeze()  # no EWI/IWI
-    b = property_plan[property_plan["scenario_id"] == 908].squeeze()  # EWI
-    if (a["cost_of_works"] > b["cost_of_works"]) and (
-        a["post_epc_rating"].value == "C") and (b["cost_of_works"] > 5000):
-        bah

-solar_pv_recommendations = recommendations_df[
-    recommendations_df["measure_type"] == "solar_pv"
-    ]
+# # Check tenures
+# initial_asset_data = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+#     "- Data Extracts for Domna.xlsx",
+#     sheet_name="Properties"
+# )
+# sustainability_data = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
+#     "- Data Extracts for Domna.xlsx",
+#     sheet_name="Sustainability"
+# )

-solid_wall_recommendation = recommendations_df[
-    recommendations_df["scenario_id"].isin([908]) &
-    recommendations_df["measure_type"].isin(["internal_wall_insulation"]) &
-    recommendations_df["default"]
-    ]
-average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
-# Add on scenarion names
-average_savings["scenario_name"] = average_savings["scenario_id"].map(scenario_names)
+# sustainability_sample = sustainability_data[
+#     sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
+# ]

-# Check tenures
-initial_asset_data = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
-    "- Data Extracts for Domna.xlsx",
-    sheet_name="Properties"
-)
-sustainability_data = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
-    "- Data Extracts for Domna.xlsx",
-    sheet_name="Sustainability"
-)
+# sustainability_sample = sustainability_sample.merge(
+#     initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
+# )

-sustainability_sample = sustainability_data[
-    sustainability_data["UPRN"].isin(properties_df["uprn"].astype(int).astype(str).values)
-]
+# block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
+# block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)

-sustainability_sample = sustainability_sample.merge(
-    initial_asset_data, left_on="Org Ref", right_on="UPRN", suffixes=("_sustainability", "_initial_asset")
-)
+# initial_asset_data.columns
+# initial_asset_data["LeaseType"].value_counts()

-block_sizes = initial_asset_data["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
-block_sizes.to_excel("/Users/khalimconn-kowlessar/Downloads/peabody_block_sizes.xlsx", index=False)
+# # sustainability_sample["Tenure Group"].value_counts()
+# # Tenure Group
+# # General Needs               57787
+# # Home Ownership              25471
+# # Care & Supported Housing     4239
+# # Rental                       2677
+# # Other                         188

-initial_asset_data.columns
-initial_asset_data["LeaseType"].value_counts()
+# df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
+# df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)

-# sustainability_sample["Tenure Group"].value_counts()
-# Tenure Group
-# General Needs               57787
-# Home Ownership              25471
-# Care & Supported Housing     4239
-# Rental                       2677
-# Other                         188
+# tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
+# tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)

-df = sustainability_sample["Ownership Type"].value_counts().to_frame().reset_index()
-df.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenures.xlsx", index=False)
+# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()

-tenure_groups = sustainability_sample["Tenure Group"].value_counts().to_frame().reset_index()
-tenure_groups.to_excel("/Users/khalimconn-kowlessar/Downloads/sustainability_tenure_groups.xlsx", index=False)
+# sample_data = initial_asset_data[
+#     ~initial_asset_data["Ownership Type"].isin(
+#         [
+#             # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
+#             # Freeholder
+#             "FREEHOLDER",  # 19517 properties
+#             # HOMEBUY / EQUITY LOAN
+#             "Rent to Homebuy",  # 1 property
+#             # Leaseholder
+#             "LEASEHOLD 100%",  # 8455 properties
+#             "Owned and Managed - 999 year lease",  # 2076 properties
+#             "Managed but not Owned-Private Lease",  # 159 properties
+#             "Owned and managed LEASEHOLD",  # 26 properties
+#             # Outright Sale - can't find anything matching
+#             # SHARED EQUITY
+#             "Shared Ownership",  # 4065 properties
+#             "Shared Ownership Owned Not Managed",  # 23 properties
+#             # Extra categories which seem sensible to exclude
+#             "NOT MANAGED AND NOT OWNED"
+#         ]
+#     )
+# ]

-initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Tenure Group"].value_counts()
+# sample_data["Ownership Type"].value_counts()

-sample_data = initial_asset_data[
-    ~initial_asset_data["Ownership Type"].isin(
-        [
-            # Commercial # Everything is resi - based on the Residential Indicator variable - all are true
-            # Freeholder
-            "FREEHOLDER",  # 19517 properties
-            # HOMEBUY / EQUITY LOAN
-            "Rent to Homebuy",  # 1 property
-            # Leaseholder
-            "LEASEHOLD 100%",  # 8455 properties
-            "Owned and Managed - 999 year lease",  # 2076 properties
-            "Managed but not Owned-Private Lease",  # 159 properties
-            "Owned and managed LEASEHOLD",  # 26 properties
-            # Outright Sale - can't find anything matching
-            # SHARED EQUITY
-            "Shared Ownership",  # 4065 properties
-            "Shared Ownership Owned Not Managed",  # 23 properties
-            # Extra categories which seem sensible to exclude
-            "NOT MANAGED AND NOT OWNED"
-        ]
-    )
-]
+# sample_data = initial_asset_data[
+#     initial_asset_data["Ownership Type"].isin(
+#         [
+#             "Owned and Managed",
+#             "Owned and Managed - 999 year lease",
+#             "Owned and managed LEASEHOLD",
+#             "LEASEHOLD 100%",
+#             "DATALOAD DEFAULT"
+#         ]
+#     )
+# ]
+# dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
+# dropped["Ownership Type"].value_counts()

-sample_data["Ownership Type"].value_counts()
+# for value in [
+#     # Commercial # Everything is resi, so should be fine. No matches
+#     # Freeholder
+#     "FREEHOLDER",  # 19517 properties
+#     # HOMEBUY / EQUITY LOAN
+#     "Rent to Homebuy",  # 1 property
+#     # Leaseholder
+#     "LEASEHOLD 100%",  # 8455 properties
+#     "Owned and Managed - 999 year lease",  # 2076 properties
+#     "Managed but not Owned-Private Lease",  # 159 properties
+#     "Owned and managed LEASEHOLD",  # 26 properties
+#     # Outright Sale - can't find anything matching
+#     # SHARED EQUITY
+#     "Shared Ownership",  # 4065 properties
+#     "Shared Ownership Owned Not Managed",  # 23 properties
+# ]:
+#     print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])

-sample_data = initial_asset_data[
-    initial_asset_data["Ownership Type"].isin(
-        [
-            "Owned and Managed",
-            "Owned and Managed - 999 year lease",
-            "Owned and managed LEASEHOLD",
-            "LEASEHOLD 100%",
-            "DATALOAD DEFAULT"
-        ]
-    )
-]
-dropped = initial_asset_data[~initial_asset_data["UPRN"].isin(sample_data["UPRN"].values)]
-dropped["Ownership Type"].value_counts()
+# house_types = [
+#     "HOUSE",
+#     "BUNGALOW",
+#     "MAISONETTE",
+#     "DUPLEX",
+# ]

-for value in [
-    # Commercial # Everything is resi, so should be fine. No matches
-    # Freeholder
-    "FREEHOLDER",  # 19517 properties
-    # HOMEBUY / EQUITY LOAN
-    "Rent to Homebuy",  # 1 property
-    # Leaseholder
-    "LEASEHOLD 100%",  # 8455 properties
-    "Owned and Managed - 999 year lease",  # 2076 properties
-    "Managed but not Owned-Private Lease",  # 159 properties
-    "Owned and managed LEASEHOLD",  # 26 properties
-    # Outright Sale - can't find anything matching
-    # SHARED EQUITY
-    "Shared Ownership",  # 4065 properties
-    "Shared Ownership Owned Not Managed",  # 23 properties
-]:
-    print(initial_asset_data[initial_asset_data["Ownership Type"] == value].shape[0])
+# guaranteed_control = [
+#     "Owned and Managed",
+#     "Owned and Managed - 999 year lease",
+#     "Owned and managed LEASEHOLD",
+#     "LEASEHOLD 100%",
+#     "DATALOAD DEFAULT",
+# ]

-house_types = [
-    "HOUSE",
-    "BUNGALOW",
-    "MAISONETTE",
-    "DUPLEX",
-]
+# sample_data = initial_asset_data[
+#     (
+#         initial_asset_data["Ownership Type"].isin(guaranteed_control)
+#     )
+#     |
+#     (
+#         (initial_asset_data["Ownership Type"] == "FREEHOLDER")
+#         &
+#         (initial_asset_data["Property Type"].isin(house_types))
+#     )
+#     ]

-guaranteed_control = [
-    "Owned and Managed",
-    "Owned and Managed - 999 year lease",
-    "Owned and managed LEASEHOLD",
-    "LEASEHOLD 100%",
-    "DATALOAD DEFAULT",
-]
+# fabric_retrofit_sample = initial_asset_data[
+#     initial_asset_data["Ownership Type"].isin(
+#         [
+#             "Owned and Managed",
+#             "FREEHOLDER",
+#             "DATALOAD DEFAULT",
+#         ]
+#     )
+# ]

-sample_data = initial_asset_data[
-    (
-        initial_asset_data["Ownership Type"].isin(guaranteed_control)
-    )
-    |
-    (
-        (initial_asset_data["Ownership Type"] == "FREEHOLDER")
-        &
-        (initial_asset_data["Property Type"].isin(house_types))
-    )
-    ]
+# initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
+# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()

-fabric_retrofit_sample = initial_asset_data[
-    initial_asset_data["Ownership Type"].isin(
-        [
-            "Owned and Managed",
-            "FREEHOLDER",
-            "DATALOAD DEFAULT",
-        ]
-    )
-]
+# initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
+# z = initial_asset_data[
+#     ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
+#     ]

-initial_asset_data[pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
-initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Ownership Type"].value_counts()
+# block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
+# zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]

-initial_asset_data[~pd.isnull(initial_asset_data["BlockCode"])]["Property Type"].value_counts()
-z = initial_asset_data[
-    ~pd.isnull(initial_asset_data["BlockCode"]) & initial_asset_data["Property Type"].isin(house_types)
-    ]
+# potential_sample = initial_asset_data[
+#     ~pd.isnull(initial_asset_data["BlockCode"])
+# ]

-block_code_agg = z["BlockCode"].value_counts().reset_index().sort_values("count", ascending=False)
-zz = initial_asset_data[initial_asset_data["BlockCode"] == "CHAT3343FM"]
+# compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
+#     initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
+#     left_on="Property Type",
+#     right_on="Property Type",
+#     suffixes=("_on_block_codes", "_overall")
+# )

-potential_sample = initial_asset_data[
-    ~pd.isnull(initial_asset_data["BlockCode"])
-]
+# # Comparison of smaller sample vs overall
+# new_asset_data = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
+#     "- Peabody "
+#     "- Data Extracts for Domna v2.xlsx",
+#     sheet_name="Properties"
+# )

-compare = potential_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
-    initial_asset_data["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
-    left_on="Property Type",
-    right_on="Property Type",
-    suffixes=("_on_block_codes", "_overall")
-)
+# new_sustainability_data = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
+#     "- Peabody "
+#     "- Data Extracts for Domna v2.xlsx",
+#     sheet_name="Sustainability"
+# )

-# Comparison of smaller sample vs overall
-new_asset_data = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
-    "- Peabody "
-    "- Data Extracts for Domna v2.xlsx",
-    sheet_name="Properties"
-)
+# sap_bands = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
+#     "08012026.xlsx",
+# )

-new_sustainability_data = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
-    "- Peabody "
-    "- Data Extracts for Domna v2.xlsx",
-    sheet_name="Sustainability"
-)
+# combined = new_asset_data.merge(
+#     new_sustainability_data,
+#     left_on="UPRN",
+#     right_on="Org Ref",
+#     suffixes=("_asset", "_sustainability")
+# ).merge(
+#     sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
+# )
+# reduced_sample = combined[
+#     ~combined["AH Tenure"].isin(
+#         ["Commercial",
+#          "Freeholder",
+#          "HOMEBUY / EQUITY LOAN",
+#          "Leaseholder",
+#          "Outright Sale",
+#          "SHARED EQUITY",
+#          "Shared Ownership"]
+#     )
+# ].copy()

-sap_bands = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data "
-    "08012026.xlsx",
-)
+# # property types
+# property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
+#     combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
+#     left_on="Property Type",
+#     right_on="Property Type",
+#     suffixes=("_reduced_sample", "_overall")
+# )

-combined = new_asset_data.merge(
-    new_sustainability_data,
-    left_on="UPRN",
-    right_on="Org Ref",
-    suffixes=("_asset", "_sustainability")
-).merge(
-    sap_bands[["OrgRef", "SAP Band", "Lodged EPC Band"]], how="left", left_on="Org Ref", right_on="OrgRef"
-)
-reduced_sample = combined[
-    ~combined["AH Tenure"].isin(
-        ["Commercial",
-         "Freeholder",
-         "HOMEBUY / EQUITY LOAN",
-         "Leaseholder",
-         "Outright Sale",
-         "SHARED EQUITY",
-         "Shared Ownership"]
-    )
-].copy()
+# # lodged ratings
+# lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
+#     normalize=True).to_frame().reset_index().merge(
+#     combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
+#     left_on="Lodged EPC Band",
+#     right_on="Lodged EPC Band",
+#     suffixes=("_reduced_sample", "_overall")
+# )

-# property types
-property_type_comparison = reduced_sample["Property Type"].value_counts(normalize=True).to_frame().reset_index().merge(
-    combined["Property Type"].value_counts(normalize=True).to_frame().reset_index(),
-    left_on="Property Type",
-    right_on="Property Type",
-    suffixes=("_reduced_sample", "_overall")
-)
+# # modelled ratings
+# modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
+#     normalize=True).to_frame().reset_index().merge(
+#     combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
+#     left_on="SAP Band",
+#     right_on="SAP Band",
+#     suffixes=("_reduced_sample", "_overall")
+# )

-# lodged ratings
-lodged_epc_band_comparison = reduced_sample["Lodged EPC Band"].value_counts(
-    normalize=True).to_frame().reset_index().merge(
-    combined["Lodged EPC Band"].value_counts(normalize=True).to_frame().reset_index(),
-    left_on="Lodged EPC Band",
-    right_on="Lodged EPC Band",
-    suffixes=("_reduced_sample", "_overall")
-)
+# # Testing measures
+# m1 = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+#     "solid floor, ashp 3.0 - 20250113 final.xlsx"
+# )
+# m2 = pd.read_excel(
+#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
+#     "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
+# )

-# modelled ratings
-modelled_epc_band_comparison = reduced_sample["SAP Band"].value_counts(
-    normalize=True).to_frame().reset_index().merge(
-    combined["SAP Band"].value_counts(normalize=True).to_frame().reset_index(),
-    left_on="SAP Band",
-    right_on="SAP Band",
-    suffixes=("_reduced_sample", "_overall")
-)
+# compare = m1.merge(
+#     m2,
+#     left_on="uprn",
+#     right_on="uprn",
+#     suffixes=("_ewi_iwi", "_no_ewi_iwi")
+# )

-# Testing measures
-m1 = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
-    "solid floor, ashp 3.0 - 20250113 final.xlsx"
-)
-m2 = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/EPC C - no "
-    "solid floor, no EWI or IWI, ashp 3.0 - 20250113 final.xlsx"
-)
+# # Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
+# only_no_ewi_iwi = compare[
+#     (compare["total_retrofit_cost_ewi_iwi"] == 0) &
+#     (compare["total_retrofit_cost_no_ewi_iwi"] != 0)
+#     ]

-compare = m1.merge(
-    m2,
-    left_on="uprn",
-    right_on="uprn",
-    suffixes=("_ewi_iwi", "_no_ewi_iwi")
-)
+# (m1["total_retrofit_cost"] > 0).sum()
+# (m2["total_retrofit_cost"] > 0).sum()

-# Which properties get done under the no EWI/IWI scenario that do not under the EWI/IWI scenario
-only_no_ewi_iwi = compare[
-    (compare["total_retrofit_cost_ewi_iwi"] == 0) &
-    (compare["total_retrofit_cost_no_ewi_iwi"] != 0)
-    ]
+# with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]

-(m1["total_retrofit_cost"] > 0).sum()
-(m2["total_retrofit_cost"] > 0).sum()
-
-with_ewi_projects = compare[compare["total_retrofit_cost_no_ewi_iwi"] > 0]
-
-z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
+# z = with_ewi_projects[pd.isnull(with_ewi_projects["total_retrofit_cost_ewi_iwi"])]
--- a/infrastructure/terraform/lambda/_template/README.md
+++ b/infrastructure/terraform/lambda/_template/README.md
@ -0,0 +1,51 @@
+## Checklist for adding a new Lambda
+
+### 1. Create the Lambda scaffold
+- Copy the template:
+
+  cp -r lambda/_template lambda/<lambda_name>
+
+---
+
+### 2. Add infrastructure prerequisites (shared stack)
+- Add a new ECR repository in:
+
+  infrastructure/terraform/shared/main.tf
+
+- Apply the shared stack
+    - This requires commenting out the 'if env.stage == "prod"' condition in .github/workflows/deploy_terraform.yml
+
+- Verify the ECR repository exists in AWS
+
+---
+
+### 3. Add Docker build configuration
+- Create a `Dockerfile` for the Lambda
+- Verify the Dockerfile path and build context
+- Add a new image build job in `deploy_terraform.yml` using `_build_image.yml`
+
+---
+
+### 4. Wire the Lambda deploy job (CI)
+- Add a deploy job using `_deploy_lambda.yml`
+- Ensure the deploy job depends on the image build job
+
+---
+
+### 5. Deploy
+- Push changes to GitHub
+- CI will:
+  1. Build and push the Docker image
+  2. Deploy the Lambda
+  3. Verify everything deployed. Good things to check:
+    - The ECR repository contains the image
+    - The SQS queue exists
+    - Sending a message to the SQS queue triggers the Lambda
+      - Check the CloudWatch logs
+---
+### 6. Delete
+  1. Delete the copied README from the new Lambda directory if you used cp -r
+
+---
+
+## Please feel free to update this document to make it easier for the next person
--- a/infrastructure/terraform/lambda/_template/main.tf
+++ b/infrastructure/terraform/lambda/_template/main.tf
@ -0,0 +1,14 @@
+# Template root module for a new Lambda: instantiates the shared lambda_with_sqs
+# module with this stack's stage and image URI.
+module "lambda" {
+  source = "../modules/lambda_with_sqs"
+
+  name  = REPLACE ME # placeholder, not valid HCL until replaced with a quoted name, e.g. "address2uprn"
+  stage = var.stage
+
+  # Digest-pinned URI built in variables.tf as "<ecr_repo_url>@<image_digest>".
+  image_uri = local.image_uri
+
+
+  # Passed through to the module's `environment` input.
+  environment = {
+    STAGE = var.stage
+    LOG_LEVEL = "info"
+  }
+}
--- a/infrastructure/terraform/lambda/_template/provider.tf
+++ b/infrastructure/terraform/lambda/_template/provider.tf
@ -0,0 +1,16 @@
+# Terraform settings for this Lambda stack: AWS provider constraint, S3 state
+# backend and minimum Terraform version.
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 4.16"
+    }
+  }
+
+  backend "s3" {
+    # Placeholder: replace with the quoted name of this Lambda's state bucket.
+    bucket = REPLACE_ME
+    # NOTE(review): the key has no stage component - confirm CI overrides the
+    # bucket/key per stage, otherwise all stages would share one state file.
+    key    = "terraform.tfstate"
+    region = "eu-west-2"
+  }
+
+  required_version = ">= 1.2.0"
+}
--- a/infrastructure/terraform/lambda/_template/variables.tf
+++ b/infrastructure/terraform/lambda/_template/variables.tf
@ -0,0 +1,27 @@
+# Inputs for the Lambda stack, plus the derived image URI.
+
+# NOTE(review): not referenced by this directory's main.tf (which uses a
+# placeholder name) - confirm whether the deploy workflow still requires it.
+variable "lambda_name" {
+  type        = string
+  description = "Logical name of the lambda (e.g. address2uprn)"
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod)"
+  type        = string
+}
+variable "ecr_repo_url" {
+  type        = string
+  description = "ECR repository URL (no tag, no digest)"
+}
+
+variable "image_digest" {
+  type        = string
+  description = "Image digest (sha256:...)"
+}
+
+
+# Joins repository URL and digest into a digest-pinned image reference.
+locals {
+  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
+}
+
+# Exposes the computed image URI as an output.
+output "resolved_image_uri" {
+  value = local.image_uri
+}
--- a/infrastructure/terraform/lambda/address2UPRN/main.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/main.tf
@ -0,0 +1,14 @@
+# address2uprn Lambda stack: instantiates the shared lambda_with_sqs module.
+module "address2uprn" {
+  source = "../modules/lambda_with_sqs"
+
+  # NOTE(review): the name is hard-coded here; var.lambda_name is declared in
+  # variables.tf but not used in this file.
+  name  = "address2uprn"
+  stage = var.stage
+
+  # Digest-pinned URI built in variables.tf as "<ecr_repo_url>@<image_digest>".
+  image_uri = local.image_uri
+
+
+  # Passed through to the module's `environment` input.
+  environment = {
+    STAGE     = var.stage
+    LOG_LEVEL = "info"
+  }
+}
--- a/infrastructure/terraform/lambda/address2UPRN/provider.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/provider.tf
@ -0,0 +1,17 @@
+# Terraform settings for the address2uprn stack: AWS provider constraint, S3
+# state backend and minimum Terraform version.
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 4.16"
+    }
+  }
+
+  backend "s3" {
+    bucket = "address2uprn-terraform-state"
+    # NOTE(review): the key has no stage component - confirm CI overrides the
+    # bucket/key per stage, otherwise all stages would share one state file.
+    key    = "terraform.tfstate"
+    region = "eu-west-2"
+  }
+
+  required_version = ">= 1.2.0"
+}
+
--- a/infrastructure/terraform/lambda/address2UPRN/variables.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/variables.tf
@ -0,0 +1,27 @@
+# Inputs for the address2uprn stack, plus the derived image URI.
+
+# NOTE(review): not referenced by this directory's main.tf (the module name is
+# hard-coded there) - confirm whether the deploy workflow still requires it.
+variable "lambda_name" {
+  type        = string
+  description = "Logical name of the lambda (e.g. address2uprn)"
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod)"
+  type        = string
+}
+variable "ecr_repo_url" {
+  type        = string
+  description = "ECR repository URL (no tag, no digest)"
+}
+
+variable "image_digest" {
+  type        = string
+  description = "Image digest (sha256:...)"
+}
+
+
+# Joins repository URL and digest into a digest-pinned image reference.
+locals {
+  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
+}
+
+# Exposes the computed image URI as an output.
+output "resolved_image_uri" {
+  value = local.image_uri
+}
--- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf
+++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf
@ -0,0 +1,44 @@
+# Composition module: execution role + SQS queue + container Lambda + the
+# event source mapping that connects the queue to the function.
+
+# --- IAM execution role, named "<name>-lambda-<stage>" ---
+module "role" {
+  source = "../../../modules/lambda_execution_role"
+  name   = "${var.name}-lambda-${var.stage}"
+}
+
+# --- SQS queue, named "<name>-queue-<stage>" ---
+# Any dead-letter queue is defined inside the sqs_queue module.
+module "queue" {
+  source = "../../../modules/sqs_queue"
+  name   = "${var.name}-queue-${var.stage}"
+}
+
+# --- Lambda function, named "<name>-<stage>" ---
+# timeout and memory_size are always passed explicitly, so this module's
+# defaults apply rather than lambda_service's own.
+module "lambda" {
+  source = "../../../modules/lambda_service"
+
+  name      = "${var.name}-${var.stage}"
+  role_arn  = module.role.role_arn
+  image_uri = var.image_uri
+
+  timeout     = var.timeout
+  memory_size = var.memory_size
+
+  environment = var.environment
+}
+
+# --- SQS -> Lambda trigger ---
+# Needs the role *name* (not ARN) because it attaches an inline policy to it.
+module "sqs_trigger" {
+  source = "../../../modules/lambda_sqs_trigger"
+
+  lambda_arn       = module.lambda.lambda_arn
+  lambda_role_name = module.role.role_name
+  queue_arn        = module.queue.queue_arn
+
+  batch_size = var.batch_size
+}
--- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf
+++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf
@ -0,0 +1,11 @@
+# Values re-exported from the child modules.
+
+# ARN of the Lambda function created by the lambda_service module.
+output "lambda_arn" {
+  value = module.lambda.lambda_arn
+}
+
+# ARN of the queue created by the sqs_queue module.
+output "queue_arn" {
+  value = module.queue.queue_arn
+}
+
+# URL of the queue created by the sqs_queue module.
+output "queue_url" {
+  value = module.queue.queue_url
+}
--- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf
+++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf
@ -0,0 +1,36 @@
+# Inputs for the lambda_with_sqs composition module.
+
+variable "name" {
+  description = "Base name of the Lambda; combined with the stage to name the role, queue and function"
+  type        = string
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod), appended to every resource name"
+  type        = string
+}
+
+variable "image_uri" {
+  description = "Container image URI the Lambda function is created from"
+  type        = string
+}
+
+# NOTE(review): not referenced by this module's main.tf; kept so callers that
+# already set it keep working.
+variable "region" {
+  description = "AWS region (currently unused by this module)"
+  type        = string
+  default     = "eu-west-2"
+}
+
+# NOTE(review): confirm the sqs_queue module's visibility timeout is at least
+# this value; Lambda rejects an SQS event source mapping whose queue visibility
+# timeout is shorter than the function timeout.
+variable "timeout" {
+  description = "Lambda function timeout in seconds"
+  type        = number
+  default     = 60
+}
+
+variable "memory_size" {
+  description = "Lambda function memory size in MB"
+  type        = number
+  default     = 1024
+}
+
+variable "environment" {
+  description = "Environment variables set on the Lambda function"
+  type        = map(string)
+  default     = {}
+}
+
+variable "batch_size" {
+  description = "Maximum number of SQS messages passed to the Lambda per invocation"
+  type        = number
+  default     = 10
+}
--- a/infrastructure/terraform/modules/container_registry/main.tf
+++ b/infrastructure/terraform/modules/container_registry/main.tf
@ -0,0 +1,30 @@
+# ECR repository named "<name>-<stage>".
+resource "aws_ecr_repository" "this" {
+  name = "${var.name}-${var.stage}"
+
+  # Tags may be overwritten by later pushes.
+  image_tag_mutability = "MUTABLE"
+
+  # Scan every image as it is pushed.
+  image_scanning_configuration {
+    scan_on_push = true
+  }
+}
+
+# Lifecycle policy with a single rule: expire images (tagged or untagged) once
+# the repository holds more than var.retain_count of them.
+resource "aws_ecr_lifecycle_policy" "this" {
+  repository = aws_ecr_repository.this.name
+
+  policy = jsonencode({
+    rules = [
+      {
+        rulePriority = 1
+        description  = "Expire old images"
+        selection = {
+          tagStatus   = "any"
+          countType   = "imageCountMoreThan"
+          countNumber = var.retain_count
+        }
+        action = {
+          type = "expire"
+        }
+      }
+    ]
+  })
+}
--- a/infrastructure/terraform/modules/container_registry/outputs.tf
+++ b/infrastructure/terraform/modules/container_registry/outputs.tf
@ -0,0 +1,11 @@
+# Attributes of the ECR repository created by this module.
+
+# Repository name, i.e. "<name>-<stage>".
+output "repository_name" {
+  value = aws_ecr_repository.this.name
+}
+
+# Repository URL as reported by the aws_ecr_repository resource.
+output "repository_url" {
+  value = aws_ecr_repository.this.repository_url
+}
+
+# Repository ARN.
+output "repository_arn" {
+  value = aws_ecr_repository.this.arn
+}
--- a/infrastructure/terraform/modules/container_registry/variables.tf
+++ b/infrastructure/terraform/modules/container_registry/variables.tf
@ -0,0 +1,15 @@
+# Inputs for the container_registry module. The repository is named
+# "<name>-<stage>", and retain_count feeds the lifecycle policy's image count.
+variable "name" {
+  description = "Base name of the repository (without stage)"
+  type        = string
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod)"
+  type        = string
+}
+
+variable "retain_count" {
+  description = "Number of images to retain"
+  type        = number
+  default     = 10
+}
--- a/infrastructure/terraform/modules/ecr/main.tf
+++ b/infrastructure/terraform/modules/ecr/main.tf
@ -1,3 +1,6 @@
+# This ECR module is used for services deployed by Serverless.
+# TODO: unify the ecr and container_registry modules into one
+
 resource "aws_ecr_repository" "my_repository" {
  name                 = "${var.ecr_name}"
  image_tag_mutability = "MUTABLE"
--- a/infrastructure/terraform/modules/ecr/outputs.tf
+++ b/infrastructure/terraform/modules/ecr/outputs.tf
@ -2,3 +2,9 @@ output "ecr_repository_name" {
  description = "Name of the EPR repo in AWS"
  value       = aws_ecr_repository.my_repository.name
 }
+
+
+output "ecr_repository_url" {
+  description = "Full ECR repository URL"
+  value       = aws_ecr_repository.my_repository.repository_url
+}
--- a/infrastructure/terraform/modules/lambda_execution_role/main.tf
+++ b/infrastructure/terraform/modules/lambda_execution_role/main.tf
@ -0,0 +1,37 @@
+# Trust policy: lets the Lambda service (lambda.amazonaws.com) assume the role.
+data "aws_iam_policy_document" "assume" {
+  statement {
+    effect = "Allow"
+    principals {
+      type        = "Service"
+      identifiers = ["lambda.amazonaws.com"]
+    }
+    actions = ["sts:AssumeRole"]
+  }
+}
+
+# Execution role named var.name, using the trust policy above.
+resource "aws_iam_role" "this" {
+  name               = var.name
+  assume_role_policy = data.aws_iam_policy_document.assume.json
+}
+
+# Attaches the AWS-managed AWSLambdaBasicExecutionRole policy to the role.
+resource "aws_iam_role_policy_attachment" "basic_logs" {
+  role       = aws_iam_role.this.name
+  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
+}
+
+# Inline policy allowing the role to authenticate to ECR and pull images.
+# NOTE(review): ecr:GetAuthorizationToken only works with Resource "*", but
+# BatchGetImage / GetDownloadUrlForLayer could be limited to the repository
+# ARN if this module were given one - consider tightening.
+resource "aws_iam_role_policy" "ecr_pull" {
+  role = aws_iam_role.this.name
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect = "Allow"
+      Action = [
+        "ecr:GetAuthorizationToken",
+        "ecr:BatchGetImage",
+        "ecr:GetDownloadUrlForLayer"
+      ]
+      Resource = "*"
+    }]
+  })
+}
--- a/infrastructure/terraform/modules/lambda_execution_role/outputs.tf
+++ b/infrastructure/terraform/modules/lambda_execution_role/outputs.tf
@ -0,0 +1,7 @@
+# ARN of the execution role; lambda_with_sqs passes it to the function's role.
+output "role_arn" {
+  value = aws_iam_role.this.arn
+}
+
+# Name of the execution role; lambda_with_sqs passes it to lambda_sqs_trigger,
+# which attaches an inline SQS policy to it.
+output "role_name" {
+  value = aws_iam_role.this.name
+}
--- a/infrastructure/terraform/modules/lambda_execution_role/variables.tf
+++ b/infrastructure/terraform/modules/lambda_execution_role/variables.tf
@ -0,0 +1,4 @@
+# Only input of the module; main.tf assigns it to `aws_iam_role.this.name`.
+variable "name" {
+  description = "IAM role name for the Lambda execution role"
+  type        = string
+}
--- a/infrastructure/terraform/modules/lambda_service/main.tf
+++ b/infrastructure/terraform/modules/lambda_service/main.tf
@ -0,0 +1,15 @@
+# Lambda function deployed from a container image (`package_type = "Image"`).
+# Name, execution role, image URI, timeout, memory and environment variables
+# are all supplied by the caller through module variables.
+resource "aws_lambda_function" "this" {
+  function_name = var.name
+  role          = var.role_arn
+
+  package_type = "Image"
+  image_uri    = var.image_uri
+
+  timeout     = var.timeout
+  memory_size = var.memory_size
+  # Publish a new function version whenever the function is created or changed.
+  publish     = true
+
+  # NOTE(review): `var.environment` defaults to an empty map, so this block is
+  # emitted even when no variables are set — confirm the provider version in
+  # use accepts an empty `variables` map without a perpetual diff.
+  environment {
+    variables = var.environment
+  }
+}
--- a/infrastructure/terraform/modules/lambda_service/outputs.tf
+++ b/infrastructure/terraform/modules/lambda_service/outputs.tf
@ -0,0 +1,3 @@
+# ARN of the function created by this module (`aws_lambda_function.this`).
+output "lambda_arn" {
+  description = "ARN of the Lambda function"
+  value       = aws_lambda_function.this.arn
+}
--- a/infrastructure/terraform/modules/lambda_service/variables.tf
+++ b/infrastructure/terraform/modules/lambda_service/variables.tf
@ -0,0 +1,18 @@
+# Inputs of the lambda_service module. Each one is passed straight through to
+# the matching argument of `aws_lambda_function.this`.
+variable "name" {
+  description = "Name of the Lambda function"
+  type        = string
+}
+
+variable "role_arn" {
+  description = "ARN of the IAM execution role the function assumes"
+  type        = string
+}
+
+variable "image_uri" {
+  description = "URI of the container image to deploy as the function code"
+  type        = string
+}
+
+variable "timeout" {
+  description = "Function timeout in seconds"
+  type        = number
+  default     = 30
+}
+
+variable "memory_size" {
+  description = "Memory available to the function, in MB"
+  type        = number
+  default     = 512
+}
+
+variable "environment" {
+  description = "Environment variables to set on the function"
+  type        = map(string)
+  default     = {}
+}
--- a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf
+++ b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf
@ -0,0 +1,23 @@
+# Subscribes the Lambda function to the SQS queue: messages from
+# `var.queue_arn` are delivered to `var.lambda_arn` in batches of up to
+# `var.batch_size`.
+resource "aws_lambda_event_source_mapping" "this" {
+  event_source_arn = var.queue_arn
+  function_name    = var.lambda_arn
+  batch_size       = var.batch_size
+  enabled          = true
+
+  # The mapping can only be created once the execution role is allowed to
+  # read from the queue. Nothing above references the inline policy, so
+  # without this explicit edge Terraform may create both in parallel and the
+  # mapping creation can fail on missing SQS permissions.
+  depends_on = [aws_iam_role_policy.allow_sqs]
+}
+
+# Inline policy on the Lambda execution role (`var.lambda_role_name`) granting
+# receive, delete and get-attributes access on the single queue
+# `var.queue_arn`. No `name` is set, so the provider generates one.
+resource "aws_iam_role_policy" "allow_sqs" {
+  role = var.lambda_role_name
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [{
+      Effect = "Allow"
+      Action = [
+        "sqs:ReceiveMessage",
+        "sqs:DeleteMessage",
+        "sqs:GetQueueAttributes"
+      ]
+      Resource = var.queue_arn
+    }]
+  })
+}
--- a/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf
+++ b/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf
@ -0,0 +1,8 @@
+# Inputs of the lambda_sqs_trigger module.
+variable "lambda_arn" {
+  description = "ARN of the Lambda function to invoke with messages from the queue"
+  type        = string
+}
+
+variable "lambda_role_name" {
+  description = "Name of the Lambda execution role that receives the SQS read policy"
+  type        = string
+}
+
+variable "queue_arn" {
+  description = "ARN of the SQS queue used as the event source"
+  type        = string
+}
+
+variable "batch_size" {
+  description = "Maximum number of messages delivered to the function per batch"
+  type        = number
+  default     = 10
+}
--- a/infrastructure/terraform/modules/sqs_queue/main.tf
+++ b/infrastructure/terraform/modules/sqs_queue/main.tf
@ -0,0 +1,14 @@
+# Dead-letter queue, named after the main queue with a "-dlq" suffix. It is
+# the redrive target of `aws_sqs_queue.this`.
+resource "aws_sqs_queue" "dlq" {
+  name = "${var.name}-dlq"
+}
+
+# Main queue. Messages that have been received `var.max_receive_count` times
+# without being deleted are moved to the dead-letter queue.
+resource "aws_sqs_queue" "this" {
+  name = var.name
+
+  # NOTE(review): hard-coded. If this queue feeds a Lambda event source
+  # mapping, the visibility timeout should comfortably exceed the function
+  # timeout — confirm 120s is adequate for every consumer.
+  visibility_timeout_seconds = 120
+
+  redrive_policy = jsonencode({
+    deadLetterTargetArn = aws_sqs_queue.dlq.arn
+    maxReceiveCount     = var.max_receive_count
+  })
+}
--- a/infrastructure/terraform/modules/sqs_queue/outputs.tf
+++ b/infrastructure/terraform/modules/sqs_queue/outputs.tf
@ -0,0 +1,7 @@
+# Outputs of the sqs_queue module. Both refer to the main queue
+# (`aws_sqs_queue.this`), not the dead-letter queue.
+output "queue_arn" {
+  description = "ARN of the main SQS queue"
+  value       = aws_sqs_queue.this.arn
+}
+
+output "queue_url" {
+  description = "URL of the main SQS queue"
+  value       = aws_sqs_queue.this.url
+}
--- a/infrastructure/terraform/modules/sqs_queue/variables.tf
+++ b/infrastructure/terraform/modules/sqs_queue/variables.tf
@ -0,0 +1,6 @@
+# Inputs of the sqs_queue module.
+variable "name" {
+  description = "Name of the main SQS queue (the dead-letter queue gets the suffix -dlq)"
+  type        = string
+}
+
+variable "max_receive_count" {
+  description = "Number of receives after which a message is moved to the dead-letter queue"
+  type        = number
+  default     = 5
+}
--- a/infrastructure/terraform/modules/tf_state_bucket/main.tf
+++ b/infrastructure/terraform/modules/tf_state_bucket/main.tf
@ -0,0 +1,30 @@
+# The bucket itself; its name is supplied by the caller via `var.bucket_name`.
+# Versioning, encryption and public-access settings are configured by the
+# separate resources in this module.
+resource "aws_s3_bucket" "this" {
+  bucket = var.bucket_name
+}
+
+# Enables object versioning on the bucket so earlier object versions are kept.
+resource "aws_s3_bucket_versioning" "this" {
+  bucket = aws_s3_bucket.this.id
+
+  versioning_configuration {
+    status = "Enabled"
+  }
+}
+
+# Default server-side encryption for new objects, using the AES256 algorithm.
+resource "aws_s3_bucket_server_side_encryption_configuration" "this" {
+  bucket = aws_s3_bucket.this.id
+
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256"
+    }
+  }
+}
+
+# Turns on all four S3 public-access-block settings for the bucket.
+resource "aws_s3_bucket_public_access_block" "this" {
+  bucket = aws_s3_bucket.this.id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
--- a/infrastructure/terraform/modules/tf_state_bucket/outputs.tf
+++ b/infrastructure/terraform/modules/tf_state_bucket/outputs.tf
@ -0,0 +1,7 @@
+# Outputs of the tf_state_bucket module, both read from `aws_s3_bucket.this`.
+output "bucket_name" {
+  description = "Name of the S3 bucket"
+  value       = aws_s3_bucket.this.bucket
+}
+
+output "bucket_arn" {
+  description = "ARN of the S3 bucket"
+  value       = aws_s3_bucket.this.arn
+}
--- a/infrastructure/terraform/modules/tf_state_bucket/variables.tf
+++ b/infrastructure/terraform/modules/tf_state_bucket/variables.tf
@ -0,0 +1,3 @@
+# Only input of the module; used as the name of `aws_s3_bucket.this`.
+variable "bucket_name" {
+  description = "Name of the S3 bucket to create"
+  type        = string
+}
--- a/infrastructure/terraform/shared/dev.tfvars
+++ b/infrastructure/terraform/shared/dev.tfvars
@ -1,5 +1,4 @@
 stage = "dev"
-profile = "DevAdmin"
 region = "eu-west-2"

 # Domain
--- a/infrastructure/terraform/shared/main.tf
+++ b/infrastructure/terraform/shared/main.tf
@ -8,7 +8,6 @@ terraform {
  backend "s3" {
    bucket  = "assessment-model-terraform-state"
    region  = "eu-west-2"
-    profile = "DevAdmin"
    key     = "terraform.tfstate"
  }

@ -16,7 +15,6 @@ terraform {
 }

 provider "aws" {
-  profile = var.profile
  region  = var.region
 }

@ -91,101 +89,101 @@ resource "aws_db_instance" "default" {

 # Set up the bucket that recieve the csv uploads of epc to be retrofit
 module "s3_presignable_bucket" {
-  source          = "./modules/s3_presignable_bucket"
+  source          = "../modules/s3_presignable_bucket"
  bucketname      = "retrofit-plan-inputs-${var.stage}"
  environment     = var.stage
  allowed_origins = var.allowed_origins
 }

 module "s3_due_considerations_bucket" {
-  source          = "./modules/s3_presignable_bucket"
+  source          = "../modules/s3_presignable_bucket"
  bucketname      = "retrofit-due-considerations-${var.stage}"
  environment     = var.stage
  allowed_origins = var.allowed_origins
 }

 module "s3_eco_spreadseet_bucket" {
-  source          = "./modules/s3_presignable_bucket"
+  source          = "../modules/s3_presignable_bucket"
  bucketname      = "retrofit-eco-spreadsheet-${var.stage}"
  environment     = var.stage
  allowed_origins = var.allowed_origins
 }

 module "s3" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-datalake-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "model_directory" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-model-directory-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_sap_predictions" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-sap-predictions-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_sap_data" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-data-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_carbon_predictions" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-carbon-predictions-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_heat_predictions" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-heat-predictions-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_lighting_cost_predictions" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-lighting-cost-predictions-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_heating_cost_predictions" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-heating-cost-predictions-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_hot_water_cost_predictions" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-hot-water-cost-predictions-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_heating_kwh_predictions" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-heating-kwh-predictions-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_hotwater_kwh_predictions" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-hotwater-kwh-predictions-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 module "retrofit_sap_baseline_predictions" {
-  source          = "./modules/s3"
+  source          = "../modules/s3"
  bucketname      = "retrofit-sap-baseline-predictions-${var.stage}"
  allowed_origins = var.allowed_origins
 }

 // We make this bucket presignable, because we want to generate download links for the frontend
 module "retrofit_energy_assessments" {
-  source          = "./modules/s3_presignable_bucket"
+  source          = "../modules/s3_presignable_bucket"
  bucketname      = "retrofit-energy-assessments-${var.stage}"
  allowed_origins = var.allowed_origins
  environment     = var.stage
@ -193,7 +191,7 @@ module "retrofit_energy_assessments" {

 # Set up the route53 record for the API
 module "route53" {
-  source         = "./modules/route53"
+  source         = "../modules/route53"
  domain_name    = var.domain_name
  api_url_prefix = var.api_url_prefix
  providers = {
@ -201,75 +199,76 @@ module "route53" {
  }
 }

+
 # Create an ECR repository for storage of the lambda's docker images
 module "ecr" {
  ecr_name = "fastapi-repository-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 module "lambda_sap_prediction_ecr" {
  ecr_name = "lambda-sap-prediction-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 module "due_considerations_ecr" {
  ecr_name = "due-considerations-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 module "eco_spreadsheet_ecr" {
  ecr_name = "eco-spreadsheet-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 module "lambda_carbon_prediction_ecr" {
  ecr_name = "lambda-carbon-prediction-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 module "lambda_heat_prediction_ecr" {
  ecr_name = "lambda-heat-prediction-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 # ECR repos for lighting cost, heating cost and hot water cost models
 module "lambda_lighting_cost_prediction_ecr" {
  ecr_name = "lighting-cost-prediction-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 module "lambda_heating_cost_prediction_ecr" {
  ecr_name = "heating-cost-prediction-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 module "lambda_hot_water_cost_prediction_ecr" {
  ecr_name = "hot-water-cost-prediction-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 # For heating and hot water kwh models
 module "lambda_heating_kwh_prediction_ecr" {
  ecr_name = "heating-kwh-prediction-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 module "lambda_hotwater_kwh_prediction_ecr" {
  ecr_name = "hotwater-kwh-prediction-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 # Baselining models
 module "sap_baseline_ecr" {
  ecr_name = "sap-baseline-prediction-${var.stage}"
-  source   = "./modules/ecr"
+  source   = "../modules/ecr"
 }

 ##############################################
 # CDN - Cloudfront
 ##############################################
 module "cloudfront_distribution" {
-  source             = "./modules/cloudfront"
+  source             = "../modules/cloudfront"
  bucket_name        = module.s3.bucket_name
  bucket_id          = module.s3.bucket_id
  bucket_arn         = module.s3.bucket_arn
@ -281,7 +280,7 @@ module "cloudfront_distribution" {
 # SES - Email sending
 ################################################
 module "ses" {
-  source      = "./modules/ses"
+  source      = "../modules/ses"
  domain_name = "domna.homes"
  stage       = var.stage
 }
@ -289,3 +288,27 @@ module "ses" {
 output "ses_dns_records" {
  value = module.ses.dns_records
 }
+
+################################################
+# Address2UPRN – Terraform state bucket and container registry
+################################################
+# S3 bucket, created through the tf_state_bucket module, named for the
+# Address2UPRN Terraform state.
+module "address2uprn_state_bucket" {
+  source      = "../modules/tf_state_bucket"
+  bucket_name = "address2uprn-terraform-state"
+}
+
+output "address2uprn_state_bucket_name" {
+  description = "Name of the S3 bucket created by the address2uprn_state_bucket module"
+  value       = module.address2uprn_state_bucket.bucket_name
+}
+
+# Container registry for Address2UPRN, created through the
+# container_registry module for the current stage.
+module "address2uprn_registry" {
+  source = "../modules/container_registry"
+  name   = "address2uprn"
+  stage  = var.stage
+}
+
+output "address2uprn_repository_url" {
+  description = "Repository URL exposed by the address2uprn_registry module"
+  value       = module.address2uprn_registry.repository_url
+}
--- a/infrastructure/terraform/shared/secrets.tf
+++ b/infrastructure/terraform/shared/secrets.tf
--- a/infrastructure/terraform/shared/variables.tf
+++ b/infrastructure/terraform/shared/variables.tf
@ -3,11 +3,6 @@ variable stage {
  type        = string
 }

-variable "profile" {
-  description = "AWS profile to use"
-  type        = string
-}
-
 variable "region" {
  description = "AWS region"
  type        = string
--- a/model_data/requirements/requirements.txt
+++ b/model_data/requirements/requirements.txt
@ -1,4 +1,4 @@
-pydantic==2.9.2
+pydantic>=1.10.7
 pydantic-settings==2.6.0
 epc-api-python==1.0.2
 numpy==2.1.2
--- a/pytest.ini
+++ b/pytest.ini
@ -1,4 +1,4 @@
 [pytest]
 pythonpath = .
 addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
-testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/onboarders/tests
+testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@ -7,24 +7,29 @@ import numpy as np
 from backend.app.utils import sap_to_epc
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine, db_read_session
-from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
-from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    Plan,
+    PlanRecommendations,
+    RecommendationMaterials,
+)
+from backend.app.db.models.portfolio import (
+    PropertyModel,
+    PropertyDetailsEpcModel,
+    PropertyDetailsSpatial,
+)
 from backend.app.db.functions.materials_functions import get_materials
 from collections import defaultdict
 from sqlalchemy import func

 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 435  # Peabody
+PORTFOLIO_ID = 502  # Peabody
 SCENARIOS = [
-    908,
-    909,
-    910,
+    986,
 ]
 scenario_names = {
-    908: "EPC C - no solid floor, ashp 3.0",
-    909: "EPC C - no solid floor, no EWI or IWI, ashp 3.0",
-    910: "EPC B - no solid floor, no EWI, ashp 3.0"
+    986: "EPC C",
 }


@ -35,22 +40,26 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Properties
    # --------------------
-    properties_query = session.query(
-        PropertyModel,
-        PropertyDetailsEpcModel
-    ).join(
+    properties_query = (
+        session.query(PropertyModel, PropertyDetailsEpcModel)
+        .join(
            PropertyDetailsEpcModel,
-        PropertyModel.id == PropertyDetailsEpcModel.property_id
-    ).filter(
-        PropertyModel.portfolio_id == portfolio_id
-    ).all()
+            PropertyModel.id == PropertyDetailsEpcModel.property_id,
+        )
+        .filter(PropertyModel.portfolio_id == portfolio_id)
+        .all()
+    )

    properties_data = [
        {
-            **{col.name: getattr(p.PropertyModel, col.name)
-               for col in PropertyModel.__table__.columns},
-            **{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
-               for col in PropertyDetailsEpcModel.__table__.columns},
+            **{
+                col.name: getattr(p.PropertyModel, col.name)
+                for col in PropertyModel.__table__.columns
+            },
+            **{
+                col.name: getattr(p.PropertyDetailsEpcModel, col.name)
+                for col in PropertyDetailsEpcModel.__table__.columns
+            },
        }
        for p in properties_query
    ]
@ -62,13 +71,10 @@ def get_data(portfolio_id, scenario_ids):
        session.query(
            Plan.scenario_id,
            Plan.property_id,
-            func.max(Plan.created_at).label("latest_created_at")
+            func.max(Plan.created_at).label("latest_created_at"),
        )
        .filter(Plan.scenario_id.in_(scenario_ids))
-        .group_by(
-            Plan.scenario_id,
-            Plan.property_id
-        )
+        .group_by(Plan.scenario_id, Plan.property_id)
        .subquery()
    )

@ -80,9 +86,9 @@ def get_data(portfolio_id, scenario_ids):
        session.query(Plan)
        .join(
            latest_plans_subq,
-            (Plan.scenario_id == latest_plans_subq.c.scenario_id) &
-            (Plan.property_id == latest_plans_subq.c.property_id) &
-            (Plan.created_at == latest_plans_subq.c.latest_created_at)
+            (Plan.scenario_id == latest_plans_subq.c.scenario_id)
+            & (Plan.property_id == latest_plans_subq.c.property_id)
+            & (Plan.created_at == latest_plans_subq.c.latest_created_at),
        )
        .all()
    )
@ -107,28 +113,29 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Recommendations (NO materials yet)
    # --------------------
-    recommendations_query = session.query(
-        Recommendation,
-        Plan.scenario_id,
-        PlanRecommendations.plan_id
-    ).join(
+    recommendations_query = (
+        session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
+        .join(
            PlanRecommendations,
-        Recommendation.id == PlanRecommendations.recommendation_id
-    ).join(
-        Plan,
-        Plan.id == PlanRecommendations.plan_id
-    ).filter(
+            Recommendation.id == PlanRecommendations.recommendation_id,
+        )
+        .join(Plan, Plan.id == PlanRecommendations.plan_id)
+        .filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default.is_(True),
-        Recommendation.already_installed.is_(False)
-    ).all()
+            Recommendation.already_installed.is_(False),
+        )
+        .all()
+    )

    recommendations_data = [
        {
-            **{col.name: getattr(r.Recommendation, col.name)
-               for col in Recommendation.__table__.columns},
+            **{
+                col.name: getattr(r.Recommendation, col.name)
+                for col in Recommendation.__table__.columns
+            },
            "scenario_id": r.scenario_id,
-            "materials": []  # placeholder
+            "materials": [],  # placeholder
        }
        for r in recommendations_query
    ]
@ -138,23 +145,25 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Recommendation materials (SEPARATE QUERY)
    # --------------------
-    materials_query = session.query(
-        RecommendationMaterials
-    ).filter(
-        RecommendationMaterials.recommendation_id.in_(recommendation_ids)
-    ).all()
+    materials_query = (
+        session.query(RecommendationMaterials)
+        .filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
+        .all()
+    )

    # Group materials by recommendation_id
    materials_by_recommendation = defaultdict(list)

    for m in materials_query:
-        materials_by_recommendation[m.recommendation_id].append({
+        materials_by_recommendation[m.recommendation_id].append(
+            {
                "material_id": m.material_id,
                "depth": m.depth,
                "quantity": m.quantity,
                "quantity_unit": m.quantity_unit,
                "estimated_cost": m.estimated_cost,
-        })
+            }
+        )

    # Attach materials safely (no filtering side effects)
    for r in recommendations_data:
@ -165,7 +174,9 @@ def get_data(portfolio_id, scenario_ids):
    return properties_data, plans_data, recommendations_data


-properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)
+properties_data, plans_data, recommendations_data = get_data(
+    portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS
+)

 properties_df = pd.DataFrame(properties_data)
 plans_df = pd.DataFrame(plans_data)
@ -176,10 +187,8 @@ with db_read_session() as session:

 materials = pd.DataFrame(materials)

-material_lookup = (
-    materials
-    .set_index("id")[["type", "includes_battery"]]
-    .to_dict("index")
+material_lookup = materials.set_index("id")[["type", "includes_battery"]].to_dict(
+    "index"
 )


@ -193,14 +202,14 @@ def has_solar_with_battery(materials_list):
    return False


-recommendations_df["has_solar_with_battery"] = (
-    recommendations_df["materials"].apply(has_solar_with_battery)
+recommendations_df["has_solar_with_battery"] = recommendations_df["materials"].apply(
+    has_solar_with_battery
 )

 recommendations_df["measure_type"] = np.where(
    recommendations_df["has_solar_with_battery"] == True,
    recommendations_df["measure_type"] + "_with_battery",
-    recommendations_df["measure_type"]
+    recommendations_df["measure_type"],
 )

 # Adjust material type to indicate if there is a battery included
@ -215,50 +224,67 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3

 for scenario_id in SCENARIOS:
    # Get recs for this scenario
-    recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
-        ["property_id", "measure_type", "estimated_cost", "default"]
+    recommended_measures_df = recommendations_df[
+        recommendations_df["scenario_id"] == scenario_id
+    ][["property_id", "measure_type", "estimated_cost", "default"]]
+    recommended_measures_df = recommended_measures_df[
+        recommended_measures_df["default"]
    ]
-    recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
    recommended_measures_df = recommended_measures_df.drop(columns=["default"])

-    post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
-        ["property_id", "default", "sap_points"]]
+    post_install_sap = recommendations_df[
+        recommendations_df["scenario_id"] == scenario_id
+    ][["property_id", "default", "sap_points"]]
    post_install_sap = post_install_sap[post_install_sap["default"]]
    # Sum up the sap points by property id
-    post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
+    post_install_sap = (
+        post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
+    )

    # Find dupes by property id and measure type
-    dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False)
+    dupes = recommended_measures_df.duplicated(
+        subset=["property_id", "measure_type"], keep=False
+    )
    dupe_df = recommended_measures_df[dupes]

    if dupe_df.shape:
        # Drop dupes - happened due to a funny bug
        recommended_measures_df = recommended_measures_df.drop_duplicates(
-            subset=["property_id", "measure_type"], keep='first'
+            subset=["property_id", "measure_type"], keep="first"
        )

    recommendations_measures_pivot = recommended_measures_df.pivot(
-        index='property_id',
-        columns='measure_type',
-        values='estimated_cost'
+        index="property_id", columns="measure_type", values="estimated_cost"
    )
    recommendations_measures_pivot = recommendations_measures_pivot.reset_index()

    # Total cost is the row sum, excluding the property_id column
-    recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
-        columns=["property_id"]
-    ).sum(axis=1)
+    recommendations_measures_pivot["total_retrofit_cost"] = (
+        recommendations_measures_pivot.drop(columns=["property_id"]).sum(axis=1)
+    )

-    df = properties_df[
+    df = (
+        properties_df[
            [
-            "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
-            "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
-            "id"
+                "landlord_property_id",
+                "property_id",
+                "uprn",
+                "address",
+                "postcode",
+                "property_type",
+                "walls",
+                "roof",
+                "heating",
+                "windows",
+                "current_epc_rating",
+                "current_sap_points",
+                "total_floor_area",
+                "number_of_rooms",
+                "id",
            ]
-    ].merge(
-        recommendations_measures_pivot, how="left", on="property_id"
-    ).merge(
-        post_install_sap, how="left", on="property_id"
+        ]
+        .merge(recommendations_measures_pivot, how="left", on="property_id")
+        .merge(post_install_sap, how="left", on="property_id")
    )

    # df = df.drop(columns=["property_id"])
@ -266,20 +292,24 @@ for scenario_id in SCENARIOS:

    df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
    df["predicted_post_works_sap"] = df["predicted_post_works_sap"]
-    df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
+    df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(
+        lambda x: sap_to_epc(x)
+    )
    df["uprn"] = df["uprn"].astype(str)

    relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id]
    df2 = df.merge(
-        relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id",
-        suffixes=("", "_plan")
+        relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]],
+        how="left",
+        on="property_id",
+        suffixes=("", "_plan"),
    )
    print(df2["predicted_post_works_epc"].value_counts())
    print(df2["post_epc_rating"].value_counts())

    z = df2[
-        (df2["predicted_post_works_epc"] != "D") &
-        (df2["post_epc_rating"].astype(str) == "Epc.D")
+        (df2["predicted_post_works_epc"] != "D")
+        & (df2["post_epc_rating"].astype(str) == "Epc.D")
    ]

    df2["predicted_post_works_epc"].value_counts()
@ -295,183 +325,6 @@ for scenario_id in SCENARIOS:
    df[df["predicted_post_works_sap"] == ""]

    # Create excel to store to
-    filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
-                f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx")
+    filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx"
    with pd.ExcelWriter(filename) as writer:
        df.to_excel(writer, sheet_name="properties", index=False)
-
-
-# asset_list = pd.DataFrame(asset_list)
-# asset_list = asset_list.rename(
-#     columns={
-#         "postcode": "domna_postcode"
-#     }
-# )
-# if "domna_full_address":
-#     # For Peabody
-#     asset_list["domna_full_address"] = asset_list["domna_address_1"]
-#
-# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
-# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
-# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
-# asset_list = asset_list.merge(
-#     df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
-#     how="left",
-#     on="uprn"
-# )
-
-
-# Get conservation area data from property details spatial. based on the UPRNs
-def get_conservation_area_data(uprns):
-    session = sessionmaker(bind=db_engine)()
-    session.begin()
-
-    # Query to get conservation area data
-    spatial_query = session.query(
-        PropertyDetailsSpatial
-    ).filter(
-        PropertyDetailsSpatial.uprn.in_(uprns)  # Filter by UPRNs
-    ).all()
-
-    # Transform spatial data to include all fields dynamically
-    spatial_data = [
-        {col.name: getattr(spatial, col.name) for col in PropertyDetailsSpatial.__table__.columns}
-        for spatial in spatial_query
-    ]
-
-    session.close()
-    return pd.DataFrame(spatial_data)
-
-
-uprns = asset_list[
-    ~pd.isna(asset_list["uprn"]) & (asset_list["uprn"] != "<NA>")
-    ]["uprn"].astype(int).unique().tolist()
-conservation_area_data = get_conservation_area_data(uprns)
-conservation_area_data["uprn"] = conservation_area_data["uprn"].astype(str)
-asset_list = asset_list.merge(
-    conservation_area_data[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]],
-    how="left",
-    on="uprn"
-)
-
-# For exporting
-df.to_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - "
-    "with ID.xlsx",
-    index=False
-)
-# asset_list.to_excel(
-#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx",
-#     index=False
-# )
-
-condition_costs = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
-    sheet_name="Prices - Khalim",
-    header=35
-)
-# Remove unnamed columns and reset index
-condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')]
-condition_costs = condition_costs.reset_index(drop=True)
-
-
-# We now estimate condition cost
-def simulate_condition(asset_list, condition_costs):
-    """
-    This function is for testing, and will simulate condition cost from 1-10 for each property to see what the
-    costing array looks like.
-    :param df:
-    :return:
-    """
-
-    condition_df = []
-    for _, row in asset_list.iterrows():
-
-        n_bathrooms = row["bathrooms"]
-
-        conditions = {}
-        for condition in reversed(range(1, 11)):
-            condition_cost = condition_costs[
-                condition_costs["Condition"] == condition
-                ].drop(columns=["Condition"]).iloc[0]
-
-            # Each cost is scaled by floor area
-            condition_cost = condition_cost * row["total_floor_area"]
-            condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms
-
-            total_condition_cost = condition_cost.sum()
-            conditions["Condition " + str(condition)] = (total_condition_cost)
-
-        condition_df.append(
-            {
-                "uprn": row["uprn"],
-                **conditions
-            }
-        )
-
-    condition_df = pd.DataFrame(condition_df)
-
-    asset_list = asset_list.merge(
-        condition_df,
-        how="left",
-        on="uprn"
-    )
-
-    return asset_list
-
-
-# asset_list = simulate_condition(asset_list, condition_costs)
-
-# We calculate the condition cost based on the condition
-for _, row in asset_list.iterrows():
-
-    condition = row["condition_score"]
-    if condition in [None, ""]:
-        continue
-    condition = int(float(condition))
-
-    condition_cost = condition_costs[
-        condition_costs["Condition"] == condition
-        ].drop(columns=["Condition"]).iloc[0]
-
-    # Each cost is scaled by floor area
-    condition_cost = condition_cost * float(row["total_floor_area"])
-    n_bathrooms = row["n_bathrooms"]
-    condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)
-
-    total_condition_cost = condition_cost.sum()
-    asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost
-
-# Store output
-asset_list.to_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
-    index=False
-)
-
-condition_cost_comparison = asset_list[
-    ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
-]
-
-# Testing
-plans_df.head()
-
-example = pd.read_excel(
-    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
-    "SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx"
-)
-
-plans_df2 = plans_df.merge(
-    properties_df[["property_id", "landlord_property_id"]],
-    left_on="property_id",
-    right_on="property_id",
-    how="left"
-)
-
-plans_df2 = plans_df2[plans_df2["scenario_id"] == 909]
-
-dupes = plans_df2[plans_df2["property_id"].duplicated()]
-
-# merge on plans
-example = example.merge(
-    plans_df, how="left",
-)