Merge pull request #712 from Hestia-Homes/main

Lambda deployment of postcode splitter
2026-06-08 11:17:27 +00:00 · 2026-02-16 15:58:15 +00:00 · 2026-02-16 15:58:15 +00:00 · 7007c93ca1
commit 7007c93ca1
parent f4e1081a12 1b5d0312b5
70 changed files with 4250 additions and 1838 deletions
--- a/.devcontainer/asset_list/devcontainer.json
+++ b/.devcontainer/asset_list/devcontainer.json
@ -22,7 +22,9 @@
        "jgclark.vscode-todo-highlight",
        "corentinartaud.pdfpreview",
        "ms-python.vscode-python-envs",
-        "ms-python.black-formatter"
+        "ms-python.black-formatter",
+        "GrapeCity.gc-excelviewer",
+        "jakobhoeg.vscode-pokemon"
      ],
      "settings": {
        "files.defaultWorkspace": "/workspaces/model",
--- a/.devcontainer/backend/Dockerfile
+++ b/.devcontainer/backend/Dockerfile
@ -43,4 +43,17 @@ WORKDIR /workspaces/model

 # 6) Make Python find your package
 # Add project root to PYTHONPATH for all processes
-ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}
+ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}
+
+
+# Install terraform (HashiCorp apt repo). The build runs as root, so no sudo is needed.
+RUN apt-get update && apt-get install -y gnupg software-properties-common
+RUN wget -O- https://apt.releases.hashicorp.com/gpg | \
+gpg --dearmor | \
+tee /usr/share/keyrings/hashicorp-archive-keyring.gpg > /dev/null
+RUN echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] \
+https://apt.releases.hashicorp.com $(lsb_release -cs) main" | \
+tee /etc/apt/sources.list.d/hashicorp.list
+# Refresh and install in ONE layer, non-interactively, so a stale cached index is never reused.
+RUN apt-get update && apt-get install -y terraform
+RUN terraform -install-autocomplete
--- a/.devcontainer/backend/devcontainer.json
+++ b/.devcontainer/backend/devcontainer.json
@ -6,7 +6,7 @@
  "workspaceFolder": "/workspaces/model",
  "postStartCommand": "bash .devcontainer/backend/post-install.sh",
  "mounts": [
-    "source=${localEnv:HOME},target=/workspaces/home,type=bind"
+    "source=${localEnv:HOME},target=/home/vscode,type=bind"
  ],
  "customizations": {
    "vscode": {
@ -22,7 +22,11 @@
        "corentinartaud.pdfpreview",
        "ms-python.vscode-python-envs",
        "ms-python.black-formatter",
-        "waderyan.gitblame"
+        "waderyan.gitblame",
+        "GrapeCity.gc-excelviewer",
+        "jakobhoeg.vscode-pokemon",
+        "github.vscode-github-actions",
+        "me-dutour-mathieu.vscode-github-actions"
      ],
      "settings": {
        "files.defaultWorkspace": "/workspaces/model",
@ -38,3 +42,4 @@
    "PYTHONFLAGS": "-Xfrozen_modules=off"
  }
 }
+ 
--- a/.devcontainer/backend/requirements.txt
+++ b/.devcontainer/backend/requirements.txt
@ -9,7 +9,7 @@ mangum==0.19.0
 # AWS
 boto3==1.35.44
 # Data
-openpyxl==3.1.2
+openpyxl==3.1.5
 # Basic
 pytz
 uvicorn[standard]
--- a/.github/workflows/_build_image.yml
+++ b/.github/workflows/_build_image.yml
@ -38,6 +38,8 @@ on:
        required: false
      DEV_DB_NAME:
        required: false
+      EPC_AUTH_TOKEN:
+        required: false

 jobs:
  build:
@ -47,6 +49,7 @@ jobs:
      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+      EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }}

    outputs:
      image_digest: ${{ steps.digest.outputs.image_digest }}
@ -87,14 +90,17 @@ jobs:
            temp=$(eval echo "$line")
            BUILD_ARGS="$BUILD_ARGS --build-arg $temp"
          done <<< "${{ inputs.build_args }}"
-          
-          docker build \
+
+          docker buildx build \
+            --no-cache \
+            --platform linux/amd64 \
+            --provenance=false \
+            --sbom=false \
+            --push \
            -f ${{ inputs.dockerfile_path }} \
            $BUILD_ARGS \
            -t $IMAGE_URI \
            ${{ inputs.build_context }}
-      
-          docker push $IMAGE_URI

      - name: Resolve image digest
        id: digest
--- a/.github/workflows/_deploy_lambda.yml
+++ b/.github/workflows/_deploy_lambda.yml
@ -106,4 +106,10 @@ jobs:
      - name: Terraform Destroy
        if: inputs.terraform_destroy == 'true' && inputs.terraform_apply != 'true'
        working-directory: ${{ inputs.lambda_path }}
-        run: terraform destroy -auto-approve
+        run: |
+          terraform destroy -auto-approve \
+            -var="stage=${{ inputs.stage }}" \
+            -var="lambda_name=${{ inputs.lambda_name }}" \
+            -var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
+            -var="image_digest=${{ inputs.image_digest }}"
+
--- a/.github/workflows/deploy_fastapi_backend.yml
+++ b/.github/workflows/deploy_fastapi_backend.yml
@ -141,3 +141,4 @@ jobs:

          # Deploy to AWS Lambda via Serverless
          sls deploy --stage ${{ github.ref_name }} --verbose
+
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@ -9,6 +9,7 @@ on:
      - '.github/workflows/deploy_terraform.yml'
      - '.github/workflows/_build_image.yml'
      - '.github/workflows/_deploy_lambda.yml'
+  workflow_dispatch:

 jobs:
  determine_stage:
@ -76,10 +77,10 @@ jobs:
        run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan

      - name: Terraform Apply
-        if: env.STAGE == 'prod'
+        if: env.TERRAFORM_APPLY == 'true'
        working-directory: infrastructure/terraform/shared
        run: terraform apply -auto-approve tfplan
-
+ 
  # ============================================================
  # 2️⃣ Build Address 2 UPRN image and Push
  # ============================================================
@ -90,10 +91,19 @@ jobs:
      ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
      dockerfile_path: backend/address2UPRN/handler/Dockerfile
      build_context: .
+      build_args: |
+        DEV_DB_HOST=$DEV_DB_HOST
+        DEV_DB_PORT=$DEV_DB_PORT
+        DEV_DB_NAME=$DEV_DB_NAME
+        EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN
    secrets:
      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
+      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
+      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+      EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}

  # ============================================================
  # 3️⃣ Deploy Address 2 UPRN Lambda
@ -140,7 +150,7 @@ jobs:
  # 3️⃣ Deploy Postcode Splitter Lambda
  # ============================================================
  postcodeSplitter_lambda:
-    needs: [postcodeSplitter_image, determine_stage]
+    needs: [postcodeSplitter_image, determine_stage, address2uprn_lambda]
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: postcodeSplitter
@ -192,4 +202,5 @@ jobs:
    secrets:
      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
-      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@ -9,12 +9,14 @@
            "path": "/bin/bash"
        }
    },
-<<<<<<< HEAD
-=======
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true,
-    "python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
->>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
+    "python.testing.pytestArgs": ["-s", "-q", "--no-cov"],
+
+    "python.languageServer": "Pylance",
+    "python.analysis.typeCheckingMode": "strict",
+    "python.analysis.autoSearchPaths": true,
+    "python.analysis.extraPaths": ["./src"]

    // Hot reload setting that needs to be in user settings
    // "jupyter.runStartupCommands": [
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@ -34,7 +34,7 @@ from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
 logger = setup_logger()

 # OpenAI API Key (set this in your environment variables for security)
-OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "[REDACTED: leaked credential - revoke and rotate]")
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")



--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -13,11 +13,15 @@ from asset_list.utils import get_data
 from dotenv import load_dotenv
 from backend.SearchEpc import SearchEpc

-load_dotenv(dotenv_path="backend/.env")
+load_dotenv(dotenv_path="../backend/.env")
 EPC_AUTH_TOKEN = os.getenv(
    "EPC_AUTH_TOKEN",
 )

+OPENAI_API_KEY = os.getenv(
+    "OPENAI_API_KEY",
+)
+

 def extract_address1(
    asset_list, full_address_col, postcode_col, method="first_two_words"
@ -109,21 +113,21 @@ def app():
    )
    data_filename = "to_standardise_uprns.xlsx"
    sheet_name = "Sheet1"
-    postcode_column = "Postcode"
+    postcode_column = "POSTCODE"
    address1_column = None
    address1_method = "house_number_extraction"
-    fulladdress_column = "Address"
-    address_cols_to_concat = None
+    fulladdress_column = "ADDRESS"
+    address_cols_to_concat = []
    missing_postcodes_method = None
    landlord_year_built = None
    landlord_os_uprn = None
-    landlord_property_type = None
-    landlord_built_form = None
-    landlord_wall_construction = None
-    landlord_roof_construction = None
-    landlord_heating_system = None
+    landlord_property_type = "PROPERTY TYPE"
+    landlord_built_form = None  # Skipped as empty
+    landlord_wall_construction = "wall combined"  # combin F + G
+    landlord_roof_construction = "HEATING SYSTEM"  # Combine I + J
+    landlord_heating_system = None  # Check with Khalim
    landlord_existing_pv = None
-    landlord_property_id = "LLUPRN"
+    landlord_property_id = "UPRN"
    landlord_sap = None
    outcomes_filename = None
    outcomes_sheetname = None
@ -275,7 +279,7 @@ def app():
        if skip is not None and not force_retrieve_data:
            if i <= skip:
                continue
-        chunk = asset_list.standardised_asset_list[i: i + chunk_size]
+        chunk = asset_list.standardised_asset_list[i : i + chunk_size]
        epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
            df=chunk,
            row_id_name=asset_list.DOMNA_PROPERTY_ID,
@ -418,7 +422,7 @@ def app():
    # Retrieve just the data we need
    epc_df = epc_df[
        [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
-        ].rename(columns=asset_list.EPC_API_DATA_NAMES)
+    ].rename(columns=asset_list.EPC_API_DATA_NAMES)

    # Look for columns not in the find my EPC data, which will have happened if we didn't
    # retrieve it in the first place
@ -435,7 +439,7 @@ def app():
        find_my_epc_data[
            [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
            + list(asset_list.FIND_EPC_DATA_NAMES.keys())
-            ].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
+        ].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
        how="left",
        on=asset_list.DOMNA_PROPERTY_ID,
    )
--- a/backend/.env.test
+++ b/backend/.env.test
@ -19,4 +19,4 @@ PLAN_TRIGGER_BUCKET=test
 DATA_BUCKET=test
 EPC_AUTH_TOKEN=test
 ENGINE_SQS_URL=test
-ENERGY_ASSESSMENTS_BUCKET=test
+ENERGY_ASSESSMENTS_BUCKET=test
--- a/backend/Outputs.py
+++ b/backend/Outputs.py
@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3
 from backend.app.utils import sap_to_epc
 from backend.app.db.connection import db_engine
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
-from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    PlanModel,
+    PlanRecommendations,
+)


 class Outputs:
@ -42,7 +46,7 @@ class Outputs:
        "flat_roof_insulation": "Flat roof (Out of scope - prov sum only)",
        "room_in_roof_insulation": "RIR (POA - Prov sum only)",
        "ev_charging": "EV Charging",
-        "battery": "Battery"
+        "battery": "Battery",
    }

    def __init__(self, format, portfolio_id):
@ -67,28 +71,38 @@ class Outputs:
        # Download cleaned data
        self.cleaned_epc_lookup = read_from_s3(
            s3_file_name="cleaned_epc_data/cleaned.bson",
-            bucket_name="retrofit-data-dev"
+            bucket_name="retrofit-data-dev",
        )

        self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False)

    def get_properties_from_db(self):
        # Get properties and their details for a specific portfolio
-        properties_query = self.session.query(
-            PropertyModel,
-            PropertyDetailsEpcModel
-        ).join(
-            PropertyDetailsEpcModel,
-            PropertyModel.id == PropertyDetailsEpcModel.property_id
-        ).filter(
-            PropertyModel.portfolio_id == self.portfolio_id  # Filter by portfolio ID
-        ).all()
+        properties_query = (
+            self.session.query(PropertyModel, PropertyDetailsEpcModel)
+            .join(
+                PropertyDetailsEpcModel,
+                PropertyModel.id == PropertyDetailsEpcModel.property_id,
+            )
+            .filter(
+                PropertyModel.portfolio_id
+                == self.portfolio_id  # Filter by portfolio ID
+            )
+            .all()
+        )

        # Transform properties data to include all fields dynamically
        properties_data = [
-            {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
-             **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
-                PropertyDetailsEpcModel.__table__.columns}}
+            {
+                **{
+                    col.name: getattr(prop.PropertyModel, col.name)
+                    for col in PropertyModel.__table__.columns
+                },
+                **{
+                    col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
+                    for col in PropertyDetailsEpcModel.__table__.columns
+                },
+            }
            for prop in properties_query
        ]

@ -96,10 +110,14 @@ class Outputs:

    def get_plans_from_db(self):

-        plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all()
+        plans_query = (
+            self.session.query(PlanModel)
+            .filter(PlanModel.portfolio_id == self.portfolio_id)
+            .all()
+        )
        # Transform plans data to include all fields dynamically
        plans_data = [
-            {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+            {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
            for plan in plans_query
        ]

@ -107,28 +125,38 @@ class Outputs:

    def get_recommendations_from_db(self, plan_ids):
        # Get recommendations through PlanRecommendations for those plans and that are default
-        recommendations_query = self.session.query(
-            Recommendation,
-            Plan.scenario_id
-        ).join(
-            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
-        ).join(
-            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
-        ).filter(
-            PlanRecommendations.plan_id.in_(plan_ids),
-            Recommendation.default == True  # Filtering for default recommendations
-        ).all()
+        recommendations_query = (
+            self.session.query(Recommendation, PlanModel.scenario_id)
+            .join(
+                PlanRecommendations,
+                Recommendation.id == PlanRecommendations.recommendation_id,
+            )
+            .join(
+                PlanModel,
+                PlanModel.id
+                == PlanRecommendations.plan_id,  # Join with Plan to access scenario_id
+            )
+            .filter(
+                PlanRecommendations.plan_id.in_(plan_ids),
+                Recommendation.default == True,  # Filtering for default recommendations
+            )
+            .all()
+        )

        # Transform recommendations data to include all fields dynamically and include scenario_id
        recommendations_data = [
            {
                **{
-                    col.name: getattr(rec.Recommendation, col.name) if
-                    hasattr(rec, 'Recommendation') else getattr(rec, col.name)
+                    col.name: (
+                        getattr(rec.Recommendation, col.name)
+                        if hasattr(rec, "Recommendation")
+                        else getattr(rec, col.name)
+                    )
                    for col in Recommendation.__table__.columns
                },
-                "Scenario ID": rec.scenario_id
-            } for rec in recommendations_query
+                "Scenario ID": rec.scenario_id,
+            }
+            for rec in recommendations_query
        ]

        return recommendations_data
@ -148,7 +176,9 @@ class Outputs:
            measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None)

            # If the property_id already exists in the collected rows, update it
-            existing_row = next((item for item in rows if item["property_id"] == property_id), None)
+            existing_row = next(
+                (item for item in rows if item["property_id"] == property_id), None
+            )
            if existing_row is None:
                # Create a new row if the property_id doesn't exist
                new_row = {measure: None for measure in all_measures}
@ -196,7 +226,7 @@ class Outputs:
        properties_data = self.get_properties_from_db()

        plans_data = self.get_plans_from_db()
-        plan_ids = [plan['id'] for plan in plans_data]
+        plan_ids = [plan["id"] for plan in plans_data]

        recommendations_data = self.get_recommendations_from_db(plan_ids)
        self.session.close()
@ -209,50 +239,54 @@ class Outputs:
        scenario_ids = plans_df["scenario_id"].unique()

        # We start to create the MDS sheet
-        mds = properties_df[
-            [
-                "property_id",
-                "address",
-                "postcode",
-                "uprn",
-                "current_epc_rating",
-                "current_sap_points",
-                "primary_energy_consumption",
-                "property_type",
-                "built_form",
-                "total_floor_area",
-                "walls",
-                "tenure",
-                "mainfuel",
-                # The bills columns are split out - we include them and aggregate, without appliances
-                "heating_cost_current",
-                "hot_water_cost_current",
-                "lighting_cost_current",
-                "gas_standing_charge",
-                "electricity_standing_charge"
+        mds = (
+            properties_df[
+                [
+                    "property_id",
+                    "address",
+                    "postcode",
+                    "uprn",
+                    "current_epc_rating",
+                    "current_sap_points",
+                    "primary_energy_consumption",
+                    "property_type",
+                    "built_form",
+                    "total_floor_area",
+                    "walls",
+                    "tenure",
+                    "mainfuel",
+                    # The bills columns are split out - we include them and aggregate, without appliances
+                    "heating_cost_current",
+                    "hot_water_cost_current",
+                    "lighting_cost_current",
+                    "gas_standing_charge",
+                    "electricity_standing_charge",
+                ]
            ]
-        ].copy().rename(
-            columns={
-                "address": "Address",
-                "postcode": "Postcode",
-                "uprn": "UPRN",
-                "current_epc_rating": "Pre EPC",
-                "current_sap_points": "EPC Source",
-                "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
-                "property_type": "Property Type",
-                "built_form": "Built Form",
-                "total_floor_area": "Floor area m2 (If known)",
-                "walls": "Wall Type (Mandatory field)",
-                "tenure": "Tenure",
-            }
+            .copy()
+            .rename(
+                columns={
+                    "address": "Address",
+                    "postcode": "Postcode",
+                    "uprn": "UPRN",
+                    "current_epc_rating": "Pre EPC",
+                    "current_sap_points": "EPC Source",
+                    "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
+                    "property_type": "Property Type",
+                    "built_form": "Built Form",
+                    "total_floor_area": "Floor area m2 (If known)",
+                    "walls": "Wall Type (Mandatory field)",
+                    "tenure": "Tenure",
+                }
+            )
        )

        mds["Estimated bill (£ per year)"] = (
-            mds["heating_cost_current"] +
-            mds["hot_water_cost_current"] +
-            mds["lighting_cost_current"] +
-            mds["gas_standing_charge"] +
-            mds["electricity_standing_charge"]
+            mds["heating_cost_current"]
+            + mds["hot_water_cost_current"]
+            + mds["lighting_cost_current"]
+            + mds["gas_standing_charge"]
+            + mds["electricity_standing_charge"]
        )

        mds = mds.drop(
@ -261,65 +295,84 @@ class Outputs:
                "hot_water_cost_current",
                "lighting_cost_current",
                "gas_standing_charge",
-                "electricity_standing_charge"
+                "electricity_standing_charge",
            ]
        )

        # Formatting - Pre EPC is an enum
        mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values]
-        mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0]
+        mds["Wall Type (Mandatory field)"] = (
+            mds["Wall Type (Mandatory field)"].str.split(",").str[0]
+        )
        # Remove average thermal transmittance field
        mds["Wall Type (Mandatory field)"] = np.where(
-            mds["Wall Type (Mandatory field)"].str.contains("Average thermal transmittance"),
+            mds["Wall Type (Mandatory field)"].str.contains(
+                "Average thermal transmittance"
+            ),
            "",
-            mds["Wall Type (Mandatory field)"]
+            mds["Wall Type (Mandatory field)"],
        )

        mds = mds.merge(
-            pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]],
+            pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[
+                ["clean_description", "fuel_type"]
+            ],
            left_on="mainfuel",
            right_on="clean_description",
-            how="left"
+            how="left",
+        )
+        mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(
+            columns=["clean_description", "mainfuel"]
        )
-        mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"])

        mds["Existing Fuel Type"].value_counts()

        mds_output_by_scenario = {}
        for scenario_id in scenario_ids:
-            scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id]
+            scenario_recommendations = recommendations_df[
+                recommendations_df["Scenario ID"] == scenario_id
+            ]

            # For each measure, we create the measure matrix
-            scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations)
+            scenario_measure_matrix = self.make_mds_measure_matrix(
+                scenario_recommendations
+            )

            # Calculate the predicted impact on: SAP, heat demand, bills, kwh
-            recommendation_impacts = scenario_recommendations.groupby("property_id")[
-                ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
-            ].sum().reset_index()
+            recommendation_impacts = (
+                scenario_recommendations.groupby("property_id")[
+                    ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
+                ]
+                .sum()
+                .reset_index()
+            )

            scenario_mds = mds.merge(
                scenario_measure_matrix, how="left", on="property_id"
-            ).merge(
-                recommendation_impacts, how="left", on="property_id"
-            )
+            ).merge(recommendation_impacts, how="left", on="property_id")
            # If we have no recommendations, sap_points, kwh_savings, head_demand will be NaN
            to_clean = [c for c in recommendation_impacts.columns if c != "property_id"]
            for col in to_clean:
                scenario_mds[col].fillna(0, inplace=True)
            scenario_mds.fillna(0, inplace=True)
-            scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"]
+            scenario_mds["Post SAP"] = (
+                scenario_mds["EPC Source"] + scenario_mds["sap_points"]
+            )
            # Round Post SAP down to the nearest integer
            scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x))
-            scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x))
+            scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(
+                lambda x: sap_to_epc(x)
+            )
            scenario_mds["Heating Demand Kwh/m2/y"] = (
-                scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"]
+                scenario_mds["Existing Heating Demand Kwh/m2/y"]
+                - scenario_mds["heat_demand"]
            )

            scenario_mds = scenario_mds.rename(
                columns={
                    "sap_points": "Predicted SAP Points",
                    "kwh_savings": "Energy Saving (Kwh)",
-                    "energy_cost_savings": "Bill Reduction (£ per yr)"
+                    "energy_cost_savings": "Bill Reduction (£ per yr)",
                }
            )

@ -330,7 +383,7 @@ class Outputs:
            save_excel_to_s3(
                df=scenario_mds,
                file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx",
-                bucket_name="retrofit-data-dev"
+                bucket_name="retrofit-data-dev",
            )

    def export(self):
--- a/backend/address2UPRN/handler/Dockerfile
+++ b/backend/address2UPRN/handler/Dockerfile
@ -1,4 +1,17 @@
 FROM public.ecr.aws/lambda/python:3.10
+# FROM python:3.11.10-bullseye
+
+
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+ARG EPC_AUTH_TOKEN
+
+ENV DB_HOST=${DEV_DB_HOST}
+ENV DB_PORT=${DEV_DB_PORT}
+ENV DB_NAME=${DEV_DB_NAME}
+ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}
+

 # Set working directory (Lambda task root)
 WORKDIR /var/task
@ -8,13 +21,17 @@ WORKDIR /var/task
 # -----------------------------
 COPY backend/address2UPRN/handler/requirements.txt .

+
 # Install dependencies into Lambda runtime
 RUN pip install --no-cache-dir -r requirements.txt

-# -----------------------------
-# Copy application code
-# -----------------------------
+
+# Copy necessary files for database and utility imports
 COPY utils/ utils/
+COPY backend/ backend/
+COPY datatypes/ datatypes/
+
+# Copy the handler
 COPY backend/address2UPRN/main.py .

 # -----------------------------
--- a/backend/address2UPRN/handler/requirements.txt
+++ b/backend/address2UPRN/handler/requirements.txt
@ -1,3 +1,11 @@
-epc-api-python==1.0.2
+pandas==2.2.2
+numpy<2.0
+requests
 tqdm
-pandas
+openpyxl
+epc-api-python==1.0.2
+boto3==1.35.44
+sqlmodel
+sqlalchemy==2.0.36
+psycopg2-binary==2.9.10
+pydantic-settings==2.6.0
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@ -3,12 +3,23 @@ import os
 from urllib.parse import urlencode
 import pandas as pd
 from difflib import SequenceMatcher
-from tqdm import tqdm
 from utils.logger import setup_logger
+import re
+from typing import Set
+import json
+import requests
+from uuid import UUID
+import uuid
+from backend.app.db.functions.tasks.Tasks import SubTaskInterface
+from utils.s3 import (
+    save_csv_to_s3,
+    read_csv_from_s3 as read_csv_from_s3_dict,
+    parse_s3_uri,
+)
+from datetime import datetime

 logger = setup_logger()

-import re

 EPC_AUTH_TOKEN = os.getenv(
    "EPC_AUTH_TOKEN",
@ -17,9 +28,28 @@ EPC_AUTH_TOKEN = os.getenv(
 if EPC_AUTH_TOKEN is None:
    raise RuntimeError("EPC_AUTH_TOKEN not defined in env")

-import re
-from difflib import SequenceMatcher
-from typing import Set
+
+def is_valid_postcode(postcode_clean: str) -> bool:
+    """
+    Validate a postcode against the postcodes.io validation endpoint.
+
+    :param postcode_clean: Sanitised postcode (e.g. E84SQ). It is
+        percent-encoded before being placed in the URL path, so stray
+        spaces or slashes cannot alter the request target.
+    :return: True only when the service reports the postcode as valid.
+        False for empty input, any request failure (network error,
+        timeout, non-2xx status) or an unexpected response body.
+    """
+    # Local import: only urlencode is imported from urllib.parse at file level.
+    from urllib.parse import quote
+
+    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
+    if not postcode_clean:
+        return False
+
+    try:
+        resp = requests.get(
+            POSTCODES_IO_VALIDATE_URL.format(postcode=quote(postcode_clean, safe="")),
+            timeout=5,
+        )
+        resp.raise_for_status()
+        payload = resp.json()
+    except (requests.RequestException, ValueError):
+        # Network issues, rate limits, or a body that is not valid JSON.
+        return False
+
+    # Coerce to a real bool so callers never receive None or another JSON value.
+    return isinstance(payload, dict) and payload.get("result") is True


 def levenshtein(a: str, b: str) -> float:
@ -300,27 +330,29 @@ def get_uprn_candidates(
    )


-def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
+def get_uprn_with_epc_df(
+    user_inputed_address: str,
+    epc_df: pd.DataFrame,
+    verbose: bool = False,
+):
    """
-    Return uprn (str)
-    Return False if failed to find a sensible matching epc
-    Return Nons when epc found but no UPRN
+    Return uprn (str) using a pre-fetched EPC dataframe.
+    This avoids calling the API multiple times for the same postcode.
    """
-    df = get_epc_data_with_postcode(postcode=postcode)
-
-    if df.empty:
+    if epc_df.empty:
        return None

    scored_df = get_uprn_candidates(
-        df,
+        epc_df,
        user_address=user_inputed_address,
    )

    # Best score
    best_score = scored_df.iloc[0]["lexiscore"]

-    if best_score <= 0:
-        return None
+    # # Return None if score is below threshold
+    # if best_score < 0.7:
+    #     return None

    # All rank-1 rows (possible draw)
    top_rank_df = scored_df[scored_df["lexirank"] == 1]
@ -330,18 +362,41 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
        return None

    address = top_rank_df["address"].values[0]
-    lexiscore = float(top_rank_df["lexiscore"].values[0])
+    score = float(top_rank_df["lexiscore"].values[0])

-    logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
+    logger.info(f"Address found to be: {address}, with lexiscore {score}")
    # Safe to return the agreed UPRN
    found_uprn = top_rank_df.iloc[0]["uprn"]

    if found_uprn == "":
        return None

-    if return_address:
-        return found_uprn, address
-    return found_uprn
+    if verbose:
+        return (found_uprn, address, score)
+    else:
+        return found_uprn
+
+
+def get_uprn(
+    user_inputed_address: str,
+    postcode: str,
+    verbose: bool = False,
+):
+    """
+    Look up the UPRN for a single address by fetching EPC data for its postcode.
+
+    Fetches the EPC records with get_epc_data_with_postcode and delegates the
+    matching to get_uprn_with_epc_df. For processing multiple addresses in the
+    same postcode, call get_uprn_with_epc_df directly with a pre-fetched
+    dataframe so the API is not queried once per address.
+
+    :param user_inputed_address: Address text to match against the EPC records
+    :param postcode: Postcode used to fetch the EPC records
+    :param verbose: Passed through; when True the result is a
+        (uprn, address, score) tuple instead of the uprn alone
+    :return: Whatever get_uprn_with_epc_df returns: the uprn (or the tuple
+        above), or None when there is no EPC data or the matched record has
+        no UPRN.
+        NOTE(review): this docstring previously listed False for "no sensible
+        match"; the code visible here only returns None on failure - confirm.
+    """
+    df = get_epc_data_with_postcode(postcode=postcode)
+
+    return get_uprn_with_epc_df(
+        user_inputed_address=user_inputed_address,
+        epc_df=df,
+        verbose=verbose,
+    )


 def resolve_uprns_for_postcode_group(
@ -424,148 +479,302 @@ def resolve_uprns_for_postcode_group(
    )


-def test(a, b):
-    assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
+def save_results_to_s3(
+    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
+) -> bool:
+    """
+    Save results DataFrame to S3 as CSV.
+
+    The object key is
+    ``ara_raw_outputs/<task_id>/<sub_task_id>/<timestamp>_<random suffix>.csv``,
+    so repeated calls for the same sub-task never overwrite each other.
+
+    :param results_df: The DataFrame containing results
+    :param task_id: The task ID (first folder level of the S3 key)
+    :param sub_task_id: The sub-task ID (second folder level of the S3 key)
+    :param bucket_name: The S3 bucket name (defaults to the S3_BUCKET_NAME env variable)
+    :return: True if successful, False otherwise. Failures are logged and
+        reported through the return value, so callers must check it.
+    """
+    if bucket_name is None:
+        bucket_name = os.getenv("S3_BUCKET_NAME")
+
+    if not bucket_name:
+        logger.error(
+            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
+        )
+        return False
+
+    try:
+        # Unique file name: ISO timestamp plus a short random suffix
+        file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}"
+        file_key = f"ara_raw_outputs/{task_id}/{sub_task_id}/{file_name}.csv"
+
+        # Save to S3
+        if not save_csv_to_s3(results_df, bucket_name, file_key):
+            logger.error(f"Failed to save results to s3://{bucket_name}/{file_key}")
+            return False
+
+        logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
+        return True
+
+    except Exception as e:
+        logger.error(f"Error saving results to S3: {str(e)}")
+        return False


-def run_all_test():
-    # Basic usage with different post codes styles
-    test(get_epc_data_with_postcode("b93 8sy").shape[0], 63)
-    test(get_epc_data_with_postcode("B938sy").shape[0], 63)
-    test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
-    test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
+def handler(event, context, local=False):
+    """
+    Lambda entry point: resolve UPRNs for batches of addresses stored in S3.
+
+    Each record body (JSON string or dict) must carry ``task_id``,
+    ``sub_task_id`` and ``s3_uri``. For every record the CSV at ``s3_uri`` is
+    loaded, rows are grouped by ``postcode_clean``, EPC data is fetched once
+    per postcode, each address is matched with get_uprn_with_epc_df, and the
+    enriched rows are written back to S3. The subtask status is moved to
+    "in progress", then "completed" or "failed".
+
+    :param event: Lambda event; either a batch with a "Records" list or a
+        single record
+    :param context: Lambda context object (only used for logging)
+    :param local: When True, ``event`` is replaced by a hard-coded test event
+    :return: dict with "statusCode" (500 when every record failed, else 200)
+        and a JSON "body" holding per-record summaries and errors
+    """
+    print("=== Address2UPRN Lambda Handler ===")
+    # getattr keeps local runs working when no Lambda context is supplied
+    print(f"Function: {getattr(context, 'function_name', None)}")
+    print(f"Request ID: {getattr(context, 'aws_request_id', None)}")

-    test(get_uprn("68", "b93 8sy"), "100070989938")
-    test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938")
-    test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633")
-    test(get_uprn("28 A", "se6 4tf"), "100023278633")
-    test(get_uprn("28A", "se6 4tf"), "100023278633")
-    test(get_uprn("6 Aitken Close", "E8 4SQ"), False)
+    # Handle local testing
+    if local is True:
+        event = {
+            "Records": [
+                {
+                    "body": json.dumps(
+                        {
+                            "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
+                            "sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d",
+                            "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-16T12:00:20.257856_7b520c0e.csv",
+                        }
+                    )
+                }
+            ]
+        }

-    # unique case
-    test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198")
-    test(get_uprn("5 ,  1 Semley Gate", "e9 5nh"), "10008238198")
-    test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198")
-    test(get_uprn("1, 5 Semley Gate", "e9 5nh"), False)
-    test(
-        get_uprn("1 Semley Gate", "e9 5nh"), "10008238188"
-    )  # this one return "flat 1, in 1 semley gate"
-    test(
-        get_uprn("48 Oswald Street", "E5 0BT"), False
-    )  # this one return "flat 1, in 1 semley gate"
-    test(
-        get_uprn("42 Oswald Street", "E5 0BT"), False
-    )  # this one return "flat 1, in 1 semley gate"
-    test(
-        get_uprn("46 Oswald Street", "E5 0BT"), False
-    )  # this one return "flat 1, in 1 semley gate"
-    get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
-    get_uprn_candidates(
-        get_epc_data_with_postcode("Cr2 7dl"),
-        "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
-    )
+    print(f"Event: {json.dumps(event, indent=2, default=str)}")
+    print("===================================")

+    # Handle both single event and batch events (SQS, etc.)
+    records = event.get("Records", [event])
+    results = []  # one compact summary per successfully processed record
+    errors = []
+    subtask_interface = SubTaskInterface()

-if __name__ == "__main__":
-    INPUT_FILE = "hackney.xlsx"
-
-    ADDRESS_COL = "Address 1"
-    POSTCODE_COL = "Postcode"
-    UPRN_COL = "UPRN"
-
-    df = pd.read_excel(INPUT_FILE)
-
-    failures = []
-
-    for _, row in tqdm(
-        df.iterrows(),
-        total=len(df),
-        desc="Auditing UPRNs",
-    ):
-        input_address = str(row[ADDRESS_COL]).strip()
-        postcode = str(row[POSTCODE_COL]).strip()
-
-        expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL]))
-
+    for record in records:
+        task_id = None
+        subtask_id = None
        try:
-            epc_df = get_epc_data_with_postcode(postcode)
+            # Parse body (inputs)
+            if isinstance(record.get("body"), str):
+                body = json.loads(record["body"])
+            else:
+                body = record.get("body", {})

-            if epc_df.empty:
-                failures.append(
-                    {
-                        **row.to_dict(),
-                        "found_uprn": None,
-                        "best_match_uprn": None,
-                        "best_match_address": None,
-                        "best_match_lexiscore": None,
-                        "status": "no_epc_results",
-                    }
+            # Validate required fields
+            task_id = body.get("task_id")
+            subtask_id = body.get("sub_task_id")
+            s3_uri = body.get("s3_uri")
+
+            if not task_id:
+                errors.append({"error": "Missing required field: task_id"})
+                continue
+
+            if not subtask_id:
+                errors.append({"error": "Missing required field: sub_task_id"})
+                continue
+
+            if not s3_uri:
+                errors.append({"error": "Missing required field: s3_uri"})
+                continue
+
+            # Convert task_id to UUID
+            try:
+                task_id = UUID(task_id) if isinstance(task_id, str) else task_id
+            except ValueError as e:
+                errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"})
+                continue
+
+            # Convert sub_task_id to UUID
+            try:
+                subtask_id = (
+                    UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id
+                )
+            except ValueError as e:
+                errors.append(
+                    {"error": f"Invalid UUID format for sub_task_id: {str(e)}"}
                )
                continue

-            scored_df = get_uprn_candidates(
-                epc_df,
-                user_address=input_address,
-            )
+            # Update existing subtask to 'in progress'
+            subtask_interface.update_subtask_status(subtask_id, "in progress")
+            logger.info(f"Processing subtask {subtask_id} for task {task_id}")

-            best_row = scored_df.iloc[0]
+            # Parse S3 URI and read CSV from S3
+            logger.info(f"Reading data from S3: {s3_uri}")
+            try:
+                bucket, key = parse_s3_uri(s3_uri)
+                csv_data = read_csv_from_s3_dict(bucket, key)
+                df = pd.DataFrame(csv_data)
+                logger.info(f"Loaded {len(df)} rows from S3")
+            except Exception as s3_error:
+                logger.error(f"Failed to read data from S3: {s3_error}")
+                errors.append(
+                    {"error": "Failed to read data from S3", "details": str(s3_error)}
+                )
+                try:
+                    subtask_interface.update_subtask_status(
+                        subtask_id, "failed", outputs={"error": str(s3_error)}
+                    )
+                except Exception as db_error:
+                    logger.error(f"Failed to update subtask status: {db_error}")
+                continue

-            best_match_uprn = str(best_row["uprn"])
-            best_match_address = best_row["address"]
-            best_match_lexiscore = round(float(best_row["lexiscore"]), 4)
+            # Process the rows
+            logger.info(f"Processing {len(df)} rows for task {task_id}")

-            found_uprn = get_uprn(input_address, postcode)
+            # Create user_input column by concatenating Address columns if not already present
+            if "user_input" not in df.columns:
+                df["user_input"] = (
+                    df["Address 1"].fillna("")
+                    + " "
+                    + df["Address 2"].fillna("")
+                    + " "
+                    + df["Address 3"].fillna("")
+                ).str.strip()
+                logger.info(
+                    "Created user_input column from Address 1, Address 2 and Address 3"
+                )
+            else:
+                logger.info("user_input column already present in data")
+
+            clean_df = df.dropna(subset=["postcode_clean"])
+
+            postcode_to_addresses = {
+                postcode: group.to_dict(orient="records")
+                for postcode, group in clean_df.groupby("postcode_clean", sort=False)
+            }
+
+            logger.info(f"Total postcodes: {len(postcode_to_addresses)}")
+
+            # Process each postcode group
+
+            results_data = []
+
+            for postcode, postcode_rows in postcode_to_addresses.items():
+                logger.info(
+                    f"Processing postcode: {postcode} with {len(postcode_rows)} rows"
+                )
+
+                # Validate postcode before processing
+                # NOTE(review): rows of a skipped postcode (invalid, or EPC fetch
+                # failed below) do not appear in the output file at all.
+                if not is_valid_postcode(postcode):
+                    logger.warning(f"Postcode {postcode} is invalid, skipping")
+                    continue
+
+                # Fetch EPC data once per postcode
+                try:
+                    epc_df = get_epc_data_with_postcode(postcode=postcode)
+                    logger.info(
+                        f"Fetched {len(epc_df)} EPC records for postcode {postcode}"
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Failed to fetch EPC data for postcode {postcode}: {e}"
+                    )
+                    continue
+
+                # Process each address in this postcode with the same EPC data
+                for row in postcode_rows:
+                    try:
+                        user_input = row.get("user_input", "")
+                        if not user_input:
+                            logger.warning(
+                                f"Skipping row with missing user_input for postcode {postcode}"
+                            )
+                            continue
+
+                        # Get UPRN using the pre-fetched EPC data with all return options
+                        result = get_uprn_with_epc_df(
+                            user_inputed_address=user_input, epc_df=epc_df, verbose=True
+                        )
+
+                        # Parse result tuple if successful
+                        if result:
+                            uprn, found_address, score = result
+                            logger.info(
+                                f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})"
+                            )
+
+                            results_data.append(
+                                {
+                                    **row,  # Include all original data
+                                    "uprn": uprn,
+                                    "domna_found_address": found_address,
+                                    "domna_lexiscore": score,
+                                }
+                            )
+                        else:
+                            logger.warning(
+                                f"No UPRN found for {user_input} in {postcode}"
+                            )
+                            results_data.append(
+                                {
+                                    **row,  # Include all original data
+                                    "uprn": None,
+                                    "domna_found_address": None,
+                                    "domna_lexiscore": None,
+                                }
+                            )
+
+                    except Exception as e:
+                        logger.error(
+                            f"Error processing address {row.get('user_input', 'unknown')}: {e}"
+                        )
+                        # Still add the row with error markers
+                        results_data.append(
+                            {
+                                **row,
+                                "uprn": None,
+                                "domna_found_address": None,
+                                "domna_lexiscore": None,
+                                "error": str(e),
+                            }
+                        )
+                        continue
+
+            # Create results DataFrame
+            result_df = pd.DataFrame(results_data)
+
+            # Save results to S3. save_results_to_s3 signals failure by returning
+            # False (it does not raise), so the return value must be checked too.
+            try:
+                saved = save_results_to_s3(result_df, str(task_id), str(subtask_id))
+            except Exception as s3_error:
+                logger.error(f"Failed to save results to S3: {s3_error}")
+                saved = False
+
+            if not saved:
+                # Without an output file the subtask has not really completed
+                errors.append(
+                    {
+                        "error": "Failed to save results to S3",
+                        "sub_task_id": str(subtask_id),
+                    }
+                )
+                try:
+                    subtask_interface.update_subtask_status(
+                        subtask_id,
+                        "failed",
+                        outputs={"error": "Failed to save results to S3"},
+                    )
+                except Exception as db_error:
+                    logger.error(f"Failed to update subtask status: {db_error}")
+                continue
+
+            # Compact per-record summary (counts only, never the rows themselves)
+            summary = {
+                "task_id": str(task_id),
+                "sub_task_id": str(subtask_id),
+                "rows_processed": len(results_data),
+                "rows_matched": sum(1 for r in results_data if r.get("uprn")),
+            }
+            results.append(summary)
+
+            # Mark subtask as completed
+            try:
+                subtask_interface.update_subtask_status(
+                    subtask_id,
+                    "completed",
+                    outputs={
+                        "rows_processed": summary["rows_processed"],
+                        "rows_matched": summary["rows_matched"],
+                    },
+                )
+                logger.info(f"Marked subtask {subtask_id} as completed")
+            except Exception as db_error:
+                logger.error(f"Failed to mark subtask as completed: {db_error}")

        except Exception as e:
-            failures.append(
-                {
-                    **row.to_dict(),
-                    "found_uprn": None,
-                    "best_match_uprn": None,
-                    "best_match_address": None,
-                    "best_match_lexiscore": None,
-                    "status": "exception",
-                    "error": str(e),
-                }
-            )
-            continue
+            logger.error(f"Unexpected error processing record: {e}", exc_info=True)
+            errors.append({"error": "Unexpected error", "details": str(e)})
+            # Mark subtask as failed if we have one
+            if subtask_id:
+                try:
+                    subtask_interface.update_subtask_status(
+                        subtask_id, "failed", outputs={"error": str(e)}
+                    )
+                except Exception as db_error:
+                    logger.error(f"Failed to update subtask status: {db_error}")

-        found_uprn_norm = None if not found_uprn else str(found_uprn)
+    # Return error if all records failed
+    logger.info(f"Processed {len(results)} record(s) with {len(errors)} error(s)")
+    if errors and not results:
+        return {"statusCode": 500, "body": json.dumps({"errors": errors})}

-        if found_uprn_norm != expected_uprn:
-            failures.append(
-                {
-                    **row.to_dict(),
-                    "found_uprn": found_uprn_norm,
-                    "best_match_uprn": best_match_uprn,
-                    "best_match_address": best_match_address,
-                    "best_match_lexiscore": best_match_lexiscore,
-                    "status": ("no_match" if found_uprn_norm is None else "mismatch"),
-                }
-            )
-
-    failures_df = pd.DataFrame(failures)
-
-    print("===================================")
-    print(f"Total rows : {len(df)}")
-    print(f"Failures   : {len(failures_df)}")
-    print("===================================")
-
-    failures_df.to_excel(
-        "hackney_uprn_failures.xlsx",
-        index=False,
-    )
+    return {
+        "statusCode": 200,
+        "body": json.dumps(
+            {"processed": results, "errors": errors if errors else None}
+        ),
+    }


-def handler(event, context):
-    print("hello world")
-    return {"statusCode": 200, "body": "hello world"}
-
-
-# TO do function dispatcher,
-
-# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)
-# fix that
-# Look again at flat 1
-# pandas reader the seperate postcode_splitter
-# dump into s3
+# TODO:
+# Don't add results to return messages as it's too verbose
+# capture the exception as e, into s3, so that the logs go to s3
+# Upload results to s3 as well as csv
--- a/backend/address2UPRN/script.py
+++ b/backend/address2UPRN/script.py
@ -1,3 +1,5 @@
+# One-time script for a customer (forhousing)
+
 import pandas as pd
 from tqdm import tqdm
 from backend.address2UPRN.main import get_uprn
@ -5,20 +7,35 @@ from backend.address2UPRN.main import get_uprn
 # Enable tqdm for pandas
 tqdm.pandas()

-df = pd.read_excel("address2.xlsx")
+file_name = "forhousing.xlsx"
+
+df = pd.read_excel(file_name)


 def extract_uprn(row):
-    print(row["User Input"], row["Postcode"])
-    result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
+    """
+    Resolve one spreadsheet row to (uprn, found address, score).
+
+    Reads the "Address" and "Postcode" columns of ``row`` and returns a
+    three-element pd.Series; all three values are None when no match is found.
+    """
+    user_input = "Address"
+    postcode = "Postcode"
+    # get_uprn only accepts `verbose`; with verbose=True a successful lookup
+    # returns (uprn, address, score). `or None` folds any falsy "no match"
+    # result into None so the check below catches it before unpacking.
+    result = (
+        get_uprn(
+            row[user_input],
+            row[postcode],
+            verbose=True,
+        )
+        or None
+    )

    if result is None:
-        return pd.Series([None, None])
+        return pd.Series([None, None, None])

-    uprn, found_address = result
-    return pd.Series([uprn, found_address])
+    uprn, found_address, score = result
+    return pd.Series([uprn, found_address, score])


-df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
+df[["juntes uprn", "junte found address", "junte score"]] = df.progress_apply(
+    extract_uprn, axis=1
+)

-df.to_excel("outputs2.xlsx", index=False)
+# Strip the extension first so the output is "forhousing_outputs.xlsx"
+# rather than "forhousing.xlsx_outputs.xlsx"
+df.to_excel(f"{file_name.rsplit('.', 1)[0]}_outputs.xlsx", index=False)
+
+# TODO: add lexiscore
+# TODO: run it
+# TODO: give it to danny
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -18,37 +18,37 @@ def resolve_env_file() -> Optional[str]:


 class Settings(BaseSettings):
-    API_KEY: str
+    API_KEY: str = "changeme"
    API_KEY_NAME: str = "X-API-KEY"
-    SECRET_KEY: str
-    ENVIRONMENT: str
-    DATA_BUCKET: str
+    SECRET_KEY: str = "changeme"
+    ENVIRONMENT: str = "changeme"
+    DATA_BUCKET: str = "changeme"
    PLAN_TRIGGER_BUCKET: str
-    ENGINE_SQS_URL: str
+    ENGINE_SQS_URL: str = "changeme"

    # Third parties
-    EPC_AUTH_TOKEN: str
-    GOOGLE_SOLAR_API_KEY: str
+    EPC_AUTH_TOKEN: str = "changeme"
+    GOOGLE_SOLAR_API_KEY: str = "changeme"

    # Database settings
-    DB_HOST: str
-    DB_PASSWORD: str
-    DB_USERNAME: str
-    DB_PORT: str
-    DB_NAME: str
+    DB_HOST: str = "changeme"
+    DB_PASSWORD: str = "changeme"
+    DB_USERNAME: str = "changeme"
+    DB_PORT: str = "changeme"
+    DB_NAME: str = "changeme"

    # Prediction buckets
-    SAP_PREDICTIONS_BUCKET: str
-    CARBON_PREDICTIONS_BUCKET: str
-    HEAT_PREDICTIONS_BUCKET: str
+    SAP_PREDICTIONS_BUCKET: str = "changeme"
+    CARBON_PREDICTIONS_BUCKET: str = "changeme"
+    HEAT_PREDICTIONS_BUCKET: str = "changeme"
    # LIGHTING_COST_PREDICTIONS_BUCKET: str
    # HEATING_COST_PREDICTIONS_BUCKET: str
    # HOT_WATER_COST_PREDICTIONS_BUCKET: str
-    HEATING_KWH_PREDICTIONS_BUCKET: str
-    HOTWATER_KWH_PREDICTIONS_BUCKET: str
+    HEATING_KWH_PREDICTIONS_BUCKET: str = "changeme"
+    HOTWATER_KWH_PREDICTIONS_BUCKET: str = "changeme"

    # Other S3 buckts
-    ENERGY_ASSESSMENTS_BUCKET: str
+    ENERGY_ASSESSMENTS_BUCKET: str = "changeme"

    # Optional AWS creds (only required in local)
    AWS_ACCESS_KEY_ID: Optional[str] = None
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@ -1,5 +1,10 @@
 from sqlalchemy import func
-from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario
+from backend.app.db.models.recommendations import (
+    PlanModel,
+    PlanRecommendations,
+    Recommendation,
+    ScenarioModel,
+)


 def aggregate_portfolio_recommendations(
@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations(
    scenario_id: int,
    total_valuation_increase: float,
    labour_days: float,
-    aggregated_data: dict
+    aggregated_data: dict,
 ):
    # Aggregate multiple fields
    aggregates = (
@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations(
            func.sum(Recommendation.estimated_cost).label("cost"),
            func.sum(Recommendation.total_work_hours).label("total_work_hours"),
            func.sum(Recommendation.kwh_savings).label("energy_savings"),
-            func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
+            func.sum(Recommendation.co2_equivalent_savings).label(
+                "co2_equivalent_savings"
+            ),
            func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
        )
-        .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
-        .join(Plan, Plan.id == PlanRecommendations.plan_id)
+        .join(
+            PlanRecommendations,
+            PlanRecommendations.recommendation_id == Recommendation.id,
+        )
+        .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
        .filter(
-            Plan.portfolio_id == portfolio_id,
-            Plan.scenario_id == scenario_id,
-            Recommendation.default == True
+            PlanModel.portfolio_id == portfolio_id,
+            PlanModel.scenario_id == scenario_id,
+            Recommendation.default == True,
        )
        .one()
    )
@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations(
        "energy_savings": aggregates.energy_savings or 0,
        "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
        "energy_cost_savings": aggregates.energy_cost_savings or 0,
-        **aggregated_data
+        **aggregated_data,
    }

    # Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
-    portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
+    portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one()

    # Update the data
    for key, value in aggregates_dict.items():
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@ -1,17 +1,33 @@
-from sqlalchemy import text
-from sqlalchemy import insert, delete
-from sqlalchemy.orm import Session
+from typing import Any, Dict, List, Optional
+from sqlalchemy import inspect, text, insert, delete, select, update
+from sqlalchemy.orm import Session, Mapper
 from sqlalchemy.exc import SQLAlchemyError
+from sqlmodel import Session
+
 from backend.app.db.models.recommendations import (
-    Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
+    PlanModel,
+    Recommendation,
+    RecommendationMaterials,
+    PlanRecommendations,
+    ScenarioModel,
 )
 from backend.app.db.models.portfolio import PropertyModel
 from backend.app.db.connection import db_session, db_read_session


 def prepare_plan_data(
-    p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations,
-    rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0,
+    p,
+    body,
+    scenario_id,
+    eco_packages,
+    valuations,
+    new_sap_points,
+    new_epc,
+    default_recommendations,
+    rebaselining_carbon=0,
+    rebaselining_heat_demand=0,
+    rebaselining_kwh=0,
+    rebaselining_bills=0,
 ):
    """
    Utility function to prepare the data that goes into the production of a plan. Is a fairly rough and unstructured
@ -32,21 +48,37 @@ def prepare_plan_data(
    """
    # Plan carbon savings
    co2_savings = sum(
-        [r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)]
+        [
+            r["co2_equivalent_savings"]
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
    )
    post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings

    # Plan bill savings
    energy_bill_savings = sum(
-        [r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)]
+        [
+            r["energy_cost_savings"]
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
+    )
+    post_energy_bill = (
+        sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings
    )
-    post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings

    # energy consumption
    energy_consumption_savings = sum(
-        [r["kwh_savings"] for r in default_recommendations if not r.get("already_installed", False)]
+        [
+            r["kwh_savings"]
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
+    )
+    post_energy_consumption = (
+        p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings
    )
-    post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings

    valuation_post_retrofit, valuation_increase = None, None
    if valuations["current_value"]:
@ -54,9 +86,19 @@ def prepare_plan_data(
        valuation_post_retrofit = valuations["average_increased_value"]

    # plan costing data
-    cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)])
+    cost_of_works = sum(
+        [
+            r["total"]
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
+    )
    contingency_cost = sum(
-        [r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)]
+        [
+            r.get("contingency", 0)
+            for r in default_recommendations
+            if not r.get("already_installed", False)
+        ]
    )

    return {
@ -86,7 +128,7 @@ def prepare_plan_data(
        "valuation_increase": valuation_increase,
        "cost_of_works": float(cost_of_works),
        "contingency_cost": float(contingency_cost),
-        "plan_type": eco_packages.get(p.id, (None, None, None))[2]
+        "plan_type": eco_packages.get(p.id, (None, None, None))[2],
    }


@ -97,7 +139,7 @@ def create_plan(session: Session, plan):
    :param plan: dictionary of data representing a plan to be created
    """
    try:
-        new_plan = Plan(**plan)
+        new_plan = PlanModel(**plan)
        session.add(new_plan)
        session.flush()
        session.commit()
@ -120,9 +162,7 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
    ]

    stmt = (
-        insert(Plan)
-        .values(payload)
-        .returning(Plan.id, Plan.property_id)
+        insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id)
    )

    result = session.execute(stmt).all()
@ -133,14 +173,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int

 def create_scenario(session: Session, scenario: dict) -> int:
    existing_scenario = (
-        session.query(Scenario)
+        session.query(ScenarioModel)
        .filter_by(portfolio_id=scenario["portfolio_id"])
        .first()
    )

    scenario["is_default"] = not bool(existing_scenario)

-    new_scenario = Scenario(**scenario)
+    new_scenario = ScenarioModel(**scenario)
    session.add(new_scenario)
    session.flush()  # ensures ID is populated

@ -167,7 +207,9 @@ def create_recommendation(session: Session, recommendation):
        raise e


-def create_recommendation_material(session: Session, recommendation_id, material_id, depth):
+def create_recommendation_material(
+    session: Session, recommendation_id, material_id, depth
+):
    """
    This function will create a record for the recommendation_material in the database if it does not exist.
    :param session: The databse session
@ -177,9 +219,7 @@ def create_recommendation_material(session: Session, recommendation_id, material
    """

    new_recommendation_material = RecommendationMaterials(
-        recommendation_id=recommendation_id,
-        material_id=material_id,
-        depth=depth
+        recommendation_id=recommendation_id, material_id=material_id, depth=depth
    )
    session.add(new_recommendation_material)
    session.flush()
@ -196,13 +236,17 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids):
    """

    # Prepare a list of dictionaries for bulk insert
-    data = [{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids]
+    data = [
+        {"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids
+    ]

    # Bulk insert using SQLAlchemy's core API
    session.execute(insert(PlanRecommendations).values(data))


-def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id):
+def upload_recommendations(
+    session: Session, recommendations_to_upload, property_id, new_plan_id
+):
    try:
        # Prepare data for bulk insert for Recommendation
        recommendations_data = [
@ -213,8 +257,14 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
                "description": rec["description"],
                "estimated_cost": float(rec["total"]),
                "default": rec["default"],
-                "starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None,
-                "new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None,
+                "starting_u_value": (
+                    float(rec.get("starting_u_value"))
+                    if rec.get("starting_u_value")
+                    else None
+                ),
+                "new_u_value": (
+                    float(rec.get("new_u_value")) if rec.get("new_u_value") else None
+                ),
                "sap_points": float(rec["sap_points"]),
                "energy_savings": float(rec["heat_demand"]),
                "kwh_savings": float(rec["kwh_savings"]),
@ -223,13 +273,17 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
                "energy_cost_savings": float(rec["energy_cost_savings"]),
                "labour_days": float(rec["labour_days"]),
                "already_installed": rec["already_installed"],
-                "heat_demand": float(rec["heat_demand"])
+                "heat_demand": float(rec["heat_demand"]),
            }
            for rec in recommendations_to_upload
        ]

        # Insert the recommendations, get back the IDs
-        stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data)
+        stmt = (
+            insert(Recommendation)
+            .returning(Recommendation.id)
+            .values(recommendations_data)
+        )
        result = session.execute(stmt)
        uploaded_recommendation_ids = [row[0] for row in result]

@ -243,11 +297,15 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
                "quantity_unit": part.get("quantity_unit", None),
                "estimated_cost": float(part.get("total", part.get("total_cost"))),
            }
-            for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
+            for rec, recommendation_id in zip(
+                recommendations_to_upload, uploaded_recommendation_ids
+            )
            for part in rec["parts"]
        ]

-        session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
+        session.bulk_insert_mappings(
+            RecommendationMaterials, recommendation_materials_data
+        )

        # flush the changes to get the newly created IDs
        session.flush()
@ -283,25 +341,27 @@ def bulk_upload_recommendations_and_materials(
    plan_ids_by_index = []

    for rec in recommendation_payload:
-        recommendation_rows.append({
-            "property_id": rec["property_id"],
-            "type": rec["type"],
-            "measure_type": rec["measure_type"],
-            "description": rec["description"],
-            "estimated_cost": rec["estimated_cost"],
-            "default": rec["default"],
-            "starting_u_value": rec["starting_u_value"],
-            "new_u_value": rec["new_u_value"],
-            "sap_points": rec["sap_points"],
-            "heat_demand": rec["heat_demand"],
-            "kwh_savings": rec["kwh_savings"],
-            "co2_equivalent_savings": rec["co2_equivalent_savings"],
-            "energy_savings": rec["energy_savings"],
-            "energy_cost_savings": rec["energy_cost_savings"],
-            "total_work_hours": rec["total_work_hours"],
-            "labour_days": rec["labour_days"],
-            "already_installed": rec["already_installed"],
-        })
+        recommendation_rows.append(
+            {
+                "property_id": rec["property_id"],
+                "type": rec["type"],
+                "measure_type": rec["measure_type"],
+                "description": rec["description"],
+                "estimated_cost": rec["estimated_cost"],
+                "default": rec["default"],
+                "starting_u_value": rec["starting_u_value"],
+                "new_u_value": rec["new_u_value"],
+                "sap_points": rec["sap_points"],
+                "heat_demand": rec["heat_demand"],
+                "kwh_savings": rec["kwh_savings"],
+                "co2_equivalent_savings": rec["co2_equivalent_savings"],
+                "energy_savings": rec["energy_savings"],
+                "energy_cost_savings": rec["energy_cost_savings"],
+                "total_work_hours": rec["total_work_hours"],
+                "labour_days": rec["labour_days"],
+                "already_installed": rec["already_installed"],
+            }
+        )

        parts_by_index.append(rec["parts"])
        plan_ids_by_index.append(rec["plan_id"])
@ -310,9 +370,7 @@ def bulk_upload_recommendations_and_materials(
    # 2. Insert recommendations and get IDs
    # ---------------------------------------------------------
    result = session.execute(
-        insert(Recommendation)
-        .values(recommendation_rows)
-        .returning(Recommendation.id)
+        insert(Recommendation).values(recommendation_rows).returning(Recommendation.id)
    )

    recommendation_ids = [row[0] for row in result]
@ -324,19 +382,19 @@ def bulk_upload_recommendations_and_materials(

    for recommendation_id, parts in zip(recommendation_ids, parts_by_index):
        for part in parts:
-            materials_rows.append({
-                "recommendation_id": recommendation_id,
-                "material_id": part["material_id"],
-                "depth": part["depth"],
-                "quantity": part["quantity"],
-                "quantity_unit": part["quantity_unit"],
-                "estimated_cost": part["estimated_cost"],
-            })
+            materials_rows.append(
+                {
+                    "recommendation_id": recommendation_id,
+                    "material_id": part["material_id"],
+                    "depth": part["depth"],
+                    "quantity": part["quantity"],
+                    "quantity_unit": part["quantity_unit"],
+                    "estimated_cost": part["estimated_cost"],
+                }
+            )

    if materials_rows:
-        session.execute(
-            insert(RecommendationMaterials).values(materials_rows)
-        )
+        session.execute(insert(RecommendationMaterials).values(materials_rows))

    # ---------------------------------------------------------
    # 4. Insert plan ↔ recommendation links
@ -346,26 +404,22 @@ def bulk_upload_recommendations_and_materials(
            "plan_id": plan_id,
            "recommendation_id": recommendation_id,
        }
-        for plan_id, recommendation_id in zip(
-            plan_ids_by_index, recommendation_ids
-        )
+        for plan_id, recommendation_id in zip(plan_ids_by_index, recommendation_ids)
    ]

-    session.execute(
-        insert(PlanRecommendations).values(plan_recommendation_rows)
-    )
+    session.execute(insert(PlanRecommendations).values(plan_recommendation_rows))


 def chunked(iterable, size=100):
    for i in range(0, len(iterable), size):
-        yield iterable[i:i + size]
+        yield iterable[i : i + size]


 def get_property_ids(portfolio_id: int) -> list[int]:
    with db_read_session() as session:
        return [
-            pid for (pid,) in
-            session.query(PropertyModel.id)
+            pid
+            for (pid,) in session.query(PropertyModel.id)
            .filter(PropertyModel.portfolio_id == portfolio_id)
            .all()
        ]
@ -381,12 +435,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # recommendation_materials (via recommendation)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation_materials rm
            USING recommendation r
            WHERE rm.recommendation_id = r.id
              AND r.property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -394,12 +450,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # plan_recommendations (via plan)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan_recommendations pr
            USING plan p
            WHERE pr.plan_id = p.id
              AND p.property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -407,13 +465,15 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # funding_package_measures
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM funding_package_measures fpm
            USING funding_package fp, plan p
            WHERE fpm.funding_package_id = fp.id
              AND fp.plan_id = p.id
              AND p.property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -421,10 +481,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # inspections (direct)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM inspections
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -432,12 +494,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # funding_package
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM funding_package fp
            USING plan p
            WHERE fp.plan_id = p.id
              AND p.property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -445,10 +509,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # recommendation (direct — CRITICAL FIX)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -456,10 +522,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # plan (direct)
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -467,18 +535,22 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # property-scoped tables
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM property_details_epc
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM property_targets
            WHERE property_id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -486,10 +558,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
    # properties LAST
    # --------------------------------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM property
            WHERE id = ANY(:property_ids)
-        """),
+        """
+        ),
        params,
    )

@ -510,8 +584,7 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int):

    with db_session() as session:
        session.execute(
-            delete(Scenario)
-            .where(Scenario.portfolio_id == portfolio_id)
+            delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id)
        )

    print("Deleted scenarios for empty portfolio")
@ -530,6 +603,7 @@ def clear_portfolio_in_batches(

    total = (len(property_ids) + property_batch_size - 1) // property_batch_size
    import time
+
    for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1):
        print(f"Deleting batch {i}/{total} ({len(batch)} properties)")
        start_time = time.time()
@ -542,3 +616,61 @@ def clear_portfolio_in_batches(
    delete_portfolio_scenarios_if_empty(portfolio_id)

    print("Portfolio cleared in batches.")
+
+
+def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]:
+    """Return every plan row whose ``portfolio_id`` equals ``portfolio_id``.
+
+    Args:
+        portfolio_id: Primary key of the portfolio whose plans are wanted.
+
+    Returns:
+        A list of ``PlanModel`` instances; empty when nothing matches.
+    """
+    stmt = select(PlanModel).where(PlanModel.portfolio_id == portfolio_id)
+    with db_read_session() as session:
+        # Use Session.execute, as the rest of this module does, instead of
+        # reaching for ``.exec`` through an ``Any`` cast: ``execute`` is the
+        # standard Session API and keeps the call visible to the type checker.
+        return list(session.execute(stmt).scalars().all())
+
+
+def get_scenario(scenario_id: int) -> Optional[ScenarioModel]:
+    """Fetch a single scenario by primary key.
+
+    Args:
+        scenario_id: Value matched against ``ScenarioModel.id``.
+
+    Returns:
+        The matching ``ScenarioModel``, or ``None`` when no row has that id.
+    """
+    stmt = select(ScenarioModel).where(ScenarioModel.id == scenario_id)
+    with db_read_session() as session:
+        # Use Session.execute, as the rest of this module does, instead of
+        # reaching for ``.exec`` through an ``Any`` cast: ``execute`` is the
+        # standard Session API and keeps the call visible to the type checker.
+        return session.execute(stmt).scalar_one_or_none()
+
+
+def bulk_update_plans(
+    plan_models: List[PlanModel],
+    scenario_models: List[ScenarioModel],
+) -> int:
+    """Write the current column values of plans and scenarios back in bulk.
+
+    Each model is flattened into a ``{column_name: value}`` mapping that
+    includes its ``id`` and is passed to ``Session.bulk_update_mappings``.
+    Scenario mappings leave out ``portfolio_id``, so that column is never
+    rewritten by this function.
+
+    Args:
+        plan_models: Plans whose column values should be persisted.
+        scenario_models: Scenarios to persist in the same transaction.
+
+    Returns:
+        The number of plans submitted for update; ``0`` when ``plan_models``
+        is empty.
+    """
+    # NOTE(review): an empty plan list returns before the scenarios are looked
+    # at, so ``scenario_models`` is ignored in that case - confirm intended.
+    if not plan_models:
+        return 0
+
+    # ``id`` stays in every mapping: bulk_update_mappings uses the primary key
+    # to locate the row to update.
+    plan_mappings: List[Dict[str, Any]] = [
+        {c.name: getattr(plan, c.name) for c in plan.__table__.columns}
+        for plan in plan_models
+    ]
+    scenario_mappings: List[Dict[str, Any]] = [
+        {
+            c.name: getattr(scenario, c.name)
+            for c in scenario.__table__.columns
+            if c.name != "portfolio_id"
+        }
+        for scenario in scenario_models
+    ]
+
+    plan_mapper: Mapper[Any] = inspect(PlanModel)
+    scenario_mapper: Mapper[Any] = inspect(ScenarioModel)
+
+    # This function writes, so it uses the module's read/write session
+    # (``db_session``, as the scenario delete does) rather than the read one.
+    with db_session() as session:
+        session.bulk_update_mappings(plan_mapper, plan_mappings)
+        session.bulk_update_mappings(scenario_mapper, scenario_mappings)
+
+        # Explicit commit kept from the original implementation.
+        # NOTE(review): may be redundant if db_session commits on exit.
+        session.commit()
+
+    return len(plan_models)
--- a/backend/app/db/functions/tasks/init.py
+++ b/backend/app/db/functions/tasks/init.py
--- a/backend/app/db/models/funding.py
+++ b/backend/app/db/models/funding.py
@ -1,9 +1,18 @@
 import enum

-from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey
+from sqlalchemy import (
+    Column,
+    Integer,
+    String,
+    Float,
+    Enum,
+    TIMESTAMP,
+    BigInteger,
+    ForeignKey,
+)
 from sqlalchemy.orm import declarative_base
 from sqlalchemy.sql import func
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel
 from backend.app.db.models.materials import MaterialType, Material

 Base = declarative_base()
@ -17,13 +26,17 @@ class SchemeEnum(enum.Enum):


 class FundingPackage(Base):
-    __tablename__ = 'funding_package'
+    __tablename__ = "funding_package"

    id = Column(Integer, primary_key=True, autoincrement=True)
-    plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False)
+    plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False)
    scheme = Column(
-        Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False),
-        nullable=False
+        Enum(
+            SchemeEnum,
+            values_callable=lambda x: [e.value for e in x],
+            create_constraint=False,
+        ),
+        nullable=False,
    )
    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
    project_funding = Column(Float)
@ -34,15 +47,23 @@ class FundingPackage(Base):


 class FundingPackageMeasures(Base):
-    __tablename__ = 'funding_package_measures'
+    __tablename__ = "funding_package_measures"

    id = Column(Integer, primary_key=True, autoincrement=True)
-    funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False)
-    measure = Column(
-        Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
-        nullable=False
+    funding_package_id = Column(
+        BigInteger, ForeignKey(FundingPackage.id), nullable=False
    )
-    material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False)  # Assuming material table exists
+    measure = Column(
+        Enum(
+            MaterialType,
+            values_callable=lambda x: [e.value for e in x],
+            create_constraint=False,
+        ),
+        nullable=False,
+    )
+    material_id = Column(
+        BigInteger, ForeignKey(Material.id), nullable=False
+    )  # Assuming material table exists
    innovation_uplift = Column(Float)
    partial_project_score = Column(Float)
    uplift_project_score = Column(Float)
--- a/backend/app/db/models/portfolio.py
+++ b/backend/app/db/models/portfolio.py
@ -1,7 +1,17 @@
 import enum
 import pytz
 import datetime
-from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
+from sqlalchemy import (
+    Column,
+    Integer,
+    Text,
+    Boolean,
+    Float,
+    DateTime,
+    Enum,
+    ForeignKey,
+    CheckConstraint,
+)
 from sqlalchemy.ext.declarative import declarative_base
 from backend.app.db.models.users import UserModel  # noqa
 from backend.app.db.models.materials import MaterialType
@ -22,7 +32,7 @@ class PortfolioStatus(enum.Enum):
    NEEDS_REVIEW = "needs review"


-class PortfolioGoal(enum.Enum):
+class PortfolioGoal(enum.Enum): # TODO: Move to domain?
    VALUATION_IMPROVEMENT = "Valuation Improvement"
    INCREASING_EPC = "Increasing EPC"
    REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions"
@ -31,23 +41,43 @@ class PortfolioGoal(enum.Enum):


 class Portfolio(Base):
-    __tablename__ = 'portfolio'
+    __tablename__ = "portfolio"
    id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(Text, nullable=False)
    budget = Column(Float)
-    status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
-    goal = Column(Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), nullable=False)
+    status = Column(
+        Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
+    goal = Column(
+        Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
    cost = Column(Float)
    number_of_properties = Column(Integer)
-    co2_equivalent_savings = Column(Float)  # Unit is always tonnes so we don't need to store the unit
-    energy_savings = Column(Float)  # Unit is always kWh so we don't need to store the unit
-    energy_cost_savings = Column(Float)  # Unit is always £ so we don't need to store the unit for the moment
-    property_valuation_increase = Column(Float)  # Unit is always £ so we don't need to store the unit for the moment
-    rental_yield_increase = Column(Float)  # Unit is always £ so we don't need to store the unit for the moment
+    co2_equivalent_savings = Column(
+        Float
+    )  # Unit is always tonnes so we don't need to store the unit
+    energy_savings = Column(
+        Float
+    )  # Unit is always kWh so we don't need to store the unit
+    energy_cost_savings = Column(
+        Float
+    )  # Unit is always £ so we don't need to store the unit for the moment
+    property_valuation_increase = Column(
+        Float
+    )  # Unit is always £ so we don't need to store the unit for the moment
+    rental_yield_increase = Column(
+        Float
+    )  # Unit is always £ so we don't need to store the unit for the moment
    total_work_hours = Column(Float)
    labour_days = Column(Float)
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
-    updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    created_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
+    updated_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
    # Aggregations for summary
    epc_breakdown_pre_retrofit = Column(Text)
    epc_breakdown_post_retrofit = Column(Text)
@ -71,7 +101,7 @@ class PropertyCreationStatus(enum.Enum):
    ERROR = "ERROR"


-class Epc(enum.Enum):
+class Epc(enum.Enum):  # TODO: Move to domain?
    A = "A"
    B = "B"
    C = "C"
@ -82,20 +112,27 @@ class Epc(enum.Enum):


 class PropertyModel(Base):
-    __tablename__ = 'property'
+    __tablename__ = "property"
    id = Column(Integer, primary_key=True, autoincrement=True)
-    portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
+    portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
    creation_status = Column(Enum(PropertyCreationStatus), nullable=False)
    uprn = Column(Integer)
    landlord_property_id = Column(Text)
    building_reference_number = Column(Integer)
-    status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
+    status = Column(
+        Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
    address = Column(Text)
    postcode = Column(Text)
    has_pre_condition_report = Column(Boolean)
    has_recommendations = Column(Boolean)
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
-    updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    created_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
+    updated_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
    property_type = Column(Text)
    built_form = Column(Text)
    local_authority = Column(Text)
@ -127,7 +164,7 @@ rating_lookup = {
    "Average": FeatureRating.AVERAGE,
    "Poor": FeatureRating.POOR,
    "Very Poor": FeatureRating.VERY_POOR,
-    "N/A": FeatureRating.NA
+    "N/A": FeatureRating.NA,
 }


@ -136,32 +173,45 @@ def get_feature_rating_from_string(rating_str: str):


 class PropertyDetailsEpcModel(Base):
-    __tablename__ = 'property_details_epc'
+    __tablename__ = "property_details_epc"
    id = Column(Integer, primary_key=True, autoincrement=True)
-    property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
-    portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
+    property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
+    portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
    full_address = Column(Text)
    lodgement_date = Column(DateTime)
    is_expired = Column(Boolean)
    total_floor_area = Column(Float)
    walls = Column(Text)
-    walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5'))
+    walls_rating = Column(
+        Integer, CheckConstraint("walls_rating>=1 AND walls_rating<=5")
+    )
    roof = Column(Text)
-    roof_rating = Column(Integer, CheckConstraint('roof_rating>=1 AND roof_rating<=5'))
+    roof_rating = Column(Integer, CheckConstraint("roof_rating>=1 AND roof_rating<=5"))
    floor = Column(Text)
-    floor_rating = Column(Integer, CheckConstraint('floor_rating>=1 AND floor_rating<=5'))
+    floor_rating = Column(
+        Integer, CheckConstraint("floor_rating>=1 AND floor_rating<=5")
+    )
    windows = Column(Text)
-    windows_rating = Column(Integer, CheckConstraint('windows_rating>=1 AND windows_rating<=5'))
+    windows_rating = Column(
+        Integer, CheckConstraint("windows_rating>=1 AND windows_rating<=5")
+    )
    heating = Column(Text)
-    heating_rating = Column(Integer, CheckConstraint('heating_rating>=1 AND heating_rating<=5'))
+    heating_rating = Column(
+        Integer, CheckConstraint("heating_rating>=1 AND heating_rating<=5")
+    )
    heating_controls = Column(Text)
    heating_controls_rating = Column(
-        Integer, CheckConstraint('heating_controls_rating>=1 AND heating_controls_rating<=5')
+        Integer,
+        CheckConstraint("heating_controls_rating>=1 AND heating_controls_rating<=5"),
    )
    hot_water = Column(Text)
-    hot_water_rating = Column(Integer, CheckConstraint('hot_water_rating>=1 AND hot_water_rating<=5'))
+    hot_water_rating = Column(
+        Integer, CheckConstraint("hot_water_rating>=1 AND hot_water_rating<=5")
+    )
    lighting = Column(Text)
-    lighting_rating = Column(Integer, CheckConstraint('lighting_rating>=1 AND lighting_rating<=5'))
+    lighting_rating = Column(
+        Integer, CheckConstraint("lighting_rating>=1 AND lighting_rating<=5")
+    )
    mainfuel = Column(Text)
    ventilation = Column(Text)
    solar_pv = Column(Text)
@ -219,7 +269,7 @@ class PropertyDetailsSpatial(Base):


 class PropertyDetailsMeter(Base):
-    __tablename__ = 'property_details_meter'
+    __tablename__ = "property_details_meter"
    id = Column(Integer, primary_key=True, autoincrement=True)
    uprn = Column(Integer, nullable=False)
    energy_supplier = Column(Text)
@ -230,11 +280,13 @@ class PropertyDetailsMeter(Base):


 class PropertyTargetsModel(Base):
-    __tablename__ = 'property_targets'
+    __tablename__ = "property_targets"
    id = Column(Integer, primary_key=True, autoincrement=True)
-    property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
-    portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
+    portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
+    created_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
    epc = Column(Enum(Epc))
    heat_demand = Column(Text)

@ -242,23 +294,36 @@ class PropertyTargetsModel(Base):
 class PortfolioUsers(Base):
    __tablename__ = "portfolioUsers"
    id = Column(Integer, primary_key=True, autoincrement=True)
-    user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
-    portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
+    user_id = Column(Integer, ForeignKey("user.id"), nullable=False)
+    portfolioId = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
    role = Column(Text, nullable=False)
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
-    updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
+    created_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
+    updated_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )


 class PropertyInstalledMeasures(Base):
    """
    This model keeps a record of the installed measures for each property, at the UPRN level
    """
-    __tablename__ = 'property_installed_measures'
+
+    __tablename__ = "property_installed_measures"
    id = Column(Integer, primary_key=True, autoincrement=True)
    uprn = Column(Integer, nullable=False)
    measure_type = Column(
-        Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
-        nullable=False
+        Enum(
+            MaterialType,
+            values_callable=lambda x: [e.value for e in x],
+            create_constraint=False,
+        ),
+        nullable=False,
+    )
+    created_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
+    installed_at = Column(
+        DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
    )
-    created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
-    installed_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@ -1,7 +1,19 @@
-from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum
-from sqlalchemy.orm import declarative_base
+from typing import Iterable, List, NamedTuple, Optional, Type
+from sqlalchemy import (
+    Column,
+    BigInteger,
+    String,
+    Float,
+    Boolean,
+    TIMESTAMP,
+    ForeignKey,
+    Enum,
+)
+from sqlalchemy.orm import declarative_base, Mapped, mapped_column
 from sqlalchemy.sql import func
-from backend.app.db.models.portfolio import Portfolio, PropertyModel
+from datetime import datetime
+
+from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel
 from backend.app.db.models.materials import Material
 from backend.app.db.models.portfolio import Epc
 from datatypes.enums import QuantityUnits
@ -10,8 +22,12 @@ import enum
 Base = declarative_base()


+def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]:
+    """Collect the ``.value`` of each member of ``enum_cls``, in iteration order."""
+    values: List[str] = []
+    for member in enum_cls:
+        values.append(member.value)
+    return values
+
+
 class Recommendation(Base):
-    __tablename__ = 'recommendation'
+    __tablename__ = "recommendation"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
    property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
@ -37,19 +53,24 @@ class Recommendation(Base):


 class RecommendationMaterials(Base):
-    __tablename__ = 'recommendation_materials'
+    __tablename__ = "recommendation_materials"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
-    recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
+    recommendation_id = Column(
+        BigInteger, ForeignKey("recommendation.id"), nullable=False
+    )
    material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False)
    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
    depth = Column(Float, nullable=False)
    quantity = Column(Float, nullable=False)
-    quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False)
+    quantity_unit = Column(
+        Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]),
+        nullable=False,
+    )
    estimated_cost = Column(Float, nullable=False)


-class PlanTypeEnum(enum.Enum):
+class PlanTypeEnum(enum.Enum):  # TODO: move this to domain?
    SOLAR_ECO4 = "solar_eco4"
    SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4"
    EMPTY_CAVITY_ECO = "empty_cavity_eco"
@ -57,20 +78,36 @@ class PlanTypeEnum(enum.Enum):
    EXTRACTION_ECO = "extraction_eco"


-class Plan(Base):
-    __tablename__ = 'plan'
+class PlanModel(Base):
+    __tablename__ = "plan"

-    id = Column(BigInteger, primary_key=True, autoincrement=True)
-    name = Column(String, nullable=True, default="")
-    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
-    property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
-    scenario_id = Column(BigInteger, ForeignKey('scenario.id'))  # Doesn't have to be linked to a scenario
-    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
-    is_default = Column(Boolean, nullable=False)
-    valuation_increase_lower_bound = Column(Float)
-    valuation_increase_upper_bound = Column(Float)
-    valuation_increase_average = Column(Float)
-    plan_type = Column(
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+
+    name: Mapped[Optional[str]] = mapped_column(String, nullable=True, default="")
+
+    portfolio_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey(Portfolio.id), nullable=False
+    )
+
+    property_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey(PropertyModel.id), nullable=False
+    )
+
+    scenario_id: Mapped[Optional[int]] = mapped_column(
+        BigInteger, ForeignKey("scenario.id")
+    )
+
+    created_at: Mapped[datetime] = mapped_column(  # type: ignore
+        TIMESTAMP, nullable=False, server_default=func.now()
+    )
+
+    is_default: Mapped[bool] = mapped_column(Boolean, nullable=False)
+
+    valuation_increase_lower_bound: Mapped[Optional[float]] = mapped_column(Float)
+    valuation_increase_upper_bound: Mapped[Optional[float]] = mapped_column(Float)
+    valuation_increase_average: Mapped[Optional[float]] = mapped_column(Float)
+
+    plan_type: Mapped[Optional[PlanTypeEnum]] = mapped_column(
        Enum(
            PlanTypeEnum,
            name="plan_type",
@ -79,73 +116,90 @@ class Plan(Base):
        ),
        nullable=True,
    )
-    post_sap_points = Column(Float)
-    post_epc_rating = Column(Enum(Epc))
-    post_co2_emissions = Column(Float)
-    co2_savings = Column(Float)
-    post_energy_bill = Column(Float)
-    energy_bill_savings = Column(Float)
-    post_energy_consumption = Column(Float)  # energy demand in kWh/year
-    energy_consumption_savings = Column(Float)
-    valuation_post_retrofit = Column(Float)
-    valuation_increase = Column(Float)
+
+    post_sap_points: Mapped[Optional[float]] = mapped_column(Float)
+    post_epc_rating: Mapped[Optional[Epc]] = mapped_column(Enum(Epc))
+    post_co2_emissions: Mapped[Optional[float]] = mapped_column(Float)
+    co2_savings: Mapped[Optional[float]] = mapped_column(Float)
+    post_energy_bill: Mapped[Optional[float]] = mapped_column(Float)
+    energy_bill_savings: Mapped[Optional[float]] = mapped_column(Float)
+    post_energy_consumption: Mapped[Optional[float]] = mapped_column(Float)
+    energy_consumption_savings: Mapped[Optional[float]] = mapped_column(Float)
+    valuation_post_retrofit: Mapped[Optional[float]] = mapped_column(Float)
+    valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
+
    # Financial metrics, excluding funding
-    cost_of_works = Column(Float)
-    contingency_cost = Column(Float)
+    cost_of_works: Mapped[Optional[float]] = mapped_column(Float)
+    contingency_cost: Mapped[Optional[float]] = mapped_column(Float)


 class PlanRecommendations(Base):
-    __tablename__ = 'plan_recommendations'
+    __tablename__ = "plan_recommendations"

    id = Column(BigInteger, primary_key=True, autoincrement=True)
-    plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
-    recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
+    plan_id = Column(BigInteger, ForeignKey("plan.id"), nullable=False)
+    recommendation_id = Column(
+        BigInteger, ForeignKey("recommendation.id"), nullable=False
+    )


-class Scenario(Base):
-    __tablename__ = 'scenario'
+class ScenarioModel(Base):
+    """ORM mapping of the ``scenario`` table using typed ``Mapped[...]`` columns.
+
+    Holds the scenario configuration (goal, file paths, flags) plus the
+    aggregate metrics that previously sat at the portfolio level.
+    """
+
+    __tablename__ = "scenario"

-    id = Column(BigInteger, primary_key=True, autoincrement=True)
-    name = Column(String, nullable=False)
-    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
-    budget = Column(Float)
-    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
-    housing_type = Column(String, nullable=False)
-    goal = Column(String, nullable=False)
-    goal_value = Column(String, nullable=False)
-    trigger_file_path = Column(String, nullable=False)
-    already_installed_file_path = Column(String)
-    patches_file_path = Column(String)
-    non_invasive_recommendations_file_path = Column(String)
-    exclusions = Column(String)
-    multi_plan = Column(Boolean, default=False)
-    is_default = Column(Boolean, default=False, nullable=False)
+    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
+    name: Mapped[str] = mapped_column(String, nullable=False)
+    created_at: Mapped[datetime] = mapped_column(
+        TIMESTAMP, nullable=False, server_default=func.now()
+    )
+    budget: Mapped[Optional[float]] = mapped_column(Float)
+    portfolio_id: Mapped[int] = mapped_column(
+        BigInteger, ForeignKey(Portfolio.id), nullable=False
+    )
+    housing_type: Mapped[str] = mapped_column(String, nullable=False)
+    goal: Mapped[PortfolioGoal] = mapped_column(
+        Enum(PortfolioGoal, values_callable=portfolio_goal_values, name="goal"),
+        nullable=False,
+    )
+    goal_value: Mapped[str] = mapped_column(String, nullable=False)
+    trigger_file_path: Mapped[str] = mapped_column(String, nullable=False)
+    already_installed_file_path: Mapped[Optional[str]] = mapped_column(String)
+    patches_file_path: Mapped[Optional[str]] = mapped_column(String)
+    non_invasive_recommendations_file_path: Mapped[Optional[str]] = mapped_column(
+        String
+    )
+    exclusions: Mapped[Optional[str]] = mapped_column(String)
+    # nullable=True is spelled out: with a non-Optional ``Mapped[bool]`` annotation
+    # SQLAlchemy would otherwise infer NOT NULL, whereas the previous
+    # ``Column(Boolean, default=False)`` declaration was nullable.
+    # NOTE(review): existing rows may therefore hold NULL here - confirm, or add a
+    # migration before tightening this column to NOT NULL.
+    multi_plan: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True)
+    is_default: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)

     # Add in the fields we need, which were previously sitting at the portfolio level
-    cost = Column(Float)
-    contingency = Column(Float)
-    funding = Column(Float)
-    total_work_hours = Column(Float)
-    energy_savings = Column(Float)
-    co2_equivalent_savings = Column(Float)
-    energy_cost_savings = Column(Float)
-    epc_breakdown_pre_retrofit = Column(String)
-    epc_breakdown_post_retrofit = Column(String)
-    number_of_properties = Column(BigInteger)
-    n_units_to_retrofit = Column(BigInteger)
-    co2_per_unit_pre_retrofit = Column(String)
-    co2_per_unit_post_retrofit = Column(String)
-    energy_bill_per_unit_pre_retrofit = Column(String)
-    energy_bill_per_unit_post_retrofit = Column(String)
-    energy_consumption_per_unit_pre_retrofit = Column(String)
-    energy_consumption_per_unit_post_retrofit = Column(String)
-    valuation_improvement_per_unit = Column(String)
-    cost_per_unit = Column(String)
-    cost_per_co2_saved = Column(String)
-    cost_per_sap_point = Column(String)
-    valuation_return_on_investment = Column(String)
-    property_valuation_increase = Column(Float)
-    labour_days = Column(Float)
+    cost: Mapped[Optional[float]] = mapped_column(Float)
+    contingency: Mapped[Optional[float]] = mapped_column(Float)
+    funding: Mapped[Optional[float]] = mapped_column(Float)
+    total_work_hours: Mapped[Optional[float]] = mapped_column(Float)
+    energy_savings: Mapped[Optional[float]] = mapped_column(Float)
+    co2_equivalent_savings: Mapped[Optional[float]] = mapped_column(Float)
+    energy_cost_savings: Mapped[Optional[float]] = mapped_column(Float)
+    epc_breakdown_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    epc_breakdown_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    number_of_properties: Mapped[Optional[int]] = mapped_column(BigInteger)
+    n_units_to_retrofit: Mapped[Optional[int]] = mapped_column(BigInteger)
+    co2_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    co2_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    energy_bill_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    energy_bill_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
+    energy_consumption_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(
+        String
+    )
+    energy_consumption_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(
+        String
+    )
+    valuation_improvement_per_unit: Mapped[Optional[str]] = mapped_column(String)
+    cost_per_unit: Mapped[Optional[str]] = mapped_column(String)
+    cost_per_co2_saved: Mapped[Optional[str]] = mapped_column(String)
+    cost_per_sap_point: Mapped[Optional[str]] = mapped_column(String)
+    valuation_return_on_investment: Mapped[Optional[str]] = mapped_column(String)
+    property_valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
+    labour_days: Mapped[Optional[float]] = mapped_column(Float)


 class MeasureType(enum.Enum):
@ -201,3 +255,12 @@ class InstalledMeasure(Base):
    heat_demand_savings = Column(Float)
    source = Column(String)
    is_active = Column(Boolean, nullable=False, default=True)
+
+
+def enum_values(e: Iterable[PlanTypeEnum]) -> list[str]:
+    """Collect the ``.value`` of every member yielded by ``e``, in iteration order.
+
+    NOTE(review): this helper sits at the end of the module; if it is referenced
+    while a class body above is being evaluated, it must be defined earlier - verify.
+    """
+    values: list[str] = []
+    for member in e:
+        values.append(member.value)
+    return values
+
+
+class PlanPersistence(NamedTuple):
+    """Pair of ORM rows for one plan: the plan row and its scenario row.
+
+    Being a NamedTuple, it can be unpacked as ``plan, scenario = ...`` or read
+    by attribute name.
+    """
+
+    # ORM row for the plan itself.
+    plan: PlanModel
+    # ORM row for the scenario the plan belongs to.
+    scenario: ScenarioModel
--- a/backend/app/domain/classes/plan.py
+++ b/backend/app/domain/classes/plan.py
@ -0,0 +1,150 @@
+from __future__ import annotations
+from dataclasses import replace
+from typing import Optional
+
+from backend.app.db.models.portfolio import PortfolioGoal
+from backend.app.db.models.recommendations import (
+    PlanModel,
+    PlanPersistence,
+    ScenarioModel,
+)
+from backend.app.domain.classes.scenario import Scenario
+from backend.app.domain.records.plan_record import PlanRecord
+from backend.app.utils import sap_to_epc
+
+
+class Plan:
+    """Domain object for a retrofit plan: an immutable PlanRecord plus its Scenario.
+
+    ``id`` is the database identifier when the plan was loaded from (or is meant
+    to update) an existing row, otherwise ``None``.
+    """
+
+    def __init__(
+        self, record: PlanRecord, scenario: Scenario, id: Optional[int] = None
+    ):
+        self.id: Optional[int] = id
+        self.record: PlanRecord = record
+        self.scenario: Scenario = scenario
+
+    @classmethod
+    def from_sqlalchemy(cls, plan_model: PlanModel, scenario: Scenario) -> Plan:
+        """Build a Plan from an ORM row and an already-mapped Scenario.
+
+        Args:
+            plan_model: ORM row to copy field values from.
+            scenario: Domain scenario the plan belongs to.
+
+        Returns:
+            A Plan carrying ``plan_model.id`` and a PlanRecord copy of its columns.
+
+        Raises:
+            ValueError: If ``scenario`` is missing (falsy).
+        """
+        if not scenario:
+            raise ValueError(f"No Scenario associated with Plan of ID {plan_model.id}")
+
+        # NOTE(review): PlanModel.name and PlanModel.scenario_id have no PlanRecord
+        # counterpart; scenario_id is re-derived from ``scenario.id`` in
+        # to_sqlalchemy, while name is not carried through at all.
+        record = PlanRecord(
+            property_id=plan_model.property_id,
+            portfolio_id=plan_model.portfolio_id,
+            created_at=plan_model.created_at,
+            is_default=plan_model.is_default,
+            valuation_increase_lower_bound=plan_model.valuation_increase_lower_bound,
+            valuation_increase_upper_bound=plan_model.valuation_increase_upper_bound,
+            valuation_increase_average=plan_model.valuation_increase_average,
+            plan_type=plan_model.plan_type,
+            post_sap_points=plan_model.post_sap_points,
+            post_epc_rating=plan_model.post_epc_rating,
+            post_co2_emissions=plan_model.post_co2_emissions,
+            co2_savings=plan_model.co2_savings,
+            post_energy_bill=plan_model.post_energy_bill,
+            energy_bill_savings=plan_model.energy_bill_savings,
+            post_energy_consumption=plan_model.post_energy_consumption,
+            energy_consumption_savings=plan_model.energy_consumption_savings,
+            valuation_post_retrofit=plan_model.valuation_post_retrofit,
+            valuation_increase=plan_model.valuation_increase,
+            cost_of_works=plan_model.cost_of_works,
+            contingency_cost=plan_model.contingency_cost,
+        )
+        return cls(record=record, scenario=scenario, id=plan_model.id)
+
+    @property
+    def is_compliant(self) -> bool:
+        """Whether the plan meets its scenario's goal.
+
+        Only ``PortfolioGoal.INCREASING_EPC`` is supported.
+
+        Raises:
+            NotImplementedError: For any other goal.
+        """
+        goal: PortfolioGoal = self.scenario.record.goal
+
+        match goal:
+            case PortfolioGoal.INCREASING_EPC:
+                return self._is_compliant_epc()
+            case _:
+                raise NotImplementedError(
+                    f"Compliance check is not implemented for goal {goal!r}"
+                )
+
+    def to_sqlalchemy(self) -> PlanPersistence:
+        """Convert this plan and its scenario into fresh ORM model instances.
+
+        Returns:
+            A PlanPersistence tuple ``(plan, scenario)`` of unattached ORM objects.
+        """
+        scenario_record = self.scenario.record
+
+        # NOTE(review): ScenarioRecord has no portfolio_id, so the ScenarioModel
+        # built here leaves that non-nullable column unset - confirm the
+        # persistence layer does not write it back as NULL.
+        scenario_model = ScenarioModel(
+            id=self.scenario.id,
+            name=scenario_record.name,
+            created_at=scenario_record.created_at,
+            housing_type=scenario_record.housing_type,
+            goal=scenario_record.goal,
+            goal_value=scenario_record.goal_value,
+            trigger_file_path=scenario_record.trigger_file_path,
+            multi_plan=scenario_record.multi_plan,
+            is_default=scenario_record.is_default,
+            budget=scenario_record.budget,
+            already_installed_file_path=scenario_record.already_installed_file_path,
+            patches_file_path=scenario_record.patches_file_path,
+            non_invasive_recommendations_file_path=scenario_record.non_invasive_recommendations_file_path,
+            exclusions=scenario_record.exclusions,
+            cost=scenario_record.cost,
+            contingency=scenario_record.contingency,
+            funding=scenario_record.funding,
+            total_work_hours=scenario_record.total_work_hours,
+            energy_savings=scenario_record.energy_savings,
+            co2_equivalent_savings=scenario_record.co2_equivalent_savings,
+            energy_cost_savings=scenario_record.energy_cost_savings,
+            epc_breakdown_pre_retrofit=scenario_record.epc_breakdown_pre_retrofit,
+            epc_breakdown_post_retrofit=scenario_record.epc_breakdown_post_retrofit,
+            number_of_properties=scenario_record.number_of_properties,
+            n_units_to_retrofit=scenario_record.n_units_to_retrofit,
+            co2_per_unit_pre_retrofit=scenario_record.co2_per_unit_pre_retrofit,
+            co2_per_unit_post_retrofit=scenario_record.co2_per_unit_post_retrofit,
+            energy_bill_per_unit_pre_retrofit=scenario_record.energy_bill_per_unit_pre_retrofit,
+            energy_bill_per_unit_post_retrofit=scenario_record.energy_bill_per_unit_post_retrofit,
+            energy_consumption_per_unit_pre_retrofit=scenario_record.energy_consumption_per_unit_pre_retrofit,
+            energy_consumption_per_unit_post_retrofit=scenario_record.energy_consumption_per_unit_post_retrofit,
+            valuation_improvement_per_unit=scenario_record.valuation_improvement_per_unit,
+            cost_per_unit=scenario_record.cost_per_unit,
+            cost_per_co2_saved=scenario_record.cost_per_co2_saved,
+            cost_per_sap_point=scenario_record.cost_per_sap_point,
+            valuation_return_on_investment=scenario_record.valuation_return_on_investment,
+            property_valuation_increase=scenario_record.property_valuation_increase,
+            labour_days=scenario_record.labour_days,
+        )
+
+        record = self.record
+
+        plan_model = PlanModel(
+            id=self.id,
+            property_id=record.property_id,
+            portfolio_id=record.portfolio_id,
+            scenario_id=self.scenario.id,
+            created_at=record.created_at,
+            is_default=record.is_default,
+            valuation_increase_lower_bound=record.valuation_increase_lower_bound,
+            valuation_increase_upper_bound=record.valuation_increase_upper_bound,
+            valuation_increase_average=record.valuation_increase_average,
+            plan_type=record.plan_type,
+            post_sap_points=record.post_sap_points,
+            post_epc_rating=record.post_epc_rating,
+            post_co2_emissions=record.post_co2_emissions,
+            co2_savings=record.co2_savings,
+            post_energy_bill=record.post_energy_bill,
+            energy_bill_savings=record.energy_bill_savings,
+            post_energy_consumption=record.post_energy_consumption,
+            energy_consumption_savings=record.energy_consumption_savings,
+            valuation_post_retrofit=record.valuation_post_retrofit,
+            valuation_increase=record.valuation_increase,
+            cost_of_works=record.cost_of_works,
+            contingency_cost=record.contingency_cost,
+        )
+
+        return PlanPersistence(plan=plan_model, scenario=scenario_model)
+
+    def set_default(self, value: bool) -> None:
+        """Set ``is_default`` on both the plan record and its scenario record.
+
+        The records are frozen dataclasses, so each is replaced by a modified copy.
+
+        NOTE(review): this also flips the scenario's own ``is_default``. If one
+        scenario row backs plans of several properties, the value finally
+        persisted depends on write order - confirm this coupling is intended.
+        """
+        self.record = replace(self.record, is_default=value)
+        self.scenario.record = replace(self.scenario.record, is_default=value)
+
+    def _is_compliant_epc(self) -> bool:
+        """Check the post-retrofit EPC rating against the scenario's goal value.
+
+        The stored ``post_epc_rating`` is preferred; otherwise the rating is
+        derived from ``post_sap_points`` via ``sap_to_epc``. With neither value
+        available the plan is treated as non-compliant.
+        """
+        goal_value: str = self.scenario.record.goal_value
+
+        # Explicit None checks: a SAP score of 0 is a real (very poor) score and
+        # must not be mistaken for "no data", which plain truthiness would do.
+        if self.record.post_epc_rating is not None:
+            post_epc = self.record.post_epc_rating.value
+        elif self.record.post_sap_points is not None:
+            post_epc = sap_to_epc(self.record.post_sap_points)
+        else:
+            return False
+
+        # Ratings are compared as strings. This assumes both sides are band
+        # letters where an earlier letter is a better rating ("A" <= "C").
+        # TODO confirm Epc values and sap_to_epc output use that format.
+        return post_epc <= goal_value
--- a/backend/app/domain/classes/scenario.py
+++ b/backend/app/domain/classes/scenario.py
@ -0,0 +1,58 @@
+from __future__ import annotations
+from dataclasses import replace
+from typing import Optional
+
+from backend.app.db.models.recommendations import ScenarioModel
+from backend.app.domain.records.scenario_record import ScenarioRecord
+
+
+class Scenario:
+    """Domain object for a scenario: an immutable ScenarioRecord plus an optional id."""
+
+    def __init__(self, record: ScenarioRecord, id: Optional[int] = None):
+        self.record = record
+        self.id = id
+
+    @classmethod
+    def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario:
+        """Copy the columns of an ORM scenario row into a new domain Scenario."""
+        src = scenario_model
+        return cls(
+            ScenarioRecord(
+                # Configuration fields
+                name=src.name,
+                created_at=src.created_at,
+                housing_type=src.housing_type,
+                goal=src.goal,
+                goal_value=src.goal_value,
+                trigger_file_path=src.trigger_file_path,
+                multi_plan=src.multi_plan,
+                is_default=src.is_default,
+                budget=src.budget,
+                already_installed_file_path=src.already_installed_file_path,
+                patches_file_path=src.patches_file_path,
+                non_invasive_recommendations_file_path=src.non_invasive_recommendations_file_path,
+                exclusions=src.exclusions,
+                # Aggregate metrics
+                cost=src.cost,
+                contingency=src.contingency,
+                funding=src.funding,
+                total_work_hours=src.total_work_hours,
+                energy_savings=src.energy_savings,
+                co2_equivalent_savings=src.co2_equivalent_savings,
+                energy_cost_savings=src.energy_cost_savings,
+                epc_breakdown_pre_retrofit=src.epc_breakdown_pre_retrofit,
+                epc_breakdown_post_retrofit=src.epc_breakdown_post_retrofit,
+                number_of_properties=src.number_of_properties,
+                n_units_to_retrofit=src.n_units_to_retrofit,
+                co2_per_unit_pre_retrofit=src.co2_per_unit_pre_retrofit,
+                co2_per_unit_post_retrofit=src.co2_per_unit_post_retrofit,
+                energy_bill_per_unit_pre_retrofit=src.energy_bill_per_unit_pre_retrofit,
+                energy_bill_per_unit_post_retrofit=src.energy_bill_per_unit_post_retrofit,
+                energy_consumption_per_unit_pre_retrofit=src.energy_consumption_per_unit_pre_retrofit,
+                energy_consumption_per_unit_post_retrofit=src.energy_consumption_per_unit_post_retrofit,
+                valuation_improvement_per_unit=src.valuation_improvement_per_unit,
+                cost_per_unit=src.cost_per_unit,
+                cost_per_co2_saved=src.cost_per_co2_saved,
+                cost_per_sap_point=src.cost_per_sap_point,
+                valuation_return_on_investment=src.valuation_return_on_investment,
+                property_valuation_increase=src.property_valuation_increase,
+                labour_days=src.labour_days,
+            ),
+            src.id,
+        )
+
+    def set_default(self, value: bool) -> None:
+        """Swap the frozen record for a copy whose ``is_default`` equals ``value``."""
+        updated = replace(self.record, is_default=value)
+        self.record = updated
--- a/backend/app/domain/records/plan_record.py
+++ b/backend/app/domain/records/plan_record.py
@ -0,0 +1,31 @@
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional
+
+from backend.app.db.models.portfolio import Epc
+from backend.app.db.models.recommendations import PlanTypeEnum
+
+
+@dataclass(frozen=True)
+class PlanRecord:
+    """Immutable value object holding the data fields of a plan.
+
+    Frozen, so changes are made by building a modified copy (for example with
+    ``dataclasses.replace``). The database id is not part of the record.
+    """
+
+    # Required fields
+    property_id: int
+    portfolio_id: int
+    created_at: datetime
+    is_default: bool
+
+    # Optional metrics; None means no value is recorded
+    valuation_increase_lower_bound: Optional[float] = None
+    valuation_increase_upper_bound: Optional[float] = None
+    valuation_increase_average: Optional[float] = None
+    plan_type: Optional[PlanTypeEnum] = None
+    post_sap_points: Optional[float] = None
+    post_epc_rating: Optional[Epc] = None
+    post_co2_emissions: Optional[float] = None
+    co2_savings: Optional[float] = None
+    post_energy_bill: Optional[float] = None
+    energy_bill_savings: Optional[float] = None
+    # Energy demand in kWh/year, per the comment on the ORM column this mirrors
+    post_energy_consumption: Optional[float] = None
+    energy_consumption_savings: Optional[float] = None
+    valuation_post_retrofit: Optional[float] = None
+    valuation_increase: Optional[float] = None
+    # Financial metrics, excluding funding
+    cost_of_works: Optional[float] = None
+    contingency_cost: Optional[float] = None
--- a/backend/app/domain/records/scenario_record.py
+++ b/backend/app/domain/records/scenario_record.py
@ -0,0 +1,47 @@
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Optional
+
+from backend.app.db.models.portfolio import PortfolioGoal
+
+
+@dataclass(frozen=True)
+class ScenarioRecord:
+    """Immutable value object holding the data fields of a scenario.
+
+    Frozen, so changes are made by building a modified copy (for example with
+    ``dataclasses.replace``). The record carries neither a database id nor a
+    portfolio_id.
+    """
+
+    # Required configuration fields
+    name: str
+    created_at: datetime
+    housing_type: str
+    goal: PortfolioGoal
+    goal_value: str
+    trigger_file_path: str
+    multi_plan: bool
+    is_default: bool
+    # Optional configuration fields
+    budget: Optional[float] = None
+    already_installed_file_path: Optional[str] = None
+    patches_file_path: Optional[str] = None
+    non_invasive_recommendations_file_path: Optional[str] = None
+    exclusions: Optional[str] = None
+
+    # Optional aggregate metrics; several are typed as strings
+    # NOTE(review): the string-typed metrics presumably hold serialised values - confirm format
+    cost: Optional[float] = None
+    contingency: Optional[float] = None
+    funding: Optional[float] = None
+    total_work_hours: Optional[float] = None
+    energy_savings: Optional[float] = None
+    co2_equivalent_savings: Optional[float] = None
+    energy_cost_savings: Optional[float] = None
+    epc_breakdown_pre_retrofit: Optional[str] = None
+    epc_breakdown_post_retrofit: Optional[str] = None
+    number_of_properties: Optional[int] = None
+    n_units_to_retrofit: Optional[int] = None
+    co2_per_unit_pre_retrofit: Optional[str] = None
+    co2_per_unit_post_retrofit: Optional[str] = None
+    energy_bill_per_unit_pre_retrofit: Optional[str] = None
+    energy_bill_per_unit_post_retrofit: Optional[str] = None
+    energy_consumption_per_unit_pre_retrofit: Optional[str] = None
+    energy_consumption_per_unit_post_retrofit: Optional[str] = None
+    valuation_improvement_per_unit: Optional[str] = None
+    cost_per_unit: Optional[str] = None
+    cost_per_co2_saved: Optional[str] = None
+    cost_per_sap_point: Optional[str] = None
+    valuation_return_on_investment: Optional[str] = None
+    property_valuation_increase: Optional[float] = None
+    labour_days: Optional[float] = None
--- a/backend/app/requirements/requirements.txt
+++ b/backend/app/requirements/requirements.txt
@ -10,7 +10,7 @@ mangum==0.19.0
 # AWS
 boto3==1.35.44
 # Data
-openpyxl==3.1.2
+openpyxl==3.1.5
 # Basic
 pytz
 sqlmodel
--- a/backend/categorisation/init.py
+++ b/backend/categorisation/init.py
--- a/backend/categorisation/categorisation_trigger_request.py
+++ b/backend/categorisation/categorisation_trigger_request.py
@ -0,0 +1,5 @@
+from pydantic import BaseModel
+
+
+class CategorisationTriggerRequest(BaseModel):
+    """Pydantic model with a single required integer field, ``portfolio_id``.
+
+    NOTE(review): presumably the payload that triggers categorisation of one
+    portfolio - confirm where it is consumed.
+    """
+
+    portfolio_id: int
--- a/backend/categorisation/handler/Dockerfile
+++ b/backend/categorisation/handler/Dockerfile
@ -0,0 +1,47 @@
+FROM public.ecr.aws/lambda/python:3.11
+# For local running:
+# FROM python:3.11.10-bullseye
+
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+
+# Set working directory (Lambda task root)
+WORKDIR /var/task
+
+# Environment
+ENV DB_HOST=${DEV_DB_HOST}
+ENV DB_PORT=${DEV_DB_PORT}
+ENV DB_NAME=${DEV_DB_NAME}
+
+# -----------------------------
+# Copy requirements FIRST (for Docker layer caching)
+# -----------------------------
+COPY backend/categorisation/handler/requirements.txt .
+
+# Install dependencies into Lambda runtime
+RUN pip install --no-cache-dir -r requirements.txt
+
+# -----------------------------
+# Copy application code
+# -----------------------------
+# The env file is copied after the dependency layers so that editing it does
+# not invalidate the cached pip install layer above.
+# NOTE(review): this bakes backend/.env.test into the image as backend/.env -
+# confirm that is intended for deployed builds.
+COPY backend/.env.test backend/.env
+
+COPY utils/ utils/
+COPY backend/categorisation/ backend/categorisation/
+
+# NOTE(review): backend/categorisation/processor.py imports modules under
+# backend/app/db/functions, backend/app/db/models and backend/app/domain, none
+# of which are copied below; they will be needed once the handler calls it.
+COPY backend/app/db/connection.py backend/app/db/connection.py
+COPY backend/app/config.py backend/app/config.py
+
+COPY backend/__init__.py backend/__init__.py
+COPY backend/app/__init__.py backend/app/__init__.py
+COPY backend/app/db/__init__.py backend/app/db/__init__.py
+
+
+# -----------------------------
+# Lambda handler
+# -----------------------------
+CMD ["backend/categorisation/handler/handler.handler"]
+# For local running
+# CMD ["python", "-m", "backend.categorisation.handler.handler"]
--- a/backend/categorisation/handler/handler.py
+++ b/backend/categorisation/handler/handler.py
@ -0,0 +1,10 @@
+from typing import Any, Mapping
+from utils.logger import setup_logger
+
+
+logger = setup_logger()
+
+
+def handler(event: Mapping[str, Any], context: Any) -> None:
+    """Lambda entry point placeholder: accepts the event and context and does nothing."""
+    return None
--- a/backend/categorisation/handler/requirements.txt
+++ b/backend/categorisation/handler/requirements.txt
@ -0,0 +1,3 @@
+sqlmodel
+pydantic-settings
+psycopg2-binary==2.9.10
--- a/backend/categorisation/local_runner.py
+++ b/backend/categorisation/local_runner.py
@ -0,0 +1,11 @@
+from backend.categorisation.processor import process_portfolio
+
+
+def main(portfolio_id: int = 556) -> None:
+    """Run the categorisation processor locally for a single portfolio.
+
+    Args:
+        portfolio_id: Portfolio to process. Defaults to 556, the value that was
+            previously hard-coded here, so calling ``main()`` behaves as before.
+    """
+    process_portfolio(portfolio_id)
+
+
+if __name__ == "__main__":
+    main()
--- a/backend/categorisation/processor.py
+++ b/backend/categorisation/processor.py
@ -0,0 +1,93 @@
+from collections import defaultdict
+from typing import Dict, List
+
+from backend.app.db.functions.recommendations_functions import (
+    bulk_update_plans,
+    get_plans_by_portfolio_id,
+    get_scenario,
+)
+from backend.app.db.models.recommendations import PlanModel, ScenarioModel
+from backend.app.domain.classes.plan import Plan
+from backend.app.domain.classes.scenario import Scenario
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def process_portfolio(portfolio_id: int) -> None:
+    """Pick and persist the default plan for every property in a portfolio.
+
+    Loads the portfolio's plans, groups them by property, chooses the cheapest
+    relevant plan per property and updates the ``is_default`` flags to match.
+
+    Args:
+        portfolio_id: Portfolio whose plans are processed.
+
+    Raises:
+        ValueError: If a property group contains no plans.
+    """
+    # Use the module logger, as the rest of this module does, rather than print.
+    logger.info(f"Processing portfolio {portfolio_id}")
+    plans: List[Plan] = _load_plans_for_portfolio(portfolio_id)
+    plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans)
+
+    # Keys are PlanRecord.property_id values (database ids, not UPRNs).
+    for property_id, property_plans in plans_by_property.items():
+        if not property_plans:
+            raise ValueError(f"No plans for property {property_id}")
+
+        cheapest_plan = _choose_cheapest_relevant_plan(property_plans)
+        _update_default_flags(property_plans, cheapest_plan)
+
+
+def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]:
+    """Load a portfolio's plans and map them, with their scenarios, to domain objects.
+
+    Plans without a ``scenario_id`` are logged and skipped.
+
+    Args:
+        portfolio_id: Portfolio whose plans are fetched.
+
+    Returns:
+        One Plan per stored plan that has a scenario.
+    """
+    plan_models = get_plans_by_portfolio_id(portfolio_id)
+    logger.info(f"Got {len(plan_models)} plans from database")
+
+    # Plans may share a scenario, so each scenario row is fetched once per id
+    # instead of once per plan (get_scenario is presumably a database round-trip).
+    scenario_models: Dict[int, ScenarioModel] = {}
+    plans: List[Plan] = []
+
+    for model in plan_models:
+        if not model.scenario_id:
+            logger.info(f"No Scenario associated with Plan of ID {model.id}")
+            continue
+
+        if model.scenario_id not in scenario_models:
+            scenario_models[model.scenario_id] = get_scenario(model.scenario_id)
+
+        # Only the ORM row is cached. Each plan still gets its own Scenario
+        # object, because Plan.set_default mutates the scenario it holds.
+        scenario = Scenario.from_sqlalchemy(scenario_models[model.scenario_id])
+        plans.append(Plan.from_sqlalchemy(model, scenario))
+
+    logger.info(f"Successfully mapped {len(plans)} plans and scenarios to domain objects")
+
+    return plans
+
+
+def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]:
+    """Bucket plans by ``record.property_id``, keeping input order inside each bucket."""
+    buckets: dict[int, List[Plan]] = defaultdict(list)
+    for item in plans:
+        property_id = item.record.property_id
+        buckets[property_id].append(item)
+    return buckets
+
+
+def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan:
+    """Return the cheapest compliant plan, or the cheapest plan if none comply.
+
+    Plans with no ``cost_of_works`` are treated as infinitely expensive. On a
+    tie the first plan in list order is returned.
+    """
+    compliant: List[Plan] = [candidate for candidate in plans if candidate.is_compliant]
+    candidates: List[Plan] = compliant if compliant else plans
+
+    return min(
+        candidates,
+        key=lambda candidate: (
+            float("inf")
+            if candidate.record.cost_of_works is None
+            else candidate.record.cost_of_works
+        ),
+    )
+
+
+def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None:
+    """Make ``cheapest_plan`` the only default among ``plans`` and persist changes.
+
+    Only plans whose ``is_default`` flag actually changes are converted to ORM
+    models and passed to ``bulk_update_plans``; nothing is written otherwise.
+    """
+    changed: List[Plan] = []
+    for candidate in plans:
+        wanted: bool = candidate.id == cheapest_plan.id
+        if candidate.record.is_default == wanted:
+            continue
+        candidate.set_default(wanted)
+        changed.append(candidate)
+
+    if not changed:
+        return
+
+    plan_models: List[PlanModel] = []
+    scenario_models: List[ScenarioModel] = []
+    for candidate in changed:
+        plan_row, scenario_row = candidate.to_sqlalchemy()
+        plan_models.append(plan_row)
+        scenario_models.append(scenario_row)
+
+    bulk_update_plans(plan_models, scenario_models)
--- a/backend/categorisation/tests/test_plan_is_compliant.py
+++ b/backend/categorisation/tests/test_plan_is_compliant.py
@ -0,0 +1,73 @@
+from typing import Callable
+import pytest
+from datetime import datetime
+
+from backend.app.domain.classes.plan import Plan
+from backend.app.domain.classes.scenario import Scenario
+from backend.app.domain.records.plan_record import PlanRecord
+from backend.app.domain.records.scenario_record import ScenarioRecord
+from backend.app.db.models.portfolio import Epc, PortfolioGoal
+
+
+@pytest.fixture
+def created_at_datetime() -> datetime:
+    """Current local time, used as ``created_at`` by the record fixtures below."""
+    now = datetime.now()
+    return now
+
+
+@pytest.fixture
+def epc_c_scenario(created_at_datetime: datetime) -> "Scenario":
+    """Scenario (id 1) whose goal is INCREASING_EPC with target value "C"."""
+    return Scenario(
+        id=1,
+        record=ScenarioRecord(
+            name="EPC C",
+            created_at=created_at_datetime,
+            housing_type="",
+            goal=PortfolioGoal.INCREASING_EPC,
+            goal_value="C",
+            trigger_file_path="",
+            multi_plan=False,
+            is_default=False,
+        ),
+    )
+
+
+@pytest.fixture
+def plan_factory(
+    epc_c_scenario: "Scenario", created_at_datetime: datetime
+) -> Callable[[int, "Epc"], "Plan"]:
+    """Return a builder for plans attached to the EPC C scenario fixture."""
+
+    def _create_plan(post_sap_points: int, post_epc_rating: "Epc") -> "Plan":
+        # Only the two post-retrofit rating fields vary between test cases.
+        return Plan(
+            id=1,
+            scenario=epc_c_scenario,
+            record=PlanRecord(
+                property_id=1,
+                portfolio_id=1,
+                created_at=created_at_datetime,
+                is_default=False,
+                post_sap_points=post_sap_points,
+                post_epc_rating=post_epc_rating,
+            ),
+        )
+
+    return _create_plan
+
+
+@pytest.mark.parametrize(
+    "post_sap_points, post_epc_rating, expected_compliance",
+    [
+        (75, Epc.C, True),
+        (100, Epc.A, True),
+        (60, Epc.D, False),
+    ],
+)
+def test_scenario_goal_is_epc_c(
+    plan_factory: Callable[[int, "Epc"], "Plan"],
+    post_sap_points: int,
+    post_epc_rating: "Epc",
+    expected_compliance: bool,
+) -> None:
+    """A plan is compliant with an EPC C goal only when its rating is C or better."""
+    # arrange
+    plan_under_test = plan_factory(post_sap_points, post_epc_rating)
+
+    # act + assert
+    assert plan_under_test.is_compliant == expected_compliance
--- a/backend/condition/condition_trigger_request.py
+++ b/backend/condition/condition_trigger_request.py
@ -29,5 +29,5 @@ class ConditionTriggerRequest(BaseModel):
 # {
 #     "file_type": "LBWF",
 #     "trigger_file_bucket": "condition-data-dev",
-#     "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx",
+#     "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx"
 # }
--- a/backend/postcode_splitter/handler/Dockerfile
+++ b/backend/postcode_splitter/handler/Dockerfile
@ -1,9 +1,28 @@
-FROM public.ecr.aws/lambda/python:3.10
+FROM public.ecr.aws/lambda/python:3.11
+
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+ENV DB_HOST=${DEV_DB_HOST}
+ENV DB_PORT=${DEV_DB_PORT}
+ENV DB_NAME=${DEV_DB_NAME}

 # Set working directory (Lambda task root)
 WORKDIR /var/task

-# -----------------------------
+COPY backend/postcode_splitter/handler/requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy necessary files for database and utility imports
+COPY utils/ utils/
+COPY backend/ backend/
+COPY datatypes/ datatypes/
+
+# Copy the handler
+COPY backend/postcode_splitter/main.py .
+
 # Lambda handler
-# -----------------------------
 CMD ["main.handler"]
+
--- a/backend/postcode_splitter/handler/requirements.txt
+++ b/backend/postcode_splitter/handler/requirements.txt
@ -0,0 +1,11 @@
+pandas==2.2.2
+numpy<2.0
+requests
+tqdm
+openpyxl
+epc-api-python==1.0.2
+boto3==1.35.44
+sqlmodel
+sqlalchemy==2.0.36
+psycopg2-binary==2.9.10
+pydantic-settings==2.6.0
--- a/backend/postcode_splitter/main.py
+++ b/backend/postcode_splitter/main.py
@ -1,127 +1,278 @@
+import os
+import sys
+import json
 import pandas as pd
 import requests
-from backend.address2UPRN.main import (
-    resolve_uprns_for_postcode_group,
-    get_epc_data_with_postcode,
+import boto3
+from uuid import UUID, uuid4
+from utils.s3 import (
+    read_csv_from_s3 as read_csv_from_s3_dict,
+    save_csv_to_s3,
+    parse_s3_uri,
 )
+from utils.logger import setup_logger
 from tqdm import tqdm
+from backend.app.db.functions.tasks.Tasks import SubTaskInterface
+from datetime import datetime
+
+logger = setup_logger()


-def sanitise_postcode(postcode: str) -> str | None:
+def upload_batch_to_s3(
+    batch_df: pd.DataFrame,
+    task_id: str,
+    sub_task_id: str,
+    bucket_name: str | None = None,
+) -> str:
    """
-    Normalise postcode for grouping.
-
-    - Uppercase
-    - Remove all whitespace
+    Upload a batch DataFrame to S3 as a CSV file.
+
+    The object key has the form
+    ``ara_postcode_splitter_batches/<task_id>/<sub_task_id>/<timestamp>_<random>.csv``.
+
+    Args:
+        batch_df: Rows to write.
+        task_id: Parent task ID, used as part of the object key.
+        sub_task_id: Subtask ID, used as part of the object key.
+        bucket_name: Target bucket; when None the S3_BUCKET_NAME environment
+            variable is used instead.
+
+    Returns:
+        The ``s3://<bucket>/<key>`` URI of the uploaded file.
+
+    Raises:
+        ValueError: If no bucket is configured, or if save_csv_to_s3 reports failure.
+        Exception: Anything raised while building the key or saving is logged
+            and re-raised unchanged.
    """
-    if pd.isna(postcode):
-        return None
+    if bucket_name is None:
+        bucket_name = os.getenv("S3_BUCKET_NAME")

-    return postcode.upper().replace(" ", "")
-
-
-def is_valid_postcode(postcode_clean: str) -> bool:
-    """
-    Validate postcode using postcodes.io.
-
-    Expects a sanitised postcode (e.g. E84SQ).
-    Returns True if valid, False otherwise.
-    """
-    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
-    if not postcode_clean:
-        return False
+    if not bucket_name:
+        logger.error(
+            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
+        )
+        raise ValueError("S3_BUCKET_NAME not configured")

    try:
-        resp = requests.get(
-            POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
-            timeout=5,
+        # Timestamp plus a short random suffix keeps keys unique per upload
+        file_name = f"{datetime.now().isoformat()}_{str(uuid4())[:8]}"
+        file_key = (
+            f"ara_postcode_splitter_batches/{task_id}/{sub_task_id}/{file_name}.csv"
        )
-        resp.raise_for_status()
-        return resp.json().get("result", False)
-    except requests.RequestException:
-        # Network issues, rate limits, etc.
-        return False
+
+        success = save_csv_to_s3(batch_df, bucket_name, file_key)
+    except Exception as e:
+        logger.error(f"Error uploading batch to S3: {str(e)}")
+        raise
+
+    # Checked outside the try block so a reported failure is logged only once
+    if not success:
+        logger.error("Failed to upload batch to S3")
+        raise ValueError("Failed to save CSV to S3")
+
+    s3_uri = f"s3://{bucket_name}/{file_key}"
+    logger.info(f"Successfully uploaded batch to {s3_uri}")
+    return s3_uri


-def main():
-    df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability")
-    df = df.head(500)
+def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> str:
+    """
+    Send a batch to the address2UPRN SQS queue with S3 reference.

-    # Sanitise postcodes
-    df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)
+    The queue URL is read from the ADDRESS2UPRN_QUEUE_URL environment variable.
+
+    Args:
+        task_id: The parent task ID
+        sub_task_id: The new subtask ID for this batch
+        s3_uri: S3 URI pointing to the batch CSV file

-    # --- validate AFTER grouping (save API calls) ---
+    Returns:
+        Message ID from SQS
+
+    Raises:
+        ValueError: If ADDRESS2UPRN_QUEUE_URL is not set.
+    """
+    sqs_client = boto3.client("sqs")
+    queue_url = os.getenv("ADDRESS2UPRN_QUEUE_URL")

-    # Get unique, non-null postcodes
-    unique_postcodes = df["postcode_clean"].dropna().unique()
+    if not queue_url:
+        raise ValueError("ADDRESS2UPRN_QUEUE_URL environment variable not set")

-    # Validate each postcode once, TODOadd a progress bar
-    postcode_validity = {
-        pc: is_valid_postcode(pc)
-        for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
+    # The message carries only the IDs and the S3 location, not the rows themselves
+    message_body = {
+        "task_id": task_id,
+        "sub_task_id": sub_task_id,
+        "s3_uri": s3_uri,
    }

-    # Map validity back onto dataframe
-    df["postcode_valid"] = df["postcode_clean"].map(postcode_validity)
+    # The body is serialised to a JSON string before sending
+    response = sqs_client.send_message(
+        QueueUrl=queue_url,
+        MessageBody=json.dumps(message_body),
+    )

+    logger.info(
+        f"Sent message to address2UPRN queue. "
+        f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {response['MessageId']}"
+    )
+
+    return response["MessageId"]
+
+
+def create_batch_and_send_to_address2uprn(
+    batch_df: pd.DataFrame,
+    task_id: str,
+    sub_task_id: str,
+    subtask_interface: SubTaskInterface,
+    bucket_name: str,
+) -> str:
+    """
+    Store one batch in S3, register a subtask for it and queue it for address2UPRN.
+
+    Args:
+        batch_df: Rows belonging to this batch.
+        task_id: Parent task ID.
+        sub_task_id: ID of the subtask being split; only used in the S3 key.
+        subtask_interface: Used to create the subtask record for the batch.
+        bucket_name: Bucket the batch CSV is uploaded to.
+
+    Returns:
+        The ID returned by `subtask_interface.create_subtask` for the new subtask.
+    """
+    parent_task = str(task_id)
+
+    # Store the rows first so the subtask and the queue message can point at them
+    batch_uri = upload_batch_to_s3(
+        batch_df, parent_task, str(sub_task_id), bucket_name
+    )
+
+    # Register the batch as its own subtask, recording where its input lives
+    subtask_inputs = {"task_id": parent_task, "s3_uri": batch_uri}
+    new_sub_task_id = subtask_interface.create_subtask(
+        task_id=task_id, inputs=subtask_inputs
+    )
+    logger.info(f"Created batch subtask {new_sub_task_id}")
+
+    # Queue the batch by reference to its S3 location
+    send_to_address2uprn_queue(
+        task_id=parent_task,
+        sub_task_id=str(new_sub_task_id),
+        s3_uri=batch_uri,
+    )
+    return new_sub_task_id
+
+
+def handler(event, context, local=False):
+    """
+    Lambda entry point: split an uploaded address CSV into postcode-based batches.
+
+    For each record in the event the handler reads `task_id`, `sub_task_id` and
+    `s3_uri` from the record body, marks the subtask "in progress", loads the
+    CSV from S3, normalises the `postcode` column and dispatches the rows via
+    `create_batch_and_send_to_address2uprn`, then marks the subtask "completed".
+
+    Batching: fewer than 500 rows go out as a single batch. Otherwise rows are
+    grouped by cleaned postcode and groups are packed into batches of at most
+    500 rows; a single postcode with 500 or more rows is sent as its own batch.
+
+    Args:
+        event: Either an event with a "Records" list or a single record-like dict.
+        context: Lambda context; `function_name` and `aws_request_id` are printed.
+        local: When truthy, a hard-coded example event and the
+            "retrofit-data-dev" bucket are used instead of the real inputs.
+
+    Returns:
+        A dict with `statusCode` 200 and a JSON `body`.
+    """
+    print(f"Function: {context.function_name}")
+    print(f"Request ID: {context.aws_request_id}")
+
+    # Example SQS message for testing (copy and paste into SQS):
+    if local is True:
+        event = {
+            "Records": [
+                {
+                    "body": json.dumps(
+                        {
+                            "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
+                            "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
+                            "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv",
+                        }
+                    )
+                }
+            ]
+        }
+    # Handle both single event and batch events (SQS, etc.)
+    records = event.get("Records", [event])
    results = []
+    errors = []
+    subtask_interface = SubTaskInterface()
+    bucket_name = os.getenv("S3_BUCKET_NAME")
+    if local:
+        bucket_name = "retrofit-data-dev"

-    for postcode, group_df in tqdm(
-        df[df["postcode_valid"]].groupby("postcode_clean"),
-        desc="Resolving UPRNs by postcode",
-    ):
-        try:
-            epc_df = get_epc_data_with_postcode(postcode)
+    for record in records:
+        if local:
+            record = records[0]
+        task_id = None
+        subtask_id = None
+        # Parse body (inputs)

-            if epc_df.empty:
-                tmp = group_df.copy()
-                tmp["found_uprn"] = None
-                tmp["status"] = "no_epc_results"
-                results.append(tmp)
-                continue
+        # The body may arrive as a JSON string or as an already-parsed dict
+        if isinstance(record.get("body"), str):
+            body = json.loads(record["body"])
+        else:
+            body = record.get("body", {})

-            resolved = resolve_uprns_for_postcode_group(
-                group_df=group_df,
-                epc_df=epc_df,
+        # Validate required fields
+        task_id = body.get("task_id")
+        subtask_id = body.get("sub_task_id")
+        s3_uri = body.get("s3_uri")
+
+        # Convert task_id to UUID
+        task_id = UUID(task_id) if isinstance(task_id, str) else task_id
+        subtask_id = UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id
+
+        # Mark subtask as in progress
+        subtask_interface.update_subtask_status(subtask_id, "in progress")
+        logger.info(f"Marked subtask {subtask_id} as in progress")
+
+        # Read CSV from S3
+        bucket, key = parse_s3_uri(s3_uri)
+        logger.info(f"S3 Bucket: {bucket}, Key: {key}")
+
+        csv_data = read_csv_from_s3_dict(bucket, key)
+        df = pd.DataFrame(csv_data)
+
+        logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns")
+
+        # Sanitise postcodes
+        df["postcode_clean"] = df["postcode"].str.upper().str.replace(" ", "")
+
+        df = df.dropna(subset=["postcode_clean"])
+
+        batch_size = 500
+        if df.shape[0] < batch_size:
+            create_batch_and_send_to_address2uprn(
+                batch_df=df,
+                task_id=task_id,
+                sub_task_id=subtask_id,
+                subtask_interface=subtask_interface,
+                bucket_name=bucket_name,
            )
+        else:
+            postcode_to_addresses = {
+                postcode: group
+                for postcode, group in df.groupby("postcode_clean", sort=False)
+            }

-            results.append(resolved)
+            count = 0
+            buffer = []

-        except Exception as e:
-            tmp = group_df.copy()
-            tmp["found_uprn"] = None
-            tmp["status"] = "exception"
-            tmp["error"] = str(e)
-            results.append(tmp)
+            for postcode, group_df in postcode_to_addresses.items():
+                group_len = len(group_df)

-    final_df = pd.concat(results, ignore_index=True)
-    a = final_df[
-        [
-            "best_match_lexiscore",
-            "Address 1",
-            "best_match_address",
-            "Postcode",
-            "UPRN",
-            "best_match_uprn",
-        ]
-    ]  # add levi score to viewing
-    b = final_df[final_df["best_match_lexiscore"] > 0]  # add levi score to viewing
-    b = b[
-        [
-            "best_match_lexiscore",
-            "Address 1",
-            "best_match_address",
-            "Postcode",
-            "UPRN",
-            "best_match_uprn",
-        ]
-    ]
+                # If single postcode is bigger than batch_size → send directly
+                if group_len >= batch_size:
+                    if buffer:
+                        create_batch_and_send_to_address2uprn(
+                            batch_df=pd.concat(buffer, ignore_index=True),
+                            task_id=task_id,
+                            sub_task_id=subtask_id,
+                            subtask_interface=subtask_interface,
+                            bucket_name=bucket_name,
+                        )
+                        buffer = []
+                        count = 0

+                    create_batch_and_send_to_address2uprn(
+                        batch_df=group_df,
+                        task_id=task_id,
+                        sub_task_id=subtask_id,
+                        subtask_interface=subtask_interface,
+                        bucket_name=bucket_name,
+                    )
+                    continue

-def handler(event, context):
-    print("hello Postcode splitter world")
-    return {"statusCode": 200, "body": "hello world"}
+                # If adding would exceed batch → flush first
+                # (the buffer cannot be empty here: every buffered group is
+                # smaller than batch_size, so count must already be non-zero)
+                if count + group_len > batch_size:
+                    create_batch_and_send_to_address2uprn(
+                        batch_df=pd.concat(buffer, ignore_index=True),
+                        task_id=task_id,
+                        sub_task_id=subtask_id,
+                        subtask_interface=subtask_interface,
+                        bucket_name=bucket_name,
+                    )
+                    buffer = []
+                    count = 0

+                # Add group
+                buffer.append(group_df)
+                count += group_len

-if __name__ == "__main__":
-    main()
+            # Final flush
+            if buffer:
+                create_batch_and_send_to_address2uprn(
+                    batch_df=pd.concat(buffer, ignore_index=True),
+                    task_id=task_id,
+                    sub_task_id=subtask_id,
+                    subtask_interface=subtask_interface,
+                    bucket_name=bucket_name,
+                )
+
+        # Mark subtask as completed. This runs once per record, inside the
+        # loop, so every record's subtask is closed out and an empty record
+        # list never reaches an unbound subtask_id.
+        subtask_interface.update_subtask_status(
+            subtask_id,
+            "completed",
+            outputs={"rows_processed": "completed"},
+        )
+
+    # NOTE(review): results and errors are never appended to in this function,
+    # so the body always reports an empty "processed" list and null "errors".
+    return {
+        "statusCode": 200,
+        "body": json.dumps(
+            {"processed": results, "errors": errors if errors else None}
+        ),
+    }
--- a/etl/customers/l_and_g/ic_slides.py
+++ b/etl/customers/l_and_g/ic_slides.py
@ -41,7 +41,10 @@ epc_data = pd.read_csv(

 # Classify floor area in <73m2, 73-98, 99-200, 200+
 epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(
-    lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+")
+    lambda x: (
+        "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+"
+    )
+)

 # 73-98     185
 # <73       156
@ -65,7 +68,11 @@ import pandas as pd
 import numpy as np
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
-from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    PlanModel,
+    PlanRecommendations,
+)
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel


@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids):
    session.begin()

    # Get properties and their details for a specific portfolio
-    properties_query = session.query(
-        PropertyModel,
-        PropertyDetailsEpcModel
-    ).join(
-        PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
-    ).filter(
-        PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
-    ).all()
+    properties_query = (
+        session.query(PropertyModel, PropertyDetailsEpcModel)
+        .join(
+            PropertyDetailsEpcModel,
+            PropertyModel.id == PropertyDetailsEpcModel.property_id,
+        )
+        .filter(PropertyModel.portfolio_id == portfolio_id)  # Filter by portfolio ID
+        .all()
+    )

    # Transform properties data to include all fields dynamically
    properties_data = [
-        {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
-         **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
-            PropertyDetailsEpcModel.__table__.columns}}
+        {
+            **{
+                col.name: getattr(prop.PropertyModel, col.name)
+                for col in PropertyModel.__table__.columns
+            },
+            **{
+                col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
+                for col in PropertyDetailsEpcModel.__table__.columns
+            },
+        }
        for prop in properties_query
    ]

    # Get property IDs from fetched properties

    # Get plans linked to the fetched properties
-    plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+    plans_query = (
+        session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
+    )

    # Transform plans data to include all fields dynamically
    plans_data = [
-        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
        for plan in plans_query
    ]

    # Extract plan IDs for filtering recommendations through PlanRecommendations
-    plan_ids = [plan['id'] for plan in plans_data]
+    plan_ids = [plan["id"] for plan in plans_data]

    # Get recommendations through PlanRecommendations for those plans and that are default
-    recommendations_query = session.query(
-        Recommendation,
-        Plan.scenario_id
-    ).join(
-        PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
-    ).join(
-        Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
-    ).filter(
-        PlanRecommendations.plan_id.in_(plan_ids),
-        Recommendation.default == True  # Filtering for default recommendations
-    ).all()
+    recommendations_query = (
+        session.query(Recommendation, PlanModel.scenario_id)
+        .join(
+            PlanRecommendations,
+            Recommendation.id == PlanRecommendations.recommendation_id,
+        )
+        .join(
+            PlanModel,
+            PlanModel.id
+            == PlanRecommendations.plan_id,  # Join with Plan to access scenario_id
+        )
+        .filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            Recommendation.default == True,  # Filtering for default recommendations
+        )
+        .all()
+    )

    # Transform recommendations data to include all fields dynamically and include scenario_id
    recommendations_data = [
-        {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
-                                                                                                           col.name) for
-            col in Recommendation.__table__.columns},
-         "Scenario ID": rec.scenario_id}
+        {
+            **{
+                col.name: (
+                    getattr(rec.Recommendation, col.name)
+                    if hasattr(rec, "Recommendation")
+                    else getattr(rec, col.name)
+                )
+                for col in Recommendation.__table__.columns
+            },
+            "Scenario ID": rec.scenario_id,
+        }
        for rec in recommendations_query
    ]

@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids):
    return properties_data, plans_data, recommendations_data


-properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
+properties_data, plans_data, recommendations_data = get_data(
+    portfolio_id=124, scenario_ids=[205]
+)

 properties_df = pd.DataFrame(properties_data)
 plans_df = pd.DataFrame(plans_data)
@ -147,12 +179,12 @@ recommended_measures_df = recommended_measures_df.drop(columns=["default"])
 post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
 post_install_sap = post_install_sap[post_install_sap["default"]]
 # Sum up the sap points by property id
-post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
+post_install_sap = (
+    post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
+)

 recommendations_measures_pivot = recommended_measures_df.pivot(
-    index='property_id',
-    columns='measure_type',
-    values='estimated_cost'
+    index="property_id", columns="measure_type", values="estimated_cost"
 )
 recommendations_measures_pivot = recommendations_measures_pivot.reset_index()

@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename(
        "double_glazing": "Cost: Double Glazing",
        "loft_insulation": "Cost: Loft Insulation",
        "mechanical_ventilation": "Cost: Ventilation",
-        "solar_pv": "Cost: Solar PV"
+        "solar_pv": "Cost: Solar PV",
    }
 )
 recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = (
    recommendations_measures_pivot["Cost: Solar PV"] > 0
 )

-df = properties_df[
-    [
-        "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
-        "current_epc_rating",
-        "current_sap_points", "total_floor_area", "number_of_rooms",
+df = (
+    properties_df[
+        [
+            "property_id",
+            "uprn",
+            "address",
+            "postcode",
+            "property_type",
+            "walls",
+            "roof",
+            "heating",
+            "windows",
+            "current_epc_rating",
+            "current_sap_points",
+            "total_floor_area",
+            "number_of_rooms",
+        ]
    ]
-].merge(
-    recommendations_measures_pivot, how="left", on="property_id"
-).merge(
-    post_install_sap, how="left", on="property_id"
+    .merge(recommendations_measures_pivot, how="left", on="property_id")
+    .merge(post_install_sap, how="left", on="property_id")
 )

 df = df.drop(columns=["property_id"])
@ -222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"])

 # We fill missings:
 for col in [
-    "Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation",
-    "Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation",
-    "Recommendation: Solar PV"
+    "Recommendation: Air Source Heat Pump",
+    "Recommendation: Cavity Wall Insulation",
+    "Recommendation: Double Glazing",
+    "Recommendation: Loft Insulation",
+    "Recommendation: Ventilation",
+    "Recommendation: Solar PV",
 ]:
    df[col] = df[col].fillna(False)

 for col in [
-    "Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation",
-    "Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation",
-    "Cost: Solar PV"
+    "Cost: Air Source Heat Pump",
+    "Cost: Cavity Wall Insulation",
+    "Cost: Double Glazing",
+    "Cost: Loft Insulation",
+    "Cost: Ventilation",
+    "Cost: Solar PV",
 ]:
    df[col] = df[col].fillna(0)

 # Calculate post SAP
 df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
 df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
-df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
+df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
+    lambda x: sap_to_epc(x)
+)

 df["Recommendation: Air Source Heat Pump"].sum()
 df["Cost: Air Source Heat Pump"].sum()

-df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)
+df.to_csv(
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv",
+    index=False,
+)
--- a/etl/customers/mod/pilot/2.
+++ b/etl/customers/mod/pilot/2.
@ -4,7 +4,11 @@ import numpy as np
 from backend.app.utils import sap_to_epc
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
-from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    PlanModel,
+    PlanRecommendations,
+)
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel


@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids):
    session.begin()

    # Get properties and their details for a specific portfolio
-    properties_query = session.query(
-        PropertyModel,
-        PropertyDetailsEpcModel
-    ).join(
-        PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
-    ).filter(
-        PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
-    ).all()
+    properties_query = (
+        session.query(PropertyModel, PropertyDetailsEpcModel)
+        .join(
+            PropertyDetailsEpcModel,
+            PropertyModel.id == PropertyDetailsEpcModel.property_id,
+        )
+        .filter(PropertyModel.portfolio_id == portfolio_id)  # Filter by portfolio ID
+        .all()
+    )

    # Transform properties data to include all fields dynamically
    properties_data = [
-        {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
-         **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
-            PropertyDetailsEpcModel.__table__.columns}}
+        {
+            **{
+                col.name: getattr(prop.PropertyModel, col.name)
+                for col in PropertyModel.__table__.columns
+            },
+            **{
+                col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
+                for col in PropertyDetailsEpcModel.__table__.columns
+            },
+        }
        for prop in properties_query
    ]

    # Get property IDs from fetched properties

    # Get plans linked to the fetched properties
-    plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
+    plans_query = (
+        session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
+    )

    # Transform plans data to include all fields dynamically
    plans_data = [
-        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
        for plan in plans_query
    ]

    # Extract plan IDs for filtering recommendations through PlanRecommendations
-    plan_ids = [plan['id'] for plan in plans_data]
+    plan_ids = [plan["id"] for plan in plans_data]

    # Get recommendations through PlanRecommendations for those plans and that are default
-    recommendations_query = session.query(
-        Recommendation,
-        Plan.scenario_id
-    ).join(
-        PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
-    ).join(
-        Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
-    ).filter(
-        PlanRecommendations.plan_id.in_(plan_ids),
-        Recommendation.default == True  # Filtering for default recommendations
-    ).all()
+    recommendations_query = (
+        session.query(Recommendation, PlanModel.scenario_id)
+        .join(
+            PlanRecommendations,
+            Recommendation.id == PlanRecommendations.recommendation_id,
+        )
+        .join(
+            PlanModel,
+            PlanModel.id
+            == PlanRecommendations.plan_id,  # Join with Plan to access scenario_id
+        )
+        .filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            Recommendation.default == True,  # Filtering for default recommendations
+        )
+        .all()
+    )

    # Transform recommendations data to include all fields dynamically and include scenario_id
    recommendations_data = [
-        {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation')
-        else getattr(rec, col.name) for
-            col in Recommendation.__table__.columns},
-         "Scenario ID": rec.scenario_id}
+        {
+            **{
+                col.name: (
+                    getattr(rec.Recommendation, col.name)
+                    if hasattr(rec, "Recommendation")
+                    else getattr(rec, col.name)
+                )
+                for col in Recommendation.__table__.columns
+            },
+            "Scenario ID": rec.scenario_id,
+        }
        for rec in recommendations_query
    ]

@ -94,16 +121,34 @@ def app():
    )

    property_asset_data = properties_df.merge(
-        mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn"
+        mod_property_data.drop(columns=["address", "postcode", "tenure"]),
+        how="left",
+        on="uprn",
    )

-    property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False)
+    property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains(
+        "pitched", case=False
+    )
    property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970
-    property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip()
-    property_asset_data["is_insulated"] = (
-        property_asset_data["walls"].str.split(",").str[1].str.strip().isin(
-            ["filled cavity", "with external insulation", "filled cavity and external insulation"]
-        ) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"])
+    property_asset_data["wall_type"] = (
+        property_asset_data["walls"].str.split(" ").str[0].str.strip()
+    )
+    property_asset_data["is_insulated"] = property_asset_data["walls"].str.split(
+        ","
+    ).str[1].str.strip().isin(
+        [
+            "filled cavity",
+            "with external insulation",
+            "filled cavity and external insulation",
+        ]
+    ) | property_asset_data[
+        "walls"
+    ].str.split(
+        ","
+    ).str[
+        2
+    ].str.strip().isin(
+        ["insulated"]
    )
    property_asset_data["is_insulated"] = np.where(
        property_asset_data["is_insulated"], "Insulated", "Uninsulated"
@ -115,18 +160,26 @@ def app():
        property_asset_data["pre_1970"], "Pre 1970", "Post 1970"
    )

-    archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"]
+    archetype_variables = [
+        "property_type",
+        "wall_type",
+        "is_insulated",
+        "is_pitched",
+        "pre_1970",
+    ]

    assigned_archetypes = (
-        property_asset_data.groupby(
-            archetype_variables
-        ).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False)
+        property_asset_data.groupby(archetype_variables)
+        .size()
+        .reset_index()
+        .rename(columns={0: "n_properties"})
+        .sort_values("n_properties", ascending=False)
    )

    # Make the archetype ID a concatenation of the variables
-    assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply(
-        lambda x: "_".join(x.astype(str)), axis=1
-    )
+    assigned_archetypes["archetype_id"] = assigned_archetypes[
+        archetype_variables
+    ].apply(lambda x: "_".join(x.astype(str)), axis=1)

    # Most prominent archetypes
    prominent_archetypes = assigned_archetypes.head(6)
@ -136,7 +189,7 @@ def app():
    property_asset_data = property_asset_data.merge(
        assigned_archetypes[archetype_variables + ["archetype_id"]],
        how="left",
-        on=archetype_variables
+        on=archetype_variables,
    )

    # Create age bands:
@ -148,7 +201,7 @@ def app():
    property_asset_data["age_band"] = pd.cut(
        property_asset_data["BUILD_YEAR"],
        bins=[1959, 1969, 1979, 1989, 1999, 2022],
-        labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"]
+        labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"],
    )

    # Create floor area bands
@ -159,47 +212,59 @@ def app():
    property_asset_data["floor_area_band"] = pd.cut(
        property_asset_data["total_floor_area"],
        bins=[0, 73, 97, 199, 10000],
-        labels=["0-73", "74-97", "98-199", "200+"]
+        labels=["0-73", "74-97", "98-199", "200+"],
    )

    property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy()
    property_asset_data["archetype_group"] = np.where(
-        property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values),
+        property_asset_data["archetype_id"].isin(
+            other_archetypes["archetype_id"].values
+        ),
        "other",
-        property_asset_data["archetype_group"]
+        property_asset_data["archetype_group"],
    )

    # For colour
    wall_types = (
-        property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename(
-            columns={"wall_type": "Wall Type"}
-        )
+        property_asset_data[["wall_type"]]
+        .value_counts()
+        .to_frame()
+        .reset_index()
+        .rename(columns={"wall_type": "Wall Type"})
    )
    # Group into age bands
    ages = (
-        property_asset_data[["age_band"]].value_counts()
+        property_asset_data[["age_band"]]
+        .value_counts()
        .to_frame()
-        .reset_index().sort_values("age_band", ascending=True)
+        .reset_index()
+        .sort_values("age_band", ascending=True)
        .rename(columns={"age_band": "Age Band"})
    )
    floor_area_bands = (
-        property_asset_data[["floor_area_band"]].value_counts()
+        property_asset_data[["floor_area_band"]]
+        .value_counts()
        .to_frame()
-        .reset_index().sort_values("floor_area_band", ascending=True)
+        .reset_index()
+        .sort_values("floor_area_band", ascending=True)
        .rename(columns={"floor_area_band": "Floor Area Band"})
    )
    archetype_counts = (
-        property_asset_data[["archetype_group"]].
-        value_counts().
-        to_frame().
-        reset_index()
+        property_asset_data[["archetype_group"]]
+        .value_counts()
+        .to_frame()
+        .reset_index()
        .rename(columns={"archetype_group": "Archetype"})
    )
    property_types = (
-        (property_asset_data["property_type"] + ": " + property_asset_data["built_form"]).
-        value_counts().
-        to_frame().
-        reset_index()
+        (
+            property_asset_data["property_type"]
+            + ": "
+            + property_asset_data["built_form"]
+        )
+        .value_counts()
+        .to_frame()
+        .reset_index()
        .rename(columns={"index": "Property Type", 0: "Count"})
    )

@ -217,18 +282,24 @@ def app():
    totals = property_asset_data[
        [
            "Total_household_members",
-            "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
-            "heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
-            "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
+            "co2_emissions",
+            "current_energy_demand",
+            "current_energy_demand_heating_hotwater",
+            "heating_cost_current",
+            "hot_water_cost_current",
+            "lighting_cost_current",
+            "appliances_cost_current",
+            "gas_standing_charge",
+            "electricity_standing_charge",
        ]
    ].copy()
    totals["total_cost"] = (
-        totals["heating_cost_current"] +
-        totals["hot_water_cost_current"] +
-        totals["lighting_cost_current"] +
-        totals["appliances_cost_current"] +
-        totals["gas_standing_charge"] +
-        totals["electricity_standing_charge"]
+        totals["heating_cost_current"]
+        + totals["hot_water_cost_current"]
+        + totals["lighting_cost_current"]
+        + totals["appliances_cost_current"]
+        + totals["gas_standing_charge"]
+        + totals["electricity_standing_charge"]
    )
    print(
        totals[
@ -259,38 +330,59 @@ def app():

        scenario_recommendations_df = recommendations_df[
            recommendations_df["Scenario ID"] == scenario
-            ].copy()
+        ].copy()

-        scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"]
+        scenario_recommendations_df["contingency"] = (
+            contingency * scenario_recommendations_df["estimated_cost"]
+        )
        scenario_recommendations_df["total_cost"] = (
-            scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"]
+            scenario_recommendations_df["estimated_cost"]
+            + scenario_recommendations_df["contingency"]
        )

        recommended_measures_df = scenario_recommendations_df[
            ["property_id", "measure_type", "estimated_cost", "default"]
        ]

-        recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
+        recommended_measures_df = recommended_measures_df[
+            recommended_measures_df["default"]
+        ]
        recommended_measures_df = recommended_measures_df.drop(columns=["default"])

        # Metrics by property ID
        aggregated_metrics = scenario_recommendations_df[
            [
-                "property_id", "type", "default", "sap_points",
-                "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency",
-                "total_cost"
+                "property_id",
+                "type",
+                "default",
+                "sap_points",
+                "energy_cost_savings",
+                "kwh_savings",
+                "co2_equivalent_savings",
+                "estimated_cost",
+                "contingency",
+                "total_cost",
            ]
        ]
        aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
-        aggregated_metrics = aggregated_metrics.groupby("property_id")[
-            ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
-             "total_cost", "contingency"]
-        ].sum().reset_index()
+        aggregated_metrics = (
+            aggregated_metrics.groupby("property_id")[
+                [
+                    "sap_points",
+                    "co2_equivalent_savings",
+                    "energy_cost_savings",
+                    "kwh_savings",
+                    "estimated_cost",
+                    "total_cost",
+                    "contingency",
+                ]
+            ]
+            .sum()
+            .reset_index()
+        )

        recommendations_measures_pivot = recommended_measures_df.pivot(
-            index='property_id',
-            columns='measure_type',
-            values='estimated_cost'
+            index="property_id", columns="measure_type", values="estimated_cost"
        )
        recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
        recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
@ -299,30 +391,58 @@ def app():
        for c in recommendations_measures_pivot.columns:
            if c == "property_id":
                continue
-            recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0
+            recommendations_measures_pivot["Recommendation: " + c] = (
+                recommendations_measures_pivot[c] > 0
+            )

        # We now create a final output
-        df = properties_df[
-            [
-                "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
-                "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
-                "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
-                "heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
-                "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
+        df = (
+            properties_df[
+                [
+                    "property_id",
+                    "uprn",
+                    "address",
+                    "postcode",
+                    "property_type",
+                    "walls",
+                    "roof",
+                    "heating",
+                    "windows",
+                    "current_epc_rating",
+                    "current_sap_points",
+                    "total_floor_area",
+                    "number_of_rooms",
+                    "co2_emissions",
+                    "current_energy_demand",
+                    "current_energy_demand_heating_hotwater",
+                    "heating_cost_current",
+                    "hot_water_cost_current",
+                    "lighting_cost_current",
+                    "appliances_cost_current",
+                    "gas_standing_charge",
+                    "electricity_standing_charge",
+                ]
            ]
-        ].merge(
-            recommendations_measures_pivot, how="left", on="property_id"
-        ).merge(
-            aggregated_metrics, how="left", on="property_id"
+            .merge(recommendations_measures_pivot, how="left", on="property_id")
+            .merge(aggregated_metrics, how="left", on="property_id")
        )

        df["bills_total_cost"] = (
-            df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] +
-            df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"]
+            df["heating_cost_current"]
+            + df["hot_water_cost_current"]
+            + df["lighting_cost_current"]
+            + df["appliances_cost_current"]
+            + df["gas_standing_charge"]
+            + df["electricity_standing_charge"]
        )

        df = df.drop(columns=["property_id"])
-        for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
+        for c in [
+            "sap_points",
+            "co2_equivalent_savings",
+            "energy_cost_savings",
+            "kwh_savings",
+        ]:
            df[c] = df[c].fillna(0)

        df = df.rename(
@ -345,16 +465,23 @@ def app():
        # Calculate post SAP
        df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
        df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
-        df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
+        df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
+            lambda x: sap_to_epc(x)
+        )

        # Calculate the relative savings on carbon, kwh, and bills
-        df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"]
+        df["relative_carbon_savings"] = (
+            df["co2_equivalent_savings"] / df["co2_emissions"]
+        )
        df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"]
        df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"]

        # Add on the archetype
        df = df.merge(
-            property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn"
+            property_asset_data[["uprn", "archetype_group"]],
+            how="left",
+            left_on="UPRN",
+            right_on="uprn",
        )

        # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it
@ -387,7 +514,9 @@ def app():

    printing_scenario_id = scenario_ids[0]
    # EPC breakdown
-    print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts())
+    print(
+        scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts()
+    )
    # Cost
    # Total cost
    print(scenario_data[printing_scenario_id]["total_cost"].sum())
@ -408,16 +537,24 @@ def app():
    measure_details = {}
    for scenario in scenario_ids:
        measure_details[scenario] = {}
-        recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
-        measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict()
+        recommendation_cols = [
+            c for c in scenario_data[scenario].columns if "Recommendation:" in c
+        ]
+        measure_details[scenario]["count"] = (
+            scenario_data[scenario][recommendation_cols].sum().to_dict()
+        )
        # Get average cost per measure
        measure_columns = [
-            c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c
+            c.split("Recommendation: ")[1]
+            for c in scenario_data[scenario].columns
+            if "Recommendation:" in c
        ]
        # Take the mean, drop zero columns
        measure_costs = {}
        for m in measure_columns:
-            measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean())
+            measure_costs[m] = float(
+                scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()
+            )
        measure_details[scenario]["cost_per_measure"] = measure_costs

    pprint(measure_details[scenario_ids[0]]["count"])
@ -452,12 +589,27 @@ def app():
    for scenario in scenario_ids:
        df = scenario_data[scenario].copy()

-        avg_savings = df[
-            ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
-             "total_cost", "contingency"]
-        ].mean().to_dict()
-        avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"]
-        avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
+        avg_savings = (
+            df[
+                [
+                    "sap_points",
+                    "co2_equivalent_savings",
+                    "energy_cost_savings",
+                    "kwh_savings",
+                    "estimated_cost",
+                    "total_cost",
+                    "contingency",
+                ]
+            ]
+            .mean()
+            .to_dict()
+        )
+        avg_savings["cost_per_sap_point"] = (
+            avg_savings["total_cost"] / avg_savings["sap_points"]
+        )
+        avg_savings["cost_per_carbon"] = (
+            avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
+        )
        scenario_metrics[scenario] = avg_savings

    pprint(scenario_metrics[scenario_ids[0]])
@ -465,11 +617,11 @@ def app():

    scenario_data[scenario_ids[0]]["loft_insulation"][
        scenario_data[scenario_ids[0]]["loft_insulation"] > 0
-        ].mean()
+    ].mean()

    scenario_data[scenario_ids[0]]["cavity_wall_insulation"][
        scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0
-        ].mean()
+    ].mean()

    # Testing checking flood risk

@ -477,11 +629,7 @@ def app():

    def get_flood_risk(lat, lon, radius_km=1):
        url = "https://environment.data.gov.uk/flood-monitoring/id/floods"
-        params = {
-            'lat': lat,
-            'long': lon,
-            'dist': radius_km  # search radius in km
-        }
+        params = {"lat": lat, "long": lon, "dist": radius_km}  # search radius in km

        response = requests.get(url, params=params)
        response.raise_for_status()
@ -495,20 +643,19 @@ def app():
            print(f"{len(flood_warnings)} warning(s) found near the location:")
            for warning in flood_warnings:
                print(f"- Area: {warning.get('description')}")
-                print(f"  Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})")
+                print(
+                    f"  Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})"
+                )
                print(f"  Message changed at: {warning.get('timeMessageChanged')}")
                print()

        return flood_warnings

    from shapely.geometry import shape, Point
+
    def get_flood_areas_near_point(lat, lon, radius_km=2):
        url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas"
-        params = {
-            'lat': lat,
-            'long': lon,
-            'dist': radius_km
-        }
+        params = {"lat": lat, "long": lon, "dist": radius_km}

        response = requests.get(url, params=params)
        response.raise_for_status()
@ -531,7 +678,7 @@ def app():
            if not features:
                continue

-            flood_polygon = shape(features[0]['geometry'])
+            flood_polygon = shape(features[0]["geometry"])

            try:
                is_inside = flood_polygon.contains(point)
@ -539,12 +686,17 @@ def app():
                is_inside = False

            if is_inside:
-                print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})")
+                print(
+                    f"📍 Point is inside flood area: {area['label']} ({area['notation']})"
+                )
                return area

    from tqdm import tqdm
+
    floor_warnings_data = []
-    for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)):
+    for _, property in tqdm(
+        property_asset_data.iterrows(), total=len(property_asset_data)
+    ):
        # warnings = floor_warnings_data.extend(
        #     get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1)
        # )
@ -556,7 +708,7 @@ def app():
                    "uprn": property["uprn"],
                    "address": property["address"],
                    "postcode": property["postcode"],
-                    "area": resp
+                    "area": resp,
                }
            )
            continue
@ -570,7 +722,7 @@ def app():
        "House_Cavity_Uninsulated_Pitched roof_Post 1970",
        "other",
        "House_System_Uninsulated_Pitched roof_Pre 1970",
-        "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970"
+        "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970",
    ]

    values = [62, 36, 21, 16, 16, 4, 2]
@ -582,36 +734,39 @@ def app():
        "Cavity wall insulation, ventilation",
        "Bespoke retrofit measures",
        "External wall insulation, roof insulation",
-        "Flat roof insulation, internal wall insulation"
+        "Flat roof insulation, internal wall insulation",
    ]

-    fig = go.Figure(go.Treemap(
-        labels=labels,
-        parents=[""] * len(labels),  # No root
-        values=values,
-        hovertext=hovertext,
-        hoverinfo="text",
-        textinfo="none",
-        marker=dict(
-            line=dict(color="white", width=4),
-            colors=values,
-            colorscale="Blues"
+    fig = go.Figure(
+        go.Treemap(
+            labels=labels,
+            parents=[""] * len(labels),  # No root
+            values=values,
+            hovertext=hovertext,
+            hoverinfo="text",
+            textinfo="none",
+            marker=dict(
+                line=dict(color="white", width=4), colors=values, colorscale="Blues"
+            ),
        )
-    ))
+    )

    fig.update_layout(
-        margin=dict(t=10, l=10, r=10, b=10),
-        plot_bgcolor="white",
-        paper_bgcolor="white"
+        margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white"
    )

    fig.show()

    # Get the recommended measures by scenario id
-    recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c]
-    measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[
-        recommendation_cols
-    ].sum().reset_index()
+    recommendation_cols = [
+        c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c
+    ]
+    measure_counts_by_scenario = (
+        scenario_data[scenario_ids[1]]
+        .groupby("archetype_group")[recommendation_cols]
+        .sum()
+        .reset_index()
+    )

    measure_counts_by_scenario.to_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv"
@ -630,15 +785,13 @@ def app():

        to_append = {"uprn": uprn}
        for _id in scenario_ids:
-            scenario = scenario_data[_id][
-                scenario_data[_id]["uprn"] == uprn
-                ].squeeze()
+            scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze()

            val = PropertyValuation.estimate_valuation_improvement(
                current_value=x["valuation"],
                current_epc=scenario["Current EPC Rating"].value,
                target_epc=scenario["Predicted Post Works EPC"],
-                total_cost=None
+                total_cost=None,
            )

            to_append[_id] = val["average_increase"]
--- a/etl/customers/newhaven/slides.py
+++ b/etl/customers/newhaven/slides.py
--- a/Project/d_restart_failed_subtasks.py
+++ b/Project/d_restart_failed_subtasks.py
@ -10,6 +10,7 @@ Additionally, we will find the problematic records and remove them
 Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan
 or recommendations in case something went wrong
 """
+
 import pandas as pd
 from sqlalchemy.orm import Session
 from backend.app.db.models.portfolio import PropertyModel
@ -19,8 +20,7 @@ from backend.app.db.connection import db_session
 def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]:
    return [
        uprn
-        for (uprn,) in
-        session.query(PropertyModel.uprn)
+        for (uprn,) in session.query(PropertyModel.uprn)
        .filter(PropertyModel.portfolio_id == portfolio_id)
        .all()
        if uprn is not None
@ -34,7 +34,7 @@ with db_session() as session:
 sal = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
    "data.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)]
@ -44,7 +44,7 @@ missed_properties.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
    "d_failed_properties_to_restart_20260102.xlsx",
    sheet_name="Standardised Asset List",
-    index=False
+    index=False,
 )

 # Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios:
@ -52,14 +52,14 @@ scenario_id = None

 from sqlalchemy import select, func
 from sqlalchemy.orm import Session
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel


 def count_plans_for_scenario(session: Session, scenario_id: int) -> int:
    return session.execute(
        select(func.count())
-        .select_from(Plan)
-        .where(Plan.scenario_id == scenario_id)
+        .select_from(PlanModel)
+        .where(PlanModel.scenario_id == scenario_id)
    ).scalar_one()


@ -69,8 +69,7 @@ with db_session() as session:

 def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]:
    result = session.execute(
-        select(Plan.id)
-        .where(Plan.scenario_id == scenario_id)
+        select(PlanModel.id).where(PlanModel.scenario_id == scenario_id)
    )
    return [row.id for row in result]

@ -84,7 +83,7 @@ from sqlalchemy.orm import Session

 def chunked(iterable, size):
    for i in range(0, len(iterable), size):
-        yield iterable[i:i + size]
+        yield iterable[i : i + size]


 from sqlalchemy import text
@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # recommendation_materials
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation_materials rm
            USING plan_recommendations pr
            WHERE rm.recommendation_id = pr.recommendation_id
              AND pr.plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -116,10 +117,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plan_recommendations
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan_recommendations
            WHERE plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
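    # recommendations (only those used by these plans)
    # NOTE(review): the plan_recommendations rows for these plan_ids appear to be
    # deleted by the preceding statement, so the subquery below likely matches
    # nothing and no recommendation rows are removed — confirm the intended order.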
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation r
            WHERE r.id IN (
                SELECT DISTINCT recommendation_id
                FROM plan_recommendations
                WHERE plan_id = ANY(:plan_ids)
            )
-        """),
+        """
+        ),
        params,
    )

@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plans LAST
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan
            WHERE id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -5,6 +5,7 @@ This includes:
 # EPC C, there should be a plan
 2) If the plan is fabric first, make sure they are actually fabric first
 """
+
 import pandas as pd

 scenario_names = {
@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items():
    )

    # find properties that are below the scenario sap target, but have no recommended measures
-    df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id]
+    df["below_scenario_target"] = (
+        df["current_sap_points"] < scenario_sap_targets[scenario_id]
+    )
    df["no_recommended_measures"] = df["sap_points"] == 0
    df["zero_cost"] = df["total_retrofit_cost"] == 0
    df["sap_points_above_zero"] = df["sap_points"] > 0
@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items():
    ].copy()

    if scenario_sap_targets[scenario_id] == 81:
-        problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"]
+        problematic_properties = problematic_properties[
+            problematic_properties["property_type"] != "Flat"
+        ]

    zero_cost_above_zero_sap = df[
        (df["sap_points_above_zero"] & df["zero_cost"])
@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items():
    # pd.set_option('display.width', 1000)
    # problematic_properties.head(len(problematic_properties))

-    print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
-    print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")
+    print(
+        f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})"
+    )
+    print(
+        f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})"
+    )

    problems.append(problematic_properties)
    problems.append(zero_cost_above_zero_sap)
@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"])
 sal = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
    "data.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )
 sal2 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
    "UPRNS.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 sal = pd.concat([sal, sal2])
@ -114,7 +123,7 @@ retry.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
    "d_problematic_properties_to_review_20260106.xlsx",
    sheet_name="Standardised Asset List",
-    index=False
+    index=False,
 )

 # Delete associated plans
@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist()
 from sqlalchemy.orm import Session
 from backend.app.db.models.portfolio import PropertyModel
 from backend.app.db.connection import db_session
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel
 from sqlalchemy import select, delete
 from sqlalchemy.exc import NoResultFound
 from sqlalchemy.orm import sessionmaker


-def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]:
+def get_property_ids_for_uprns(
+    session: Session, portfolio_id: int, uprns: list[int]
+) -> list[int]:
    return [
        property.id
        for property in session.query(PropertyModel)
        .filter(
-            PropertyModel.portfolio_id == portfolio_id,
-            PropertyModel.uprn.in_(uprns)
+            PropertyModel.portfolio_id == portfolio_id, PropertyModel.uprn.in_(uprns)
        )
        .all()
    ]
@ -149,15 +159,21 @@ with db_session() as session:


 # Get all and delete plans for these property IDs
-def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]:
-    return session.query(Plan).filter(Plan.property_id.in_(property_ids)).all()
+def get_all_plans_for_property_ids(
+    session: Session, property_ids: list[int]
+) -> list[PlanModel]:
+    return (
+        session.query(PlanModel).filter(PlanModel.property_id.in_(property_ids)).all()
+    )


-def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]:
+def get_ids_of_plans_for_deletion(
+    session: Session, property_ids: list[int]
+) -> list[int]:
    return [
        plan.id
-        for plan in session.query(Plan)
-        .filter(Plan.property_id.in_(property_ids))
+        for plan in session.query(PlanModel)
+        .filter(PlanModel.property_id.in_(property_ids))
        .all()
    ]

@ -168,7 +184,7 @@ with db_session() as session:

 def chunked(iterable, size):
    for i in range(0, len(iterable), size):
-        yield iterable[i:i + size]
+        yield iterable[i : i + size]


 from sqlalchemy import text
@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # recommendation_materials
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation_materials rm
            USING plan_recommendations pr
            WHERE rm.recommendation_id = pr.recommendation_id
              AND pr.plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plan_recommendations
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan_recommendations
            WHERE plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
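    # recommendations (only those used by these plans)
    # NOTE(review): the plan_recommendations rows for these plan_ids appear to be
    # deleted by the preceding statement, so the subquery below likely matches
    # nothing and no recommendation rows are removed — confirm the intended order.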
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation r
            WHERE r.id IN (
                SELECT DISTINCT recommendation_id
                FROM plan_recommendations
                WHERE plan_id = ANY(:plan_ids)
            )
-        """),
+        """
+        ),
        params,
    )

@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plans LAST
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan
            WHERE id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

--- a/Project/g_rebaselining_installed_measrues.py
+++ b/Project/g_rebaselining_installed_measrues.py
--- a/Project/h_reset_estimated_epcs.py
+++ b/Project/h_reset_estimated_epcs.py
@ -3,31 +3,41 @@ from sqlalchemy.orm import Session
 from sqlalchemy import text, select
 from backend.app.db.connection import db_read_session
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel

 PORTFOLIO_ID = 435

 with db_read_session() as session:
    # Get all PropertyDetailsEpcModel rows for properties in portfolio PORTFOLIO_ID
    # (the "estimated == True" filter below is currently commented out)
-    estimated_epcs = session.query(PropertyDetailsEpcModel).filter(
-        # PropertyDetailsEpcModel.estimated == True,
-        PropertyDetailsEpcModel.property_id.in_(
-            session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID)
+    estimated_epcs = (
+        session.query(PropertyDetailsEpcModel)
+        .filter(
+            # PropertyDetailsEpcModel.estimated == True,
+            PropertyDetailsEpcModel.property_id.in_(
+                session.query(PropertyModel.id).filter(
+                    PropertyModel.portfolio_id == PORTFOLIO_ID
+                )
+            )
        )
-    ).all()
+        .all()
+    )

    # Get the property ids referenced by these EPC rows
    estimated_epc_ids = [epc.property_id for epc in estimated_epcs]

 # I want to get the UPRNS for these properties, from the property model
 with db_read_session() as session:
-    estimated_uprns = session.query(PropertyModel.uprn).filter(
-        PropertyModel.id.in_(
-            session.query(PropertyDetailsEpcModel.property_id).filter(
-                PropertyDetailsEpcModel.id.in_(estimated_epc_ids)
+    estimated_uprns = (
+        session.query(PropertyModel.uprn)
+        .filter(
+            PropertyModel.id.in_(
+                session.query(PropertyDetailsEpcModel.property_id).filter(
+                    PropertyDetailsEpcModel.id.in_(estimated_epc_ids)
+                )
            )
        )
-    ).all()
+        .all()
+    )

    estimated_uprns_list = [uprn for (uprn,) in estimated_uprns]

@ -35,16 +45,16 @@ with db_read_session() as session:
 sal_1 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
    "data.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )
 sal_2 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
    "UPRNS.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 sal = pd.concat([sal_1, sal_2])
-sal = sal.drop_duplicates(subset=['epc_os_uprn'])
+sal = sal.drop_duplicates(subset=["epc_os_uprn"])

 estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy()

@ -55,20 +65,24 @@ SCENARIOS = [
    # 861,  # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
    # 859,  # EPC C - no solid floor, ashp 3.0
    # 885,  # EPC B - fabric first, no solid floor, ashp 3.0
-    908, 909, 910
+    908,
+    909,
+    910,
 ]

 # Get all plans, associated to these properties - the property IDs are in estimated_epc_ids
 with db_read_session() as session:
    result = session.execute(
-        select(Plan.id, Plan.property_id)
-        .where(Plan.property_id.in_(estimated_epc_ids))
+        select(PlanModel.id, PlanModel.property_id).where(
+            PlanModel.property_id.in_(estimated_epc_ids)
+        )
    )
    plans = [
        {
            "plan_id": row.id,
            "property_id": row.property_id,
-        } for row in result
+        }
+        for row in result
    ]

 df = pd.DataFrame(plans)
@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # recommendation_materials
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation_materials rm
            USING plan_recommendations pr
            WHERE rm.recommendation_id = pr.recommendation_id
              AND pr.plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plan_recommendations
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan_recommendations
            WHERE plan_id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )

@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
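    # recommendations (only those used by these plans)
    # NOTE(review): the plan_recommendations rows for these plans were already deleted in the step above, so this subquery looks likely to match nothing - confirm the ordering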
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM recommendation r
            WHERE r.id IN (
                SELECT DISTINCT recommendation_id
                FROM plan_recommendations
                WHERE plan_id = ANY(:plan_ids)
            )
-        """),
+        """
+        ),
        params,
    )

@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
    # plans LAST
    # ----------------------------
    session.execute(
-        text("""
+        text(
+            """
            DELETE FROM plan
            WHERE id = ANY(:plan_ids)
-        """),
+        """
+        ),
        params,
    )


 # Store the SAL
-filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
-            "sal.xlsx")
+filename = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
+    "sal.xlsx"
+)

 with pd.ExcelWriter(filename) as writer:
    sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer:
 b1 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 1"
+    sheet_name="batch 1",
 )
 b2 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 2"
+    sheet_name="batch 2",
 )
 b3 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 3"
+    sheet_name="batch 3",
 )
 b4 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 4"
+    sheet_name="batch 4",
 )
 b5 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
    "sal.xlsx",
-    sheet_name="batch 5"
+    sheet_name="batch 5",
 )
 # Batch 6 is every SAL row whose epc_os_uprn is not already in batches 1-5
 total = pd.concat([b1, b2, b3, b4, b5])
 remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)]
 # Create new output
-filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
-            "20260107 corrected batch 6 sal.xlsx")
+filename = (
+    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
+    "20260107 corrected batch 6 sal.xlsx"
+)

 with pd.ExcelWriter(filename) as writer:
    sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer:
    b5.to_excel(writer, sheet_name="batch 5", index=False)
    remaining.to_excel(writer, sheet_name="batch 6", index=False)

-all_together = pd.concat(
-    [b1, b2, b3, b4, b5, remaining]
-)
+all_together = pd.concat([b1, b2, b3, b4, b5, remaining])
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -110,14 +110,17 @@ import pandas as pd
 # Solar PV savings - we need the amount of solar PV bill savings
 from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine
-from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
+from backend.app.db.models.recommendations import (
+    Recommendation,
+    PlanModel,
+    PlanRecommendations,
+    RecommendationMaterials,
+)
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from collections import defaultdict

 PORTFOLIO_ID = 485  # Peabody
-SCENARIOS = [
-    970
-]
+SCENARIOS = [970]
 scenario_names = {
    970: "EPC C - no solid floor, ashp 3.0",
 }
@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Properties
    # --------------------
-    properties_query = session.query(
-        PropertyModel,
-        PropertyDetailsEpcModel
-    ).join(
-        PropertyDetailsEpcModel,
-        PropertyModel.id == PropertyDetailsEpcModel.property_id
-    ).filter(
-        PropertyModel.portfolio_id == portfolio_id
-    ).all()
+    properties_query = (
+        session.query(PropertyModel, PropertyDetailsEpcModel)
+        .join(
+            PropertyDetailsEpcModel,
+            PropertyModel.id == PropertyDetailsEpcModel.property_id,
+        )
+        .filter(PropertyModel.portfolio_id == portfolio_id)
+        .all()
+    )

    properties_data = [
        {
-            **{col.name: getattr(p.PropertyModel, col.name)
-               for col in PropertyModel.__table__.columns},
-            **{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
-               for col in PropertyDetailsEpcModel.__table__.columns},
+            **{
+                col.name: getattr(p.PropertyModel, col.name)
+                for col in PropertyModel.__table__.columns
+            },
+            **{
+                col.name: getattr(p.PropertyDetailsEpcModel, col.name)
+                for col in PropertyDetailsEpcModel.__table__.columns
+            },
        }
        for p in properties_query
    ]
@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Plans
    # --------------------
-    plans_query = session.query(Plan).filter(
-        Plan.scenario_id.in_(scenario_ids)
-    ).all()
+    plans_query = (
+        session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
+    )

    plans_data = [
-        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
        for plan in plans_query
    ]

@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Recommendations (NO materials yet)
    # --------------------
-    recommendations_query = session.query(
-        Recommendation,
-        Plan.scenario_id
-    ).join(
-        PlanRecommendations,
-        Recommendation.id == PlanRecommendations.recommendation_id
-    ).join(
-        Plan,
-        Plan.id == PlanRecommendations.plan_id
-    ).filter(
-        PlanRecommendations.plan_id.in_(plan_ids),
-        Recommendation.default.is_(True),
-        Recommendation.already_installed.is_(False)
-    ).all()
+    recommendations_query = (
+        session.query(Recommendation, PlanModel.scenario_id)
+        .join(
+            PlanRecommendations,
+            Recommendation.id == PlanRecommendations.recommendation_id,
+        )
+        .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
+        .filter(
+            PlanRecommendations.plan_id.in_(plan_ids),
+            Recommendation.default.is_(True),
+            Recommendation.already_installed.is_(False),
+        )
+        .all()
+    )

    recommendations_data = [
        {
-            **{col.name: getattr(r.Recommendation, col.name)
-               for col in Recommendation.__table__.columns},
+            **{
+                col.name: getattr(r.Recommendation, col.name)
+                for col in Recommendation.__table__.columns
+            },
            "scenario_id": r.scenario_id,
-            "materials": []  # placeholder
+            "materials": [],  # placeholder
        }
        for r in recommendations_query
    ]
@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    # Recommendation materials (SEPARATE QUERY)
    # --------------------
-    materials_query = session.query(
-        RecommendationMaterials
-    ).filter(
-        RecommendationMaterials.recommendation_id.in_(recommendation_ids)
-    ).all()
+    materials_query = (
+        session.query(RecommendationMaterials)
+        .filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
+        .all()
+    )

    # Group materials by recommendation_id
    materials_by_recommendation = defaultdict(list)

    for m in materials_query:
-        materials_by_recommendation[m.recommendation_id].append({
-            "material_id": m.material_id,
-            "depth": m.depth,
-            "quantity": m.quantity,
-            "quantity_unit": m.quantity_unit,
-            "estimated_cost": m.estimated_cost,
-        })
+        materials_by_recommendation[m.recommendation_id].append(
+            {
+                "material_id": m.material_id,
+                "depth": m.depth,
+                "quantity": m.quantity,
+                "quantity_unit": m.quantity_unit,
+                "estimated_cost": m.estimated_cost,
+            }
+        )

    # Attach materials safely (no filtering side effects)
    for r in recommendations_data:
@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
    recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
    properties_df.to_excel(writer, sheet_name="properties", index=False)

-    
+
 # solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
 # average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()


-
 # # Check tenures
 # initial_asset_data = pd.read_excel(
 #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
--- a/Project/m_reduced_sample_revised.py
+++ b/Project/m_reduced_sample_revised.py
@ -4,7 +4,7 @@ import pandas as pd
 full_sal = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
    "SAL/Depracated/20260107 corrected batch 6 sal.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 # ------Pull in the reduced sample ------
@ -12,7 +12,7 @@ full_sal = pd.read_excel(
 reduced_sal = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
    "ownership filtered sal.xlsx",
-    sheet_name="Standardised Asset List"
+    sheet_name="Standardised Asset List",
 )

 # ------ Pull in the confirmed ownership column from Peabody ------
@ -20,18 +20,20 @@ new_asset_data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
    "- Peabody "
    "- Data Extracts for Domna v2.xlsx",
-    sheet_name="Properties"
+    sheet_name="Properties",
 )

 correct_sample = new_asset_data[
    ~new_asset_data["AH Tenure"].isin(
-        ["Commercial",
-         "Freeholder",
-         "HOMEBUY / EQUITY LOAN",
-         "Leaseholder",
-         "Outright Sale",
-         "SHARED EQUITY",
-         "Shared Ownership"]
+        [
+            "Commercial",
+            "Freeholder",
+            "HOMEBUY / EQUITY LOAN",
+            "Leaseholder",
+            "Outright Sale",
+            "SHARED EQUITY",
+            "Shared Ownership",
+        ]
    )
 ].copy()

@ -41,9 +43,7 @@ stuff_to_add = correct_sample[
    ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values)
 ]["UPRN"].values

-sal_to_add = full_sal[
-    full_sal["domna_property_id"].isin(stuff_to_add)
-].copy()
+sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy()

 # ------- Stuff to remove -------
 stuff_to_remove = reduced_sal[
@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel
 from backend.app.db.connection import db_session, db_read_session
 from sqlalchemy import select, func
 from sqlalchemy.orm import Session
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel

 uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist()

--- a/etl/customers/slide_utils.py
+++ b/etl/customers/slide_utils.py
@ -7,7 +7,7 @@ from sqlalchemy.sql import true
 from backend.app.db.utils import row2dict
 from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
 from backend.app.db.models.recommendations import Recommendation
-from backend.app.db.models.recommendations import Plan
+from backend.app.db.models.recommendations import PlanModel
 from backend.app.utils import sap_to_epc

 EPC_COLOURS = {
@ -17,7 +17,7 @@ EPC_COLOURS = {
    "D": "#fdd401",
    "E": "#fdab67",
    "F": "#ee8023",
-    "G": "#e71437"
+    "G": "#e71437",
 }


@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id:
             its associated default recommendations if any.
    """
    # Adjust the join to correctly filter recommendations while including all properties
-    query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation,
-                                                                   (Recommendation.property_id == PropertyModel.id) & (
-                                                                       Recommendation.default == true())) \
-        .filter(PropertyModel.portfolio_id == portfolio_id) \
+    query = (
+        session.query(PropertyModel, Recommendation)
+        .outerjoin(
+            Recommendation,
+            (Recommendation.property_id == PropertyModel.id)
+            & (Recommendation.default == true()),
+        )
+        .filter(PropertyModel.portfolio_id == portfolio_id)
        .all()
+    )

    properties = {}
    for property, recommendation in query:
        # Ensure the property is added once with an empty list of recommendations initially
        if property.id not in properties:
            properties[property.id] = row2dict(property)
-            properties[property.id]['recommendations'] = []
+            properties[property.id]["recommendations"] = []

        # Append recommendations if they exist and meet the criteria (already filtered by the query)
        if recommendation and recommendation.default:
-            properties[property.id]['recommendations'].append(row2dict(recommendation))
+            properties[property.id]["recommendations"].append(row2dict(recommendation))

    return list(properties.values())

@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
    :return: A list of dictionaries, where each dictionary represents a property's details.
             Returns an empty list if no property details are found.
    """
-    property_details = session.query(PropertyDetailsEpcModel).filter(
-        PropertyDetailsEpcModel.portfolio_id == portfolio_id).all()
+    property_details = (
+        session.query(PropertyDetailsEpcModel)
+        .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id)
+        .all()
+    )

    # Convert the SQLAlchemy objects to dictionaries
-    property_details_dict = [row2dict(pd) for pd in property_details] if property_details else []
+    property_details_dict = (
+        [row2dict(pd) for pd in property_details] if property_details else []
+    )

    return property_details_dict

@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
    :return: A list of dictionaries, where each dictionary represents a plan.
             Returns an empty list if no plans are found.
    """
-    plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all()
+    plans = (
+        session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all()
+    )

    # Convert the SQLAlchemy objects to dictionaries
    plans_dict = [row2dict(plan) for plan in plans] if plans else []
@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
    return plans_dict


-def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
+def plot_epc_distribution(
+    df,
+    customer_key,
+    title="Your Units",
+    background_color="white",
+    bar_height=0.4,
+    font_size=15,
+):
    """
    Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
    Allows setting the plot background color and dynamically adjusts text size and bar spacing.
@ -100,75 +119,113 @@ def plot_epc_distribution(df, customer_key, title='Your Units', background_color
    :param font_size: Base font size for text annotations (default 15)
    """
    # Calculate dynamic figure size or adjust based on preferences
-    square_size = max(6, len(df) * 0.6)  # Ensure minimum size and adjust based on number of entries
+    square_size = max(
+        6, len(df) * 0.6
+    )  # Ensure minimum size and adjust based on number of entries
    fig, ax = plt.subplots(figsize=(square_size, square_size))
    fig.patch.set_facecolor(background_color)  # Set figure background color
    ax.set_facecolor(background_color)  # Set axes background color

-    df['percentage'] = df['percentage'].round(1)  # Round the percentage values to 1 decimal place
-    df_sorted = df.sort_values('percentage', ascending=True)
+    df["percentage"] = df["percentage"].round(
+        1
+    )  # Round the percentage values to 1 decimal place
+    df_sorted = df.sort_values("percentage", ascending=True)

    # Plot bars with specified height for adjustable thickness
-    bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
-                   color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)
+    bars = ax.barh(
+        df_sorted["current_epc_rating"],
+        df_sorted["percentage"],
+        color=df_sorted["current_epc_rating"].map(EPC_COLOURS),
+        edgecolor="none",
+        height=bar_height,
+    )

-    epc_rating_font_size = font_size * 2  # EPC rating font size larger than base font size
-    count_percentage_font_size = font_size  # Count (percentage) font size as base font size
+    epc_rating_font_size = (
+        font_size * 2
+    )  # EPC rating font size larger than base font size
+    count_percentage_font_size = (
+        font_size  # Count (percentage) font size as base font size
+    )

    # Annotate bars with EPC ratings inside and count with percentage values outside
    for index, bar in enumerate(bars):
        width = bar.get_width()
-        epc_rating = df_sorted.iloc[index]['current_epc_rating']
-        count = df_sorted.iloc[index]['count']
-        percentage = df_sorted.iloc[index]['percentage']
+        epc_rating = df_sorted.iloc[index]["current_epc_rating"]
+        count = df_sorted.iloc[index]["count"]
+        percentage = df_sorted.iloc[index]["percentage"]

        # EPC rating inside the bar with increased font size
-        ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2,
-                f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size)
+        ax.text(
+            width - (width * 0.05),
+            bar.get_y() + bar.get_height() / 2,
+            f"{epc_rating}",
+            va="center",
+            ha="right",
+            color="white",
+            fontsize=epc_rating_font_size,
+        )

        # Count and percentage outside the bar, original font size
-        ax.text(width + 1, bar.get_y() + bar.get_height() / 2,
-                f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size)
+        ax.text(
+            width + 1,
+            bar.get_y() + bar.get_height() / 2,
+            f"{count} ({percentage}%)",
+            va="center",
+            color="black",
+            fontsize=count_percentage_font_size,
+        )

-    ax.set_title(title, fontsize=font_size * 1.2)  # Adjust title font size proportionally
-    ax.tick_params(axis='x', which='both', bottom=False, top=False,
-                   labelbottom=False)  # Remove x-axis tick marks and values
-    ax.tick_params(axis='y', which='both', left=False, right=False,
-                   labelleft=False)  # Remove y-axis tick marks and labels
-    ax.spines['top'].set_visible(False)  # Remove top spine
-    ax.spines['right'].set_visible(False)  # Remove right spine
-    ax.spines['left'].set_visible(False)  # Remove left spine
-    ax.spines['bottom'].set_visible(False)  # Remove bottom spine
+    ax.set_title(
+        title, fontsize=font_size * 1.2
+    )  # Adjust title font size proportionally
+    ax.tick_params(
+        axis="x", which="both", bottom=False, top=False, labelbottom=False
+    )  # Remove x-axis tick marks and values
+    ax.tick_params(
+        axis="y", which="both", left=False, right=False, labelleft=False
+    )  # Remove y-axis tick marks and labels
+    ax.spines["top"].set_visible(False)  # Remove top spine
+    ax.spines["right"].set_visible(False)  # Remove right spine
+    ax.spines["left"].set_visible(False)  # Remove left spine
+    ax.spines["bottom"].set_visible(False)  # Remove bottom spine

    plt.tight_layout()  # Adjust layout
    plt.show()

    # Save the figure as an image
-    figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
-    fig.savefig(figure_path, bbox_inches='tight')
+    figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png"
+    fig.savefig(figure_path, bbox_inches="tight")
    plt.close(fig)  # Close the figure to free memory

    return fig, figure_path


-def save_plot_to_image(figure, path='plot.png'):
+def save_plot_to_image(figure, path="plot.png"):
    """
    Saves a matplotlib figure to an image file for insertion into PowerPoint.
    """
-    figure.savefig(path, bbox_inches='tight')
+    figure.savefig(path, bbox_inches="tight")
    plt.close(figure)


-def save_figure_as_image(figure, filename='temp_plot.png'):
+def save_figure_as_image(figure, filename="temp_plot.png"):
    """
    Saves a matplotlib figure to an image file.
    """
    figure.savefig(filename, dpi=300)
-    plt.close(figure)  # Close the figure to prevent it from displaying in notebooks or Python environments
+    plt.close(
+        figure
+    )  # Close the figure to prevent it from displaying in notebooks or Python environments


-def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8),
-                                height_inches=Inches(2)):
+def add_commentary_with_bullets(
+    slide,
+    commentary,
+    top_inches,
+    left_inches=Inches(1),
+    width_inches=Inches(8),
+    height_inches=Inches(2),
+):
    """
    Adds commentary with bullet points to a slide.

@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
    :param width_inches: The width of the commentary text box.
    :param height_inches: The height of the commentary text box.
    """
-    txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches)
+    txBox = slide.shapes.add_textbox(
+        left_inches, top_inches, width_inches, height_inches
+    )
    tf = txBox.text_frame

    # Configure text frame
@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche

    for i, section in enumerate(sections):
        if i > 0:
-            p = tf.add_paragraph()  # Add a new paragraph for each section after the first
+            p = (
+                tf.add_paragraph()
+            )  # Add a new paragraph for each section after the first
        else:
            p = tf.paragraphs[0]  # Use the first paragraph for the first section
        p.text = section
@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None):
    # Determine the position of the commentary text box based on whether an image is included
    if img_path:
        # Add the image
-        slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
+        slide.shapes.add_picture(
+            img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)
+        )
        # Position for commentary when image is present
        commentary_top = Inches(6)
    else:
@ -237,16 +300,18 @@ def create_powerpoint(data, save_location):
    prs = Presentation()

    for slide, slide_data in data.items():
-        slide_figure_path = data[slide].get('image_path')
-        text = data[slide].get('text')
-        title = data[slide].get('title', "")
+        slide_figure_path = data[slide].get("image_path")
+        text = data[slide].get("text")
+        title = data[slide].get("title", "")
        add_slide_with_image(prs, title, slide_figure_path, text)

    # Save the presentation
    prs.save(save_location)


-def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
+def create_recommendations_summary(
+    recommendations_df, properties_df, property_details_df, sap_target
+):
    # Aggregate the impact of the recommendations
    # We want:
    # Total number of sap points
@ -254,40 +319,52 @@ def create_recommendations_summary(recommendations_df, properties_df, property_d
    # total bill savings
    # total cost
    # Total Co2 impact
-    recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
-        total_sap_points=("sap_points", "sum"),
-        total_valuation_impact=("property_valuation_increase", "sum"),
-        total_bill_savings=("energy_cost_savings", "sum"),
-        total_cost=("estimated_cost", "sum"),
-        total_carbon=("co2_equivalent_savings", "sum"),
-        adjusted_heat_demand=("adjusted_heat_demand", "sum")
-    ).reset_index()
+    recommendations_summary = (
+        recommendations_df.groupby(["property_id"])
+        .agg(
+            total_sap_points=("sap_points", "sum"),
+            total_valuation_impact=("property_valuation_increase", "sum"),
+            total_bill_savings=("energy_cost_savings", "sum"),
+            total_cost=("estimated_cost", "sum"),
+            total_carbon=("co2_equivalent_savings", "sum"),
+            adjusted_heat_demand=("adjusted_heat_demand", "sum"),
+        )
+        .reset_index()
+    )
    # Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill
    recommendations_summary = recommendations_summary.merge(
-        properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
-        how="left"
+        properties_df[["id", "uprn", "current_sap_points"]].rename(
+            columns={"id": "property_id"}
+        ),
+        on="property_id",
+        how="left",
    )

    recommendations_summary["expected_sap_points"] = (
-        recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
+        recommendations_summary["current_sap_points"]
+        + recommendations_summary["total_sap_points"]
    )
-    recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
-        lambda x: sap_to_epc(x)
+    recommendations_summary["expected_epc_rating"] = recommendations_summary[
+        "expected_sap_points"
+    ].apply(lambda x: sap_to_epc(x))
+    recommendations_summary["sap_difference"] = (
+        sap_target - recommendations_summary["expected_sap_points"]
    )
-    recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]

    if property_details_df is not None:
        recommendations_summary = recommendations_summary.merge(
-            property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
+            property_details_df[
+                ["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]
+            ].rename(
                columns={
                    "id": "property_id",
                    "co2_emissions": "current_co2",
                    "adjusted_energy_consumption": "current_energy",
-                    "energy_bill": "current_energy_bill"
+                    "energy_bill": "current_energy_bill",
                }
            ),
            on="uprn",
-            how="left"
+            how="left",
        )

    return recommendations_summary
--- a/infrastructure/terraform/lambda/_template/main.tf
+++ b/infrastructure/terraform/lambda/_template/main.tf
@ -1,3 +1,30 @@
+# ==============================================================================
+# TEMPLATE: Lambda Configuration with Optional S3 IAM Policy
+# ==============================================================================
+# Instructions:
+# 1. Replace "REPLACE ME" with your lambda name (e.g., "my-lambda-name")
+# 2. Add any additional environment variables as needed
+# 3. To attach S3 IAM policies from shared state:
+#    - Uncomment the S3 policy attachment section below
+#    - Update the policy_arn to match the output from shared/main.tf
+#    - Available shared outputs (examples):
+#      - data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn
+#      - data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
+# 4. To create a NEW S3 policy:
+#    - Add a new module "lambda_s3_policy" in shared/main.tf using the
+#      s3_iam_policy module (see examples in shared/main.tf)
+#    - Then reference it here using data.terraform_remote_state.shared.outputs
+# ==============================================================================
+
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+
 module "lambda" {
  source = "../modules/lambda_with_sqs"

@ -12,3 +39,25 @@ module "lambda" {
    LOG_LEVEL = "info"
  }
 }
+
+# ======================================================================
+# OPTIONAL: Attach S3 IAM policy to Lambda execution role
+# ======================================================================
+# Uncomment and configure the resource below to attach S3 permissions
+#
+# Example 1: Attach existing policy from shared state
+# resource "aws_iam_role_policy_attachment" "lambda_s3_policy" {
+#   role       = module.lambda.role_name
+#   policy_arn = data.terraform_remote_state.shared.outputs.YOUR_POLICY_OUTPUT_NAME_arn
+# }
+#
+# Example 2: Attach multiple policies
+# resource "aws_iam_role_policy_attachment" "lambda_read_policy" {
+#   role       = module.lambda.role_name
+#   policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
+# }
+#
+# resource "aws_iam_role_policy_attachment" "lambda_write_policy" {
+#   role       = module.lambda.role_name
+#   policy_arn = data.terraform_remote_state.shared.outputs.another_policy_arn
+# }
--- a/infrastructure/terraform/lambda/address2UPRN/main.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/main.tf
@ -1,3 +1,19 @@
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+data "aws_secretsmanager_secret_version" "db_credentials" {
+  secret_id = "${var.stage}/assessment_model/db_credentials"
+}
+
+locals {
+  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
+}
+
 module "address2uprn" {
  source = "../modules/lambda_with_sqs"

@ -6,9 +22,32 @@ module "address2uprn" {

  image_uri = local.image_uri

-
-  environment = {
-    STAGE     = var.stage
-    LOG_LEVEL = "info"
-  }
+  environment = merge(
+    {
+      STAGE     = var.stage
+      LOG_LEVEL = "info"
+      DB_USERNAME = local.db_credentials.db_assessment_model_username
+      DB_PASSWORD = local.db_credentials.db_assessment_model_password
+      GOOGLE_SOLAR_API_KEY = "test"
+      SAP_PREDICTIONS_BUCKET = "test"
+      CARBON_PREDICTIONS_BUCKET = "test"
+      HEAT_PREDICTIONS_BUCKET = "test"
+      HEATING_KWH_PREDICTIONS_BUCKET = "test"
+      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
+      API_KEY = "test"
+      ENVIRONMENT = "test"
+      SECRET_KEY = "test"
+      PLAN_TRIGGER_BUCKET = "test"
+      DATA_BUCKET = "test"
+      ENGINE_SQS_URL = "test"
+      ENERGY_ASSESSMENTS_BUCKET = "test"
+      S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
+    },
+  )
 }
+
+# Attach the S3 read-and-write policy to the Lambda execution role
+resource "aws_iam_role_policy_attachment" "address2uprn_read_and_write" {
+  role       = module.address2uprn.role_name
+  policy_arn = data.terraform_remote_state.shared.outputs.address_2_uprn_s3_read_and_write_arn
+}
--- a/infrastructure/terraform/lambda/address2UPRN/outputs.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/outputs.tf
@ -0,0 +1,14 @@
+output "address2uprn_queue_url" {
+  value       = module.address2uprn.queue_url
+  description = "URL of the address2UPRN SQS queue"
+}
+
+output "address2uprn_queue_arn" {
+  value       = module.address2uprn.queue_arn
+  description = "ARN of the address2UPRN SQS queue"
+}
+
+output "address2uprn_lambda_arn" {
+  value       = module.address2uprn.lambda_arn
+  description = "ARN of the address2UPRN Lambda function"
+}
--- a/infrastructure/terraform/lambda/condition-etl/main.tf
+++ b/infrastructure/terraform/lambda/condition-etl/main.tf
@ -23,7 +23,6 @@ module "lambda" {
  stage = var.stage

  image_uri = local.image_uri
-  timeout = 180


  environment = merge(
--- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf
+++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf
@ -9,3 +9,4 @@ output "queue_arn" {
 output "queue_url" {
  value = module.queue.queue_url
 }
+
--- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf
+++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf
@ -1,3 +1,30 @@
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key = "env:/${var.stage}/terraform.tfstate" 
+    region = "eu-west-2"
+  }
+}
+data "aws_secretsmanager_secret_version" "db_credentials" {
+  secret_id = "${var.stage}/assessment_model/db_credentials"
+}
+
+
+locals {
+  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
+}
+
+# Reference the existing address2UPRN Lambda outputs from address2uprn state
+data "terraform_remote_state" "address2uprn" {
+  backend = "s3"
+  config = {
+    bucket = "address2uprn-terraform-state"
+    key = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+
 module "lambda" {
  source = "../modules/lambda_with_sqs"

@ -7,8 +34,56 @@ module "lambda" {
  image_uri = local.image_uri


-  environment = {
-    STAGE = var.stage
-    LOG_LEVEL = "info"
-  }
+  environment = merge(
+    {
+      STAGE     = var.stage
+      LOG_LEVEL = "info"
+      DB_USERNAME = local.db_credentials.db_assessment_model_username
+      DB_PASSWORD = local.db_credentials.db_assessment_model_password
+      GOOGLE_SOLAR_API_KEY = "test"
+      SAP_PREDICTIONS_BUCKET = "test"
+      CARBON_PREDICTIONS_BUCKET = "test"
+      HEAT_PREDICTIONS_BUCKET = "test"
+      HEATING_KWH_PREDICTIONS_BUCKET = "test"
+      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
+      API_KEY = "test"
+      ENVIRONMENT = "test"
+      SECRET_KEY = "test"
+      PLAN_TRIGGER_BUCKET = "test"
+      DATA_BUCKET = "test"
+      EPC_AUTH_TOKEN = "test"
+      ENGINE_SQS_URL = "test"
+      ENERGY_ASSESSMENTS_BUCKET = "test"
+      ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url
+      S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
+    },
+  )
 }
+
+# Attach S3 read policy to the Lambda execution role
+resource "aws_iam_role_policy_attachment" "postcode_splitter_s3_read" {
+  role       = module.lambda.role_name
+  policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
+}
+
+# Create SQS send policy for address2UPRN queue
+module "postcode_splitter_sqs_policy" {
+  source = "../../modules/general_iam_policy"
+
+  policy_name        = "postcode-splitter-sqs-send-${var.stage}"
+  policy_description = "Allow postcode-splitter Lambda to send messages to address2UPRN queue"
+
+  actions = [
+    "sqs:SendMessage"
+  ]
+
+  resources = [
+    data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_arn
+  ]
+}
+
+# Attach SQS policy to the Lambda execution role
+resource "aws_iam_role_policy_attachment" "postcode_splitter_sqs_send" {
+  role       = module.lambda.role_name
+  policy_arn = module.postcode_splitter_sqs_policy.policy_arn
+}
--- a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf
+++ b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf
@ -24,3 +24,12 @@ locals {
 output "resolved_image_uri" {
  value = local.image_uri
 }
+
+
+
+
+
+
+
+
+
--- a/infrastructure/terraform/modules/general_iam_policy/main.tf
+++ b/infrastructure/terraform/modules/general_iam_policy/main.tf
@ -0,0 +1,21 @@
+# IAM Policy with dynamic actions and resources
+# Builds one policy document containing a single "Allow" statement whose
+# Action and Resource lists come straight from var.actions / var.resources.
+resource "aws_iam_policy" "policy" {
+  name        = var.policy_name
+  description = var.policy_description
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      # merge() lets the statement carry an optional Condition key without
+      # duplicating the whole statement for the "no conditions" case.
+      merge(
+        {
+          Effect   = "Allow"
+          Action   = var.actions
+          Resource = var.resources
+        },
+        # Condition is only added when the caller supplied var.conditions
+        # (it defaults to null); otherwise nothing is merged in.
+        var.conditions != null ? { Condition = var.conditions } : {}
+      )
+    ]
+  })
+
+  tags = var.tags
+}
--- a/infrastructure/terraform/modules/general_iam_policy/outputs.tf
+++ b/infrastructure/terraform/modules/general_iam_policy/outputs.tf
@ -0,0 +1,9 @@
+output "policy_arn" {
+  value       = aws_iam_policy.policy.arn
+  description = "ARN of the created IAM policy"
+}
+
+output "policy_name" {
+  value       = aws_iam_policy.policy.name
+  description = "Name of the created IAM policy"
+}
--- a/infrastructure/terraform/modules/general_iam_policy/variables.tf
+++ b/infrastructure/terraform/modules/general_iam_policy/variables.tf
@ -0,0 +1,32 @@
+variable "policy_name" {
+  description = "Name of the IAM policy"
+  type        = string
+}
+
+variable "policy_description" {
+  description = "Description of the IAM policy"
+  type        = string
+  default     = ""
+}
+
+variable "actions" {
+  description = "List of IAM actions allowed by this policy"
+  type        = list(string)
+}
+
+variable "resources" {
+  description = "List of AWS resources this policy applies to"
+  type        = list(string)
+}
+
+variable "conditions" {
+  description = "Optional IAM policy conditions"
+  type        = any
+  default     = null
+}
+
+variable "tags" {
+  description = "Tags to apply to the policy"
+  type        = map(string)
+  default     = {}
+}
--- a/infrastructure/terraform/modules/lambda_execution_role/main.tf
+++ b/infrastructure/terraform/modules/lambda_execution_role/main.tf
@ -19,19 +19,3 @@ resource "aws_iam_role_policy_attachment" "basic_logs" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
 }

-resource "aws_iam_role_policy" "ecr_pull" {
-  role = aws_iam_role.this.name
-
-  policy = jsonencode({
-    Version = "2012-10-17"
-    Statement = [{
-      Effect = "Allow"
-      Action = [
-        "ecr:GetAuthorizationToken",
-        "ecr:BatchGetImage",
-        "ecr:GetDownloadUrlForLayer"
-      ]
-      Resource = "*"
-    }]
-  })
-}
--- a/infrastructure/terraform/modules/s3_iam_policy/main.tf
+++ b/infrastructure/terraform/modules/s3_iam_policy/main.tf
@ -0,0 +1,31 @@
+# Dynamically build S3 resources list from bucket ARNs and resource paths
+locals {
+  # Generate full resource ARNs by combining bucket ARNs with resource paths
+  # Every bucket ARN is paired with every path and the path is appended
+  # verbatim: "/*" yields "<bucket_arn>/*" (the bucket's objects), while an
+  # empty path "" yields the bucket ARN itself.
+  resources = flatten([
+    for bucket_arn in var.bucket_arns : [
+      for path in var.resource_paths : "${bucket_arn}${path}"
+    ]
+  ])
+}
+
+# IAM Policy with dynamic actions and resources
+# Builds one policy document containing a single "Allow" statement that
+# grants var.actions on every ARN in local.resources.
+resource "aws_iam_policy" "s3_policy" {
+  name        = var.policy_name
+  description = var.policy_description
+
+  policy = jsonencode({
+    Version = "2012-10-17"
+    Statement = [
+      merge(
+        {
+          Effect   = "Allow"
+          Action   = var.actions
+          Resource = local.resources
+        },
+        # Condition is only added when the caller supplied var.conditions
+        # (it defaults to null); otherwise nothing is merged in.
+        var.conditions != null ? { Condition = var.conditions } : {}
+      )
+    ]
+  })
+
+  tags = var.tags
+}
--- a/infrastructure/terraform/modules/s3_iam_policy/outputs.tf
+++ b/infrastructure/terraform/modules/s3_iam_policy/outputs.tf
@ -0,0 +1,14 @@
+output "policy_arn" {
+  description = "ARN of the S3 IAM policy"
+  value       = aws_iam_policy.s3_policy.arn
+}
+
+output "policy_name" {
+  description = "Name of the S3 IAM policy"
+  value       = aws_iam_policy.s3_policy.name
+}
+
+output "policy_id" {
+  description = "ID of the S3 IAM policy"
+  value       = aws_iam_policy.s3_policy.id
+}
--- a/infrastructure/terraform/modules/s3_iam_policy/variables.tf
+++ b/infrastructure/terraform/modules/s3_iam_policy/variables.tf
@ -0,0 +1,42 @@
+variable "policy_name" {
+  description = "Name of the IAM policy"
+  type        = string
+}
+
+variable "policy_description" {
+  description = "Description of the IAM policy"
+  type        = string
+  default     = ""
+}
+
+variable "bucket_arns" {
+  description = "List of S3 bucket ARNs to grant access to"
+  type        = list(string)
+}
+
+variable "actions" {
+  description = "List of S3 actions to allow (e.g., ['s3:GetObject'], ['s3:PutObject'], ['s3:DeleteObject'])"
+  type        = list(string)
+  default     = ["s3:GetObject"]
+}
+
+variable "resource_paths" {
+  description = "List of resource paths within buckets (e.g., ['/*'] for all objects, ['/specific-prefix/*'] for specific prefix)"
+  type        = list(string)
+  default     = ["/*"]
+}
+
+variable "conditions" {
+  description = "Optional IAM policy conditions to apply to the statement"
+  type        = any
+  default     = null
+}
+
+variable "tags" {
+  description = "Tags to apply to the policy"
+  type        = map(string)
+  default     = {}
+}
+
+
+
--- a/infrastructure/terraform/shared/main.tf
+++ b/infrastructure/terraform/shared/main.tf
@ -133,6 +133,11 @@ module "retrofit_sap_data" {
  allowed_origins = var.allowed_origins
 }

+output "retrofit_sap_data_bucket_name" {
+  value = module.retrofit_sap_data.bucket_name
+  description = "Name of the retrofit SAP data bucket"
+}
+
 module "retrofit_carbon_predictions" {
  source          = "../modules/s3"
  bucketname      = "retrofit-carbon-predictions-${var.stage}"
@ -305,6 +310,21 @@ module "address2uprn_registry" {

 }

+# S3 policy for address2UPRN to read from and write to the retrofit data bucket
+# NOTE(review): the address2UPRN Lambda receives S3_BUCKET_NAME from
+# module.retrofit_sap_data - confirm that bucket is retrofit-data-${var.stage}.
+module "address2uprn_s3_read_and_write" {
+  source = "../modules/s3_iam_policy"
+
+  policy_name        = "Address2UPRNReadandWriteS3"
+  policy_description = "Allow address2uprn Lambda to read and write from retrofit-data bucket"
+  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
+  actions            = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]
+  # s3:ListBucket is a bucket-level action, so the bucket ARN itself ("" path)
+  # must be listed alongside the object ARNs ("/*"); with "/*" alone the
+  # ListBucket grant never matches any resource.
+  resource_paths     = ["", "/*"]
+}
+
+output "address_2_uprn_s3_read_and_write_arn" {
+  value = module.address2uprn_s3_read_and_write.policy_arn
+}
+
 ################################################
 # Condition ETL – Lambda ECR
 ################################################
@ -321,6 +341,28 @@ module "condition_etl_registry" {

 }

+# Condition Data S3 Bucket to store initial data
+module "condition_data_bucket" {
+  source      = "../modules/s3"
+  bucketname = "condition-data-${var.stage}"
+  allowed_origins = var.allowed_origins
+}
+
+module "condition_etl_s3_read" {
+  source = "../modules/s3_iam_policy"
+
+  policy_name        = "ConditionETLReadS3"
+  policy_description = "Allow Lambda to read objects from condition-data-${var.stage}"
+  bucket_arns        = ["arn:aws:s3:::condition-data-${var.stage}"]
+  actions            = ["s3:GetObject"]
+  resource_paths     = ["/*"]
+}
+
+output "condition_etl_s3_read_arn" {
+  value = module.condition_etl_s3_read.policy_arn
+}
+
+
 ################################################
 # Postcode Splitter – Lambda ECR
 ################################################
@ -337,30 +379,17 @@ module "postcode_splitter_registry" {

 }

-################################################
-# Conidition data – S3 bucket
-################################################
-module "condition_data_bucket" {
-  source      = "../modules/s3"
-  bucketname = "condition-data-${var.stage}"
-  allowed_origins = var.allowed_origins
+# S3 policy for postcode splitter to read from retrofit data bucket
+# NOTE(review): s3:PutObject is granted even though the policy is named and
+# described as read-only - confirm write access is intended.
+module "postcode_splitter_s3_read" {
+  source = "../modules/s3_iam_policy"
+
+  policy_name        = "PostcodeSplitterReadS3"
+  policy_description = "Allow postcode splitter Lambda to read from retrofit-data bucket"
+  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
+  actions            = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]
+  # s3:ListBucket is a bucket-level action, so the bucket ARN itself ("" path)
+  # must be listed alongside the object ARNs ("/*"); with "/*" alone the
+  # ListBucket grant never matches any resource.
+  resource_paths     = ["", "/*"]
 }

-resource "aws_iam_policy" "condition_etl_s3_read" {
-  name        = "ConditionETLReadS3"
-  description = "Allow Lambda to read objects from condition-data-${var.stage}"
-  policy      = jsonencode({
-    Version = "2012-10-17"
-    Statement = [
-      {
-        Effect = "Allow"
-        Action = ["s3:GetObject"]
-        Resource = "arn:aws:s3:::condition-data-${var.stage}/*"
-      }
-    ]
-  })
-}
-
-output "condition_etl_s3_read_arn" {
-  value = aws_iam_policy.condition_etl_s3_read.arn
+output "postcode_splitter_s3_read_arn" {
+  value = module.postcode_splitter_s3_read.policy_arn
 }
--- a/pytest.ini
+++ b/pytest.ini
@ -1,4 +1,4 @@
 [pytest]
 pythonpath = .
 addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
-testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests
+testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@ -13,7 +13,7 @@ from sqlalchemy.orm import sessionmaker
 from backend.app.db.connection import db_engine, db_read_session
 from backend.app.db.models.recommendations import (
    Recommendation,
-    Plan,
+    PlanModel,
    PlanRecommendations,
    RecommendationMaterials,
 )
@ -36,6 +36,8 @@ scenario_names = {
    1059: "EPC C - 10k budget",
 }

+project_name = "manchester"
+

 def get_data(portfolio_id, scenario_ids):
    session = sessionmaker(bind=db_engine)()
@ -73,12 +75,12 @@ def get_data(portfolio_id, scenario_ids):
    # --------------------
    latest_plans_subq = (
        session.query(
-            Plan.scenario_id,
-            Plan.property_id,
-            func.max(Plan.created_at).label("latest_created_at"),
+            PlanModel.scenario_id,
+            PlanModel.property_id,
+            func.max(PlanModel.created_at).label("latest_created_at"),
        )
-        .filter(Plan.scenario_id.in_(scenario_ids))
-        .group_by(Plan.scenario_id, Plan.property_id)
+        .filter(PlanModel.scenario_id.in_(scenario_ids))
+        .group_by(PlanModel.scenario_id, PlanModel.property_id)
        .subquery()
    )

@ -87,12 +89,12 @@ def get_data(portfolio_id, scenario_ids):
    # ).all()

    plans_query = (
-        session.query(Plan)
+        session.query(PlanModel)
        .join(
            latest_plans_subq,
-            (Plan.scenario_id == latest_plans_subq.c.scenario_id)
-            & (Plan.property_id == latest_plans_subq.c.property_id)
-            & (Plan.created_at == latest_plans_subq.c.latest_created_at),
+            (PlanModel.scenario_id == latest_plans_subq.c.scenario_id)
+            & (PlanModel.property_id == latest_plans_subq.c.property_id)
+            & (PlanModel.created_at == latest_plans_subq.c.latest_created_at),
        )
        .all()
    )
@ -108,7 +110,7 @@ def get_data(portfolio_id, scenario_ids):
    # )

    plans_data = [
-        {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
+        {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
        for plan in plans_query
    ]

@ -118,12 +120,14 @@ def get_data(portfolio_id, scenario_ids):
    # Recommendations (NO materials yet)
    # --------------------
    recommendations_query = (
-        session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
+        session.query(
+            Recommendation, PlanModel.scenario_id, PlanRecommendations.plan_id
+        )
        .join(
            PlanRecommendations,
            Recommendation.id == PlanRecommendations.recommendation_id,
        )
-        .join(Plan, Plan.id == PlanRecommendations.plan_id)
+        .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
        .filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default.is_(True),
@ -230,7 +234,7 @@ for scenario_id in SCENARIOS:
    # Get recs for this scenario
    recommended_measures_df = recommendations_df[
        recommendations_df["scenario_id"] == scenario_id
-        ][["property_id", "measure_type", "estimated_cost", "default"]]
+    ][["property_id", "measure_type", "estimated_cost", "default"]]
    recommended_measures_df = recommended_measures_df[
        recommended_measures_df["default"]
    ]
@ -238,7 +242,7 @@ for scenario_id in SCENARIOS:

    post_install_sap = recommendations_df[
        recommendations_df["scenario_id"] == scenario_id
-        ][["property_id", "default", "sap_points"]]
+    ][["property_id", "default", "sap_points"]]
    post_install_sap = post_install_sap[post_install_sap["default"]]
    # Sum up the sap points by property id
    post_install_sap = (
@ -284,6 +288,8 @@ for scenario_id in SCENARIOS:
                "current_sap_points",
                "total_floor_area",
                "number_of_rooms",
+                "lodgement_date",
+                "is_expired",
                "id",
            ]
        ]
@ -301,7 +307,58 @@ for scenario_id in SCENARIOS:
    )
    df["uprn"] = df["uprn"].astype(str)

+    relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id]
+    df2 = df.merge(
+        relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]],
+        how="left",
+        on="property_id",
+        suffixes=("", "_plan"),
+    )
+    print(df2["predicted_post_works_epc"].value_counts())
+    print(df2["post_epc_rating"].value_counts())
+
+    z = df2[
+        (df2["predicted_post_works_epc"] != "D")
+        & (df2["post_epc_rating"].astype(str) == "Epc.D")
+    ]
+
+    df2["predicted_post_works_epc"].value_counts()
+    df2["post_epc_rating"].astype(str).value_counts()
+
+    df2[df2["total_retrofit_cost"] > 0].shape
+
+    getting_works = df[df["total_retrofit_cost"] > 0]
+    getting_works["predicted_post_works_epc"].value_counts()
+
+    32565 / getting_works.shape[0]
+
+    df[df["predicted_post_works_sap"] == ""]
+
+    # Expected columns list
+    expected_columns = [
+        "suspended_floor_insulation",
+        "solid_floor_insulation",
+        "external_wall_insulation",
+        "internal_wall_insulation",
+        "cavity_wall_insulation",
+        "loft_insulation",
+        "flat_roof_insulation",
+        "room_roof_insulation",
+        "secondary_glazing",
+        "double_glazing",
+        "solar_pv",
+        "high_heat_retention_storage_heaters",
+        "air_source_heat_pump",
+        "boiler_upgrade",
+        "roomstat_programmer_trvs",
+        "time_temperature_zone_control",
+    ]
+    # Add missing columns with default values
+    for col in expected_columns:
+        if col not in df.columns:
+            df[col] = ""
+
    # Create excel to store to
-    filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx"
+    filename = f"{scenario_names[scenario_id]} - {project_name}.xlsx"
    with pd.ExcelWriter(filename) as writer:
        df.to_excel(writer, sheet_name="properties", index=False)
--- a/utils/logger.py
+++ b/utils/logger.py
@ -1,7 +1,13 @@
 import logging
+from os import PathLike
+from typing import Optional, Union


-def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
+def setup_logger(
+    log_file: Optional[Union[str, PathLike[str]]] = None,
+    level: int = logging.INFO,
+    overwrite_handler: bool = False,
+) -> logging.Logger:
    # Create a logger and set the logging level
    logger = logging.getLogger()
    logger.setLevel(level)
--- a/utils/s3.py
+++ b/utils/s3.py
@ -3,12 +3,62 @@ import boto3
 import csv
 import pandas as pd
 from io import BytesIO, StringIO
+from urllib.parse import unquote
 from utils.logger import setup_logger
 from botocore.exceptions import NoCredentialsError, PartialCredentialsError

 logger = setup_logger()


+def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
+    """
+    Parse an S3 location string to extract bucket and key.
+
+    Supports two formats:
+    1. S3 URI format: s3://bucket/key
+    2. AWS console URL format with query parameters: the bucket is the path
+       segment after "/s3/object/" and the key is the URL-decoded "prefix"
+       query parameter
+
+    :param s3_uri: The S3 URI or AWS console URL to parse
+    :return: A (bucket, key) tuple. For a console URL without a "prefix"
+        parameter the key is an empty string
+    :raises ValueError: If the input cannot be parsed or the bucket name is
+        empty. The underlying error is attached as the exception's cause
+    """
+    logger.info("Parsing S3 URI")
+
+    try:
+        # Check if it's an S3 URI format
+        if s3_uri.startswith("s3://"):
+            parts = s3_uri[5:].split("/", 1)
+            if len(parts) < 2:
+                raise ValueError("S3 URI must include both bucket and key")
+            bucket, key = parts
+            # "s3:///key" would otherwise be returned with an empty bucket
+            if not bucket:
+                raise ValueError("S3 URI has an empty bucket name")
+            logger.info(f"Extracted bucket: {bucket}, key: {key}")
+            return bucket, key
+
+        # Otherwise, treat as AWS console URL
+        logger.info("Parsing as AWS console URL")
+
+        # Split base URL and query string
+        if "?" not in s3_uri:
+            raise ValueError("No query string found")
+
+        base, query = s3_uri.split("?", 1)
+
+        # Extract bucket from base URL
+        if "/s3/object/" not in base:
+            raise ValueError("No '/s3/object/' found in URL path")
+
+        path_parts = base.split("/s3/object/")
+        bucket = path_parts[1]
+        if not bucket:
+            raise ValueError("No bucket name found after '/s3/object/'")
+        logger.info(f"Extracted bucket: {bucket}")
+
+        # Extract prefix from query parameters. Split each pair on the first
+        # "=" only, so values that themselves contain "=" (e.g. a prefix such
+        # as "year=2024/file.csv") do not break the dict construction.
+        params = dict(item.split("=", 1) for item in query.split("&") if "=" in item)
+        key = unquote(params.get("prefix", ""))
+        logger.info(f"Extracted key: {key}")
+
+        return bucket, key
+    except Exception as e:
+        # Every failure, including the ValueErrors raised above, is logged and
+        # re-raised as one generic ValueError with the original as its cause.
+        logger.error(f"Error parsing S3 URI: {type(e).__name__}: {e}")
+        raise ValueError("Could not parse S3 URI") from e
+
+
 def read_from_s3(bucket_name, s3_file_name):
    """
    Read an object from s3. Decoding of the data is left for outside of this function
@ -17,11 +67,11 @@ def read_from_s3(bucket_name, s3_file_name):
    :param s3_file_name: The file name to use for the saved data in S3
    """
    # Initialize a session using Amazon S3
-    s3 = boto3.resource('s3')
+    s3 = boto3.resource("s3")

    # Get the MessagePack data from S3
    obj = s3.Object(bucket_name, s3_file_name)
-    data = obj.get()['Body'].read()
+    data = obj.get()["Body"].read()

    return data

@ -36,7 +86,7 @@ def save_data_to_s3(data, bucket_name, s3_file_name):
    """
    # Ensure you have AWS credentials set up - either via environment variables, AWS CLI, or IAM roles
    try:
-        s3 = boto3.client('s3')
+        s3 = boto3.client("s3")
    except NoCredentialsError:
        print("Credentials not available.")
        return
@ -46,12 +96,12 @@ def save_data_to_s3(data, bucket_name, s3_file_name):

    try:
        s3.put_object(Bucket=bucket_name, Key=s3_file_name, Body=data)
-        print(f'Successfully uploaded data to {bucket_name}/{s3_file_name}')
+        print(f"Successfully uploaded data to {bucket_name}/{s3_file_name}")
    except Exception as e:
-        print(f'Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}')
+        print(f"Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}")


-def read_io_from_s3(bucket_name, file_key):
+def read_io_from_s3(bucket_name: str, file_key: str) -> BytesIO:
    """
    Read a file from S3 into a BytesIO object. This can be used by other methods to parse the response

@ -61,13 +111,13 @@ def read_io_from_s3(bucket_name, file_key):
    :param file_key: The file name of the shapefile in S3
    :return: Io file to be parsed by another method
    """
-    client = boto3.client('s3')
+    client = boto3.client("s3")

    # Get the Parquet file from S3
    response = client.get_object(Bucket=bucket_name, Key=file_key)

    # Read the file into an io object
-    buffer = BytesIO(response['Body'].read())
+    buffer = BytesIO(response["Body"].read())

    return buffer

@ -86,7 +136,7 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
    df.to_parquet(parquet_buffer)

    # Create the boto3 client
-    client = boto3.client('s3')
+    client = boto3.client("s3")

    # Upload the Parquet file to S3
    client.put_object(Bucket=bucket_name, Key=file_key, Body=parquet_buffer.getvalue())
@ -102,15 +152,14 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key):
    """

    if bucket_name is None:
-        raise ValueError("Bucket name is None when trying to read dataframe from parquet")
+        raise ValueError(
+            "Bucket name is None when trying to read dataframe from parquet"
+        )

    if not file_key.endswith(".parquet"):
        raise ValueError("This file doesn't look like a parquet file")

-    parquet_buffer = read_io_from_s3(
-        bucket_name=bucket_name,
-        file_key=file_key
-    )
+    parquet_buffer = read_io_from_s3(bucket_name=bucket_name, file_key=file_key)

    df = pd.read_parquet(parquet_buffer)

@ -130,7 +179,7 @@ def save_csv_to_s3(dataframe, bucket_name, file_name):
        bool: True if the file was successfully saved, False otherwise.
    """
    # Initialize S3 client
-    s3 = boto3.client('s3')
+    s3 = boto3.client("s3")

    # Create an in-memory text stream
    csv_buffer = StringIO()
@ -159,7 +208,7 @@ def save_pickle_to_s3(data, bucket_name, s3_file_name):
    try:
        serialized_data = pickle.dumps(data)
    except Exception as e:
-        print(f'Failed to serialize data: {str(e)}')
+        print(f"Failed to serialize data: {str(e)}")
        return

    # Use save_data_to_s3 function to upload the serialized data to S3
@ -175,9 +224,9 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
    :return: The data read from the pickle file
    """
    try:
-        s3 = boto3.client('s3')
+        s3 = boto3.client("s3")
        s3_response = s3.get_object(Bucket=bucket_name, Key=s3_file_name)
-        serialized_data = s3_response['Body'].read()
+        serialized_data = s3_response["Body"].read()
    except NoCredentialsError:
        logger.errpr("Credentials not available.")
        return None
@ -185,20 +234,24 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
        logger.errpr("Incomplete credentials provided.")
        return None
    except Exception as e:
-        logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
+        logger.error(
+            f"Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}"
+        )
        return None

    # Deserialize data from pickle format
    try:
        data = pickle.loads(serialized_data)
    except Exception as e:
-        logger.error(f'Failed to deserialize data: {str(e)}')
+        logger.error(f"Failed to deserialize data: {str(e)}")
        return None

    return data


-def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None):
+def read_excel_from_s3(
+    bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None
+):
    """
    Read an Excel file from an S3 bucket and return it as a pandas DataFrame.

@ -222,7 +275,7 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, shee

    # Drop columns where all values are NaN
    if drop_all_na:
-        df.dropna(axis=1, how='all', inplace=True)
+        df.dropna(axis=1, how="all", inplace=True)

    # Reset index if the first column is just an index or entirely NaN
    df.reset_index(drop=True, inplace=True)
@ -254,7 +307,7 @@ def save_excel_to_s3(df, bucket_name, file_key):

    # Initialize a session using boto3
    session = boto3.session.Session()
-    s3 = session.resource('s3')
+    s3 = session.resource("s3")

    # Upload the Excel file from the buffer to S3
    bucket = s3.Bucket(bucket_name)
@ -264,17 +317,19 @@ def save_excel_to_s3(df, bucket_name, file_key):


 def read_csv_from_s3(bucket_name, filepath):
-    logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'")
-    s3 = boto3.client('s3')
+    logger.info(
+        f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'"
+    )
+    s3 = boto3.client("s3")

    # Get the object from s3
    s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)

    # Read the CSV body from the s3 object
-    body = s3_object['Body'].read()
+    body = s3_object["Body"].read()

    # Use StringIO to create a file-like object from the string
-    csv_data = StringIO(body.decode('utf-8'))
+    csv_data = StringIO(body.decode("utf-8"))

    # Use csv library to read it into a list of dictionaries
    reader = csv.DictReader(csv_data)
@ -292,14 +347,16 @@ def list_files_in_s3_folder(bucket_name, folder_name):
    :return: A list of file keys in the specified S3 folder.
    """
    try:
-        s3 = boto3.client('s3')
+        s3 = boto3.client("s3")
        response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)

-        if 'Contents' not in response:
-            logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
+        if "Contents" not in response:
+            logger.info(
+                f"No files found in folder {folder_name} in bucket {bucket_name}."
+            )
            return []

-        file_keys = [content['Key'] for content in response['Contents']]
+        file_keys = [content["Key"] for content in response["Contents"]]
        return file_keys

    except NoCredentialsError:
@ -309,7 +366,9 @@ def list_files_in_s3_folder(bucket_name, folder_name):
        logger.error("Incomplete credentials provided.")
        return []
    except Exception as e:
-        logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
+        logger.error(
+            f"Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}"
+        )
        return []


@ -335,22 +394,30 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name):
    """

    # For this function, folder_name should end with a forward slash
-    if not folder_name.endswith('/'):
-        folder_name += '/'
+    if not folder_name.endswith("/"):
+        folder_name += "/"

    try:
-        s3 = boto3.client('s3')
-        response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name, Delimiter='/')
+        s3 = boto3.client("s3")
+        response = s3.list_objects_v2(
+            Bucket=bucket_name, Prefix=folder_name, Delimiter="/"
+        )

        items = []

        # Add files to the list
-        if 'Contents' in response:
-            items.extend([content['Key'] for content in response['Contents'] if content['Key'] != folder_name])
+        if "Contents" in response:
+            items.extend(
+                [
+                    content["Key"]
+                    for content in response["Contents"]
+                    if content["Key"] != folder_name
+                ]
+            )

        # Add immediate subfolders to the list
-        if 'CommonPrefixes' in response:
-            items.extend([prefix['Prefix'] for prefix in response['CommonPrefixes']])
+        if "CommonPrefixes" in response:
+            items.extend([prefix["Prefix"] for prefix in response["CommonPrefixes"]])

        return items

@ -361,7 +428,9 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name):
        logger.error("Incomplete credentials provided.")
        return []
    except Exception as e:
-        logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}')
+        logger.error(
+            f"Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}"
+        )
        return []


@ -374,15 +443,21 @@ def list_xmls_in_s3_folder(bucket_name, folder_name):
    :return: A list of XML file keys in the specified S3 folder.
    """
    try:
-        s3 = boto3.client('s3')
+        s3 = boto3.client("s3")
        response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)

-        if 'Contents' not in response:
-            logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
+        if "Contents" not in response:
+            logger.info(
+                f"No files found in folder {folder_name} in bucket {bucket_name}."
+            )
            return []

        # Filter XML files
-        xml_files = [content['Key'] for content in response['Contents'] if content['Key'].endswith('.xml')]
+        xml_files = [
+            content["Key"]
+            for content in response["Contents"]
+            if content["Key"].endswith(".xml")
+        ]
        return xml_files

    except NoCredentialsError:
@ -392,5 +467,7 @@ def list_xmls_in_s3_folder(bucket_name, folder_name):
        logger.error("Incomplete credentials provided.")
        return []
    except Exception as e:
-        logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
+        logger.error(
+            f"Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}"
+        )
        return []