From aaa1bdc077d3131cf09a24b371b097bba7e2733b Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 2 Mar 2026 15:15:39 +0000
Subject: [PATCH 01/21] added utils to allow easier subtask management

---
 .devcontainer/asset_list/Dockerfile           |  7 +-
 .devcontainer/asset_list/devcontainer.json    |  7 +-
 .devcontainer/asset_list/docker-compose.yml   |  6 +-
 .vscode/settings.json                         |  8 +-
 asset_list/app.py                             | 22 +++--
 backend/app/utils.py                          |  4 +-
 backend/ordanceSurvey/handler/Dockerfile      |  0
 .../ordanceSurvey/handler/requirements.txt    |  0
 backend/ordanceSurvey/main.py                 | 37 ++++++++
 backend/utils/subtasks.py                     | 95 +++++++++++++++++++
 utils/s3.py                                   |  3 +-
 11 files changed, 169 insertions(+), 20 deletions(-)
 create mode 100644 backend/ordanceSurvey/handler/Dockerfile
 create mode 100644 backend/ordanceSurvey/handler/requirements.txt
 create mode 100644 backend/ordanceSurvey/main.py
 create mode 100644 backend/utils/subtasks.py
diff --git a/.devcontainer/asset_list/Dockerfile b/.devcontainer/asset_list/Dockerfile
index 72a5de53..be869637 100644
--- a/.devcontainer/asset_list/Dockerfile
+++ b/.devcontainer/asset_list/Dockerfile
@@ -21,7 +21,7 @@ RUN git clone --depth 1 https://github.com/openvenues/libpostal /tmp/libpostal \
  && rm -rf /tmp/libpostal
 
 # 3) Create the user and grant sudo privileges
-RUN useradd -m -s /usr/bin/bash ${USER} \
+RUN useradd -m -s /bin/bash ${USER} \
  && echo "${USER} ALL=(ALL) NOPASSWD: ALL" >/etc/sudoers.d/${USER} \
  && chmod 0440 /etc/sudoers.d/${USER}
 
@@ -32,6 +32,11 @@ ADD asset_list/requirements.txt requirements1.txt
 RUN cat requirements1.txt requirements2.txt >> requirements.txt
 
 RUN pip install -r requirements.txt
+
+# Install code server
+RUN curl -fsSL https://code-server.dev/install.sh | sh
+
+
 # 5) Workdir
 WORKDIR /workspaces/model
 
diff --git a/.devcontainer/asset_list/devcontainer.json b/.devcontainer/asset_list/devcontainer.json
index 945dcd88..83e5a276 100644
--- a/.devcontainer/asset_list/devcontainer.json
+++ b/.devcontainer/asset_list/devcontainer.json
@@ -2,13 +2,14 @@
   "name": "SAL ENV",
   "dockerComposeFile": "docker-compose.yml",
   "service": "model-sal",
-  "remoteUser": "vscode",
+  // "remoteUser": "vscode",
   "workspaceFolder": "/workspaces/model",
-  "postStartCommand": "bash .devcontainer/post-install.sh",
+  "postStartCommand": "bash .devcontainer/asset_list/post-install.sh",
   "mounts": [
     // Optional, just makes getting from Downloads (local env) easier
-    "source=${localEnv:HOME},target=/workspaces/home,type=bind"
+    "source=${localEnv:HOME},target=/home/vscode,type=bind"
   ],
+  "forwardPorts": [8081],
   "customizations": {
     "vscode": {
       "extensions": [
diff --git a/.devcontainer/asset_list/docker-compose.yml b/.devcontainer/asset_list/docker-compose.yml
index 06e4124d..0568393b 100644
--- a/.devcontainer/asset_list/docker-compose.yml
+++ b/.devcontainer/asset_list/docker-compose.yml
@@ -2,15 +2,17 @@ version: '3.8'
 
 services:
   model-sal:
-    user: "${UID}:${GID}"
     build:
       context: ../..
       dockerfile: .devcontainer/asset_list/Dockerfile
-    command: sleep infinity
+    command: code-server --bind-addr 0.0.0.0:8080
+    user: vscode
     volumes:
       - ../../:/workspaces/model
     networks:
       - model-net
+    ports:
+      - "8081:8080"
 
 networks:
   model-net:
diff --git a/.vscode/settings.json b/.vscode/settings.json
index b294c736..56299a40 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -16,7 +16,13 @@
     "python.languageServer": "Pylance",
     "python.analysis.typeCheckingMode": "strict",
     "python.analysis.autoSearchPaths": true,
-    "python.analysis.extraPaths": ["./src"]
+    "python.analysis.extraPaths": ["./src"],
+
+    "vim.useCtrlKeys": true,
+    "vim.handleKeys": {
+        "<C-c>": false,
+        "<C-v>": false
+    }
 
     // Hot reload setting that needs to be in user settings
     // "jupyter.runStartupCommands": [
diff --git a/asset_list/app.py b/asset_list/app.py
index 3e492118..02f930b4 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -77,21 +77,21 @@ def app():
     data_folder = "/workspaces/model/asset_list"
     data_filename = "assests.xlsx"
     sheet_name = "Sheet1"
-    postcode_column = "Postcode"
-    address1_column = "Address"
+    postcode_column = "POSTCODE"
+    address1_column = "ADDRESS"
     address1_method = "house_number_extraction"
     fulladdress_column = None
-    address_cols_to_concat = ["Address"]
+    address_cols_to_concat = ["ADDRESS"]
     missing_postcodes_method = None
     landlord_year_built = None
-    landlord_os_uprn = "UPRN"
-    landlord_property_type = "Archetype"
-    landlord_built_form = "Bedroom Count"
-    landlord_wall_construction = "Wall Insulation Type"
-    landlord_roof_construction = "Roof Type"
-    landlord_heating_system = "Boiler Type"
+    landlord_os_uprn = None
+    landlord_property_type = None
+    landlord_built_form = None
+    landlord_wall_construction = None
+    landlord_roof_construction = None
+    landlord_heating_system = None
     landlord_existing_pv = None
-    landlord_property_id = "Tab"
+    landlord_property_id = "UPRN"
     landlord_sap = None
     outcomes_filename = None
     outcomes_sheetname = None
@@ -488,3 +488,5 @@ def app():
                 asset_list.duplicated_addresses.to_excel(
                     writer, sheet_name="Duplicate Properties", index=False
                 )
+
+
diff --git a/backend/app/utils.py b/backend/app/utils.py
index b3843206..c1ad54f6 100644
--- a/backend/app/utils.py
+++ b/backend/app/utils.py
@@ -43,7 +43,7 @@ def generate_api_key():
     # Define the characters that will be used to generate the api key
     characters = string.ascii_letters + string.digits
     # Generate a 40 character long api key
-    api_key = ''.join(secrets.choice(characters) for _ in range(40))
+    api_key = "".join(secrets.choice(characters) for _ in range(40))
     return api_key
 
 
@@ -113,7 +113,7 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
     df.to_parquet(parquet_buffer)
 
     # Create the boto3 client
-    s3 = boto3.resource('s3')
+    s3 = boto3.resource("s3")
 
     # Upload the Parquet file to S3
     s3.Object(bucket_name, file_key).put(Body=parquet_buffer.getvalue())
diff --git a/backend/ordanceSurvey/handler/Dockerfile b/backend/ordanceSurvey/handler/Dockerfile
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/ordanceSurvey/handler/requirements.txt b/backend/ordanceSurvey/handler/requirements.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/backend/ordanceSurvey/main.py b/backend/ordanceSurvey/main.py
new file mode 100644
index 00000000..bc482d56
--- /dev/null
+++ b/backend/ordanceSurvey/main.py
@@ -0,0 +1,37 @@
+from typing import Any
+import json
+from utils.logger import setup_logger
+import logging
+
+from backend.utils.subtasks import subtask_handler
+
+logger: logging.Logger = setup_logger()
+import time
+
+
+@subtask_handler()
+def handler(event: dict[str, Any], context: Any, local: bool = False) -> None:
+
+    local = True
+    # Example SQS message for testing (copy and paste into SQS):
+    if local is True:
+        event = {
+            "Records": [
+                {
+                    "body": json.dumps(
+                        {
+                            "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
+                            "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
+                        }
+                    )
+                }
+            ]
+        }
+
+    print("sleeping for 30 seconds, subtask should be in progress")
+    raise RuntimeError("test")
+    time.sleep(30)
+    print("subtask should be marked as done")
+    # ------------------------------
+    # YOUR BUSINESS LOGIC HERE
+    # ------------------------------
diff --git a/backend/utils/subtasks.py b/backend/utils/subtasks.py
new file mode 100644
index 00000000..041494e9
--- /dev/null
+++ b/backend/utils/subtasks.py
@@ -0,0 +1,95 @@
+# decorators/subtask_handler.py
+
+from functools import wraps
+from typing import Callable, Any
+from uuid import UUID
+import json
+
+from backend.app.db.functions.tasks.Tasks import SubTaskInterface
+
+
+def subtask_handler():
+    """
+    Decorator that wraps your existing handler and automatically:
+
+    - Extracts task_id + sub_task_id from event
+    - Marks subtask as in progress
+    - Executes handler logic
+    - Marks subtask complete on success
+    - Marks failed on exception
+    """
+
+    def decorator(func: Callable[..., Any]):
+
+        @wraps(func)
+        def wrapper(event: dict[str, Any], context: Any, *args, **kwargs):
+
+            records = event.get("Records", [event])
+
+            interface = SubTaskInterface()
+
+            for record in records:
+
+                # -------------------------------
+                # Parse body safely
+                # -------------------------------
+                body = {}
+
+                if isinstance(record.get("body"), str):
+                    try:
+                        body = json.loads(record["body"])
+                    except Exception:
+                        body = {}
+                else:
+                    body = record.get("body", {}) or {}
+
+                task_id_raw = body.get("task_id")
+                subtask_id_raw = body.get("sub_task_id")
+
+                task_id = UUID(task_id_raw) if isinstance(task_id_raw, str) else None
+                subtask_id = (
+                    UUID(subtask_id_raw) if isinstance(subtask_id_raw, str) else None
+                )
+
+                if not task_id or not subtask_id:
+                    raise RuntimeError("task_id or sub_task_id missing")
+
+                # -------------------------------
+                # Mark in progress
+                # -------------------------------
+                interface.update_subtask_status(
+                    subtask_id=subtask_id,
+                    status="in progress",
+                )
+
+                try:
+                    # Pass the parsed body into your function
+                    result = func(body, context, *args, **kwargs)
+
+                    # -------------------------------
+                    # Success → mark complete
+                    # -------------------------------
+                    interface.update_subtask_status(
+                        subtask_id=subtask_id,
+                        status="complete",
+                        outputs={"result": result} if result else None,
+                    )
+
+                except Exception as e:
+
+                    # -------------------------------
+                    # Failure → mark failed
+                    # -------------------------------
+                    interface.update_subtask_status(
+                        subtask_id=subtask_id,
+                        status="failed",
+                        outputs={"error": str(e)},
+                    )
+
+                    raise
+
+            return None
+
+        return wrapper
+
+    return decorator
diff --git a/utils/s3.py b/utils/s3.py
index b3a96dba..6aa3f44e 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -6,6 +6,7 @@ from io import BytesIO, StringIO
 from urllib.parse import unquote
 from utils.logger import setup_logger
 from botocore.exceptions import NoCredentialsError, PartialCredentialsError
+from typing import Any
 
 logger = setup_logger()
 
@@ -316,7 +317,7 @@ def save_excel_to_s3(df, bucket_name, file_key):
     logger.info(f"Excel file saved to S3 bucket '{bucket_name}' with key '{file_key}'")
 
 
-def read_csv_from_s3(bucket_name, filepath):
+def read_csv_from_s3(bucket_name: str, filepath: str) -> list[dict[str, str]]:
     logger.info(
         f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'"
     )

From 2cd24ae3d012226b9ebb15986a012571d9c2a28e Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 2 Mar 2026 17:23:57 +0000
Subject: [PATCH 02/21] todo list added

---
 backend/ordanceSurvey/main.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/backend/ordanceSurvey/main.py b/backend/ordanceSurvey/main.py
index bc482d56..21090f7b 100644
--- a/backend/ordanceSurvey/main.py
+++ b/backend/ordanceSurvey/main.py
@@ -2,11 +2,9 @@ from typing import Any
 import json
 from utils.logger import setup_logger
 import logging
-
 from backend.utils.subtasks import subtask_handler
 
 logger: logging.Logger = setup_logger()
-import time
 
 
 @subtask_handler()
@@ -22,16 +20,15 @@ def handler(event: dict[str, Any], context: Any, local: bool = False) -> None:
                         {
                             "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
                             "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
+                            "s3_uri"
                         }
                     )
                 }
             ]
         }
 
-    print("sleeping for 30 seconds, subtask should be in progress")
-    raise RuntimeError("test")
-    time.sleep(30)
-    print("subtask should be marked as done")
-    # ------------------------------
-    # YOUR BUSINESS LOGIC HERE
-    # ------------------------------
+    # Add business logic to do handling
+    # TODO: Copy s3_uri importing from address2uprn
+    # TODO: Copy s3_uri logic to read csv from address2uprn and search for ones without UPRN/score is low
+    # TODO: Copy and do ordant survey logic
+    # TODO: Save new results to s3 ( ask Khalim if we want to save to db)

From db251c185728d4f3f76c008593602368db96572d Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Wed, 4 Mar 2026 16:14:27 +0000
Subject: [PATCH 03/21] removed duplicate code

---
 backend/address2UPRN/main.py         | 199 +-------------------------
 backend/app/config.py                |   2 +
 backend/ordanceSurvey/main.py        | 113 ++++++++++++---
 backend/utils/addressMatch.py        | 201 +++++++++++++++++++++++++++
 backend/utils/ordnance_survey.py     |  44 ++++++
 sfr/principal_pitch/2_export_data.py |  14 +-
 6 files changed, 356 insertions(+), 217 deletions(-)
 create mode 100644 backend/utils/addressMatch.py
 create mode 100644 backend/utils/ordnance_survey.py

diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py
index af29a095..7d52c562 100644
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@@ -2,12 +2,8 @@ from epc_api.client import EpcClient
 import os
 from urllib.parse import urlencode
 import pandas as pd
-from difflib import SequenceMatcher
 from utils.logger import setup_logger
-import re
-from typing import Set
 import json
-import requests
 from uuid import UUID
 import uuid
 from backend.app.db.functions.tasks.Tasks import SubTaskInterface
@@ -18,6 +14,8 @@ from utils.s3 import (
 )
 from datetime import datetime
 
+from backend.utils.addressMatch import addressMatch
+
 logger = setup_logger()
 
 
@@ -29,191 +27,6 @@ if EPC_AUTH_TOKEN is None:
     raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
 
 
-def is_valid_postcode(postcode_clean: str) -> bool:
-    """
-    Validate postcode using postcodes.io.
-
-    Expects a sanitised postcode (e.g. E84SQ).
-    Returns True if valid, False otherwise.
-    """
-    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
-    if not postcode_clean:
-        return False
-
-    try:
-        resp = requests.get(
-            POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
-            timeout=5,
-        )
-        resp.raise_for_status()
-        return resp.json().get("result", False)
-    except requests.RequestException:
-        # Network issues, rate limits, etc.
-        return False
-
-
-def levenshtein(a: str, b: str) -> float:
-    """
-    Address similarity score in [0, 1].
-
-    Strategy:
-    - Normalise
-    - Strongly penalise mismatched house/flat numbers
-    - Combine token overlap + character similarity
-    """
-
-    def extract_number_sequence(s: str) -> list[str]:
-        return re.findall(r"\d+[a-z]?", s)
-
-    def extract_numbers(s: str) -> Set[str]:
-        return set(extract_number_sequence(s))
-
-    def tokenise(s: str) -> Set[str]:
-        return set(s.split())
-
-    def extract_building_number(s: str) -> str | None:
-        """
-        Extract the main building number (NOT flat/unit).
-        Assumes formats like:
-        - '42 moreton road'
-        - 'flat 3 42 moreton road'
-        """
-        tokens = s.split()
-
-        # remove flat/unit context
-        cleaned = []
-        skip_next = False
-        for t in tokens:
-            if t in ("flat", "apt", "apartment", "unit"):
-                skip_next = True
-                continue
-            if skip_next:
-                skip_next = False
-                continue
-            cleaned.append(t)
-
-        # first remaining number is building number
-        for t in cleaned:
-            if re.fullmatch(r"\d+[a-z]?", t):
-                return t
-
-        return None
-
-    a_norm = normalise_address(a)
-    b_norm = normalise_address(b)
-
-    # --- hard signal: numbers ---
-    nums_a = extract_numbers(a_norm)
-    nums_b = extract_numbers(b_norm)
-
-    if nums_a and not nums_b:
-        return 0.0
-
-    # No shared numbers at all → impossible match
-    if nums_a and nums_b and nums_a.isdisjoint(nums_b):
-        return 0.0
-
-    # 🔒 HARD GUARD: building number must match
-    bld_a = extract_building_number(a_norm)
-    bld_b = extract_building_number(b_norm)
-
-    if bld_a and bld_b and bld_a != bld_b:
-        return 0.0
-
-    # --- order-sensitive flat/building guard ---
-    seq_a = extract_number_sequence(a_norm)
-    seq_b = extract_number_sequence(b_norm)
-
-    has_flat_token_user = any(
-        tok in a_norm for tok in ("flat", "apt", "apartment", "unit")
-    )
-    has_flat_token_epc = "flat" in b_norm
-
-    if (
-        len(seq_a) == 2
-        and len(seq_b) >= 2
-        and has_flat_token_epc
-        and not has_flat_token_user
-        and seq_a != seq_b[:2]
-    ):
-        return 0.0
-
-    # --- token similarity (order-independent) ---
-    toks_a = tokenise(a_norm)
-    toks_b = tokenise(b_norm)
-
-    if not toks_a or not toks_b:
-        token_score = 0.0
-    else:
-        token_score = len(toks_a & toks_b) / len(toks_a | toks_b)
-
-    # --- character similarity (soft signal) ---
-    char_score = SequenceMatcher(None, a_norm, b_norm).ratio()
-
-    # --- weighted blend ---
-    return round(
-        0.65 * token_score + 0.35 * char_score,
-        4,
-    )
-
-
-def normalise_address(s: str) -> str:
-    """
-    Canonical UK-focused address normalisation.
-
-    - Lowercases
-    - Removes punctuation (keeps / for flats)
-    - Normalises whitespace
-    - Applies synonym compression at token level
-    """
-
-    if not s:
-        return ""
-
-    ADDRESS_SYNONYMS = {
-        # street types
-        "rd": "road",
-        "rd.": "road",
-        "st": "street",
-        "st.": "street",
-        "ave": "avenue",
-        "ave.": "avenue",
-        "ln": "lane",
-        "ln.": "lane",
-        "cres": "crescent",
-        "ct": "court",
-        "dr": "drive",
-        # flats / units
-        "apt": "flat",
-        "apartment": "flat",
-        "unit": "flat",
-        "ste": "suite",
-        # numbering noise
-        "no": "",
-        "no.": "",
-    }
-    # 1. lowercase
-    s = s.lower()
-
-    # 1.5 split digit-letter suffixes
-    s = re.sub(r"(\d+)([a-z])\b", r"\1 \2", s)
-
-    # 2. remove punctuation except /
-    s = re.sub(r"[^\w\s/]", " ", s)
-
-    # 3. normalise whitespace
-    s = re.sub(r"\s+", " ", s).strip()
-
-    # 4. tokenise + synonym normalisation
-    tokens = []
-    for tok in s.split():
-        replacement = ADDRESS_SYNONYMS.get(tok, tok)
-        if replacement:
-            tokens.append(replacement)
-
-    return " ".join(tokens)
-
-
 def score_addresses(
     df: pd.DataFrame,
     user_address: str,
@@ -222,7 +35,7 @@ def score_addresses(
     if column not in df.columns:
         raise ValueError(f"Missing column: {column}")
 
-    return df[column].apply(lambda x: levenshtein(user_address, x))
+    return df[column].apply(lambda x: addressMatch.score(user_address, x))
 
 
 def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
@@ -314,9 +127,9 @@ def get_uprn_candidates(
 
     out = df.copy()
 
-    user_norm = normalise_address(user_address)
+    user_norm = addressMatch.normalise_address(user_address)
 
-    out["lexiscore"] = out[address_column].apply(lambda x: levenshtein(user_norm, x))
+    out["lexiscore"] = out[address_column].apply(lambda x: addressMatch.levenshtein(user_norm, x))
 
     # Normalise UPRN to string
     out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
@@ -653,7 +466,7 @@ def handler(event, context, local=False):
                 )
 
                 # Validate postcode before processing
-                if not is_valid_postcode(postcode):
+                if not addressMatch.is_valid_postcode(postcode):
                     logger.warning(f"Postcode {postcode} is invalid, skipping")
                     continue
 
diff --git a/backend/app/config.py b/backend/app/config.py
index 26fb6b8b..b5b29137 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -63,6 +63,8 @@ class Settings(BaseSettings):
     # Other S3 buckts
     ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
 
+    ORDNANCE_SURVEY_API_KEY: str = "changeme"
+
     # Optional AWS creds (only required in local)
     AWS_ACCESS_KEY_ID: Optional[str] = None
     AWS_SECRET_KEY_ID: Optional[str] = None
diff --git a/backend/ordanceSurvey/main.py b/backend/ordanceSurvey/main.py
index 21090f7b..5961aa16 100644
--- a/backend/ordanceSurvey/main.py
+++ b/backend/ordanceSurvey/main.py
@@ -3,32 +3,113 @@ import json
 from utils.logger import setup_logger
 import logging
 from backend.utils.subtasks import subtask_handler
+from utils.s3 import (
+    # save_csv_to_s3,
+    read_csv_from_s3 as read_csv_from_s3_dict,
+    parse_s3_uri,
+)
+from backend.utils.addressMatch import addressMatch
+from backend.app.db.connection import get_db_session
+from backend.app.db.models.postcode_search import PostcodeSearchModel
+from backend.utils.ordnance_survey import (
+    lookup_os_places,
+    os_places_results_to_dataframe,
+)
+from backend.app.config import get_settings
+from sqlalchemy import select
+
+import pandas as pd
 
 logger: logging.Logger = setup_logger()
 
 
-@subtask_handler()
-def handler(event: dict[str, Any], context: Any, local: bool = False) -> None:
+def check_if_post_code_exists_in_db_cache(postcode):
 
+    with get_db_session() as session:
+        result = (
+            session.execute(
+                select(PostcodeSearchModel).where(
+                    PostcodeSearchModel.postcode == postcode
+                )
+            )
+            .scalars()
+            .first()
+        )
+        if result:
+            return os_places_results_to_dataframe(result.result_data)
+
+        # Cache miss — fetch from OS Places API
+        api_key = get_settings().ORDNANCE_SURVEY_API_KEY
+        response = lookup_os_places(postcode, api_key)
+
+        if response.get("status") != 200 or "data" not in response:
+            logger.error(f"OS Places API failed for {postcode}: {response}")
+            raise RuntimeError(
+                "A postcode that doesn't exists in ordant survey and check if its real in postcode validator!!! Postcode: {postcode}"
+            )
+            return None
+
+        # Save to cache
+        new_record = PostcodeSearchModel(
+            postcode=postcode,
+            result_data=response["data"],
+        )
+        session.add(new_record)
+        session.commit()
+
+        return os_places_results_to_dataframe(response["data"])
+
+
+def get_ordance_survey_record(row, cache=None):
+    if cache is None:
+        cache = check_if_post_code_exists_in_db_cache(postcode)
+
+    # process cache with row
+
+
+@subtask_handler()  # This assumes task_id and subtask_id is defined in event.Records.body
+def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
+
+    # delete this line after test
     local = True
     # Example SQS message for testing (copy and paste into SQS):
     if local is True:
-        event = {
-            "Records": [
-                {
-                    "body": json.dumps(
-                        {
-                            "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
-                            "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
-                            "s3_uri"
-                        }
-                    )
-                }
-            ]
+        body = {
+            "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
+            "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
+            "s3_uri": "s3://retrofit-data-dev/ara_raw_outputs/e31f2f21-175b-4a91-a3ec-a6baa325e917/09cc7368-0850-4145-8b04-ebd84b3263c4/2026-02-18T14:00:13.228611_d2f675c3.csv",
         }
 
+    s3_uri: str = body.get("s3_uri", "")
+    lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
+
+    if s3_uri == "":
+        raise RuntimeError("Missing s3_uri in message body")
+
+    bucket, key = parse_s3_uri(s3_uri)
+
+    # Assumption designing with address2uprn was ran first
+    csv_data = read_csv_from_s3_dict(bucket, key)
+    df = pd.DataFrame(csv_data)
+    df["domna_lexiscore"] = pd.to_numeric(df["domna_lexiscore"], errors="coerce")
+    needs_processing = df[
+        df["domna_lexiscore"].isna() | (df["domna_lexiscore"] < lexiscore_threshold)
+    ]
+
+    grouped = needs_processing.groupby("postcode_clean")
+
+    # Process each postcode group at a time
+    for postcode, group in grouped:
+        print(f"Processing postcode: {postcode} ({len(group)} rows)")
+        valid_group = addressMatch.is_valid_postcode(postcode)
+        if valid_group:
+            postcode_cache = None
+            if postcode_cache is None:
+                postcode_cache = get_ordance_survey_record(postcode)
+                for index, row in group.iterrows():
+                    print("do something")
+            break
+
     # Add business logic to do handling
-    # TODO: Copy s3_uri importing from address2uprn
-    # TODO: Copy s3_uri logic to read csv from address2uprn and search for ones without UPRN/score is low
     # TODO: Copy and do ordant survey logic
     # TODO: Save new results to s3 ( ask Khalim if we want to save to db)
diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py
new file mode 100644
index 00000000..b09c1672
--- /dev/null
+++ b/backend/utils/addressMatch.py
@@ -0,0 +1,201 @@
+import re
+from typing import Any, Optional
+from difflib import SequenceMatcher
+import requests
+
+
+class addressMatch:
+    def __init__(self):
+        return None
+
+    @staticmethod
+    def score(a: str, b: str) -> float:
+        score: float = addressMatch.levenshtein(a, b)
+
+        return score
+
+    @staticmethod
+    def is_valid_postcode(postcode_clean: str) -> bool:
+        """
+        Validate postcode using postcodes.io.
+
+        Expects a sanitised postcode (e.g. E84SQ).
+        Returns True if valid, False otherwise.
+        """
+        POSTCODES_IO_VALIDATE_URL = (
+            "https://api.postcodes.io/postcodes/{postcode}/validate"
+        )
+        if not postcode_clean:
+            return False
+
+        try:
+            resp = requests.get(
+                POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
+                timeout=5,
+            )
+            resp.raise_for_status()
+            return resp.json().get("result", False)
+        except requests.RequestException:
+            # Network issues, rate limits, etc.
+            return False
+
+    @staticmethod
+    def normalise_address(s: str) -> str:
+        """
+        Canonical UK-focused address normalisation.
+
+        - Lowercases
+        - Removes punctuation (keeps / for flats)
+        - Normalises whitespace
+        - Applies synonym compression at token level
+        """
+
+        if not s:
+            return ""
+
+        ADDRESS_SYNONYMS = {
+            # street types
+            "rd": "road",
+            "rd.": "road",
+            "st": "street",
+            "st.": "street",
+            "ave": "avenue",
+            "ave.": "avenue",
+            "ln": "lane",
+            "ln.": "lane",
+            "cres": "crescent",
+            "ct": "court",
+            "dr": "drive",
+            # flats / units
+            "apt": "flat",
+            "apartment": "flat",
+            "unit": "flat",
+            "ste": "suite",
+            # numbering noise
+            "no": "",
+            "no.": "",
+        }
+        # 1. lowercase
+        s = s.lower()
+
+        # 1.5 split digit-letter suffixes
+        s = re.sub(r"(\d+)([a-z])\b", r"\1 \2", s)
+
+        # 2. remove punctuation except /
+        s = re.sub(r"[^\w\s/]", " ", s)
+
+        # 3. normalise whitespace
+        s = re.sub(r"\s+", " ", s).strip()
+
+        # 4. tokenise + synonym normalisation
+        tokens: list[str] = []
+        for tok in s.split():
+            replacement = ADDRESS_SYNONYMS.get(tok, tok)
+            if replacement:
+                tokens.append(replacement)
+        return " ".join(tokens)
+
+    @staticmethod
+    def levenshtein(a: str, b: str) -> float:
+        """
+        Address similarity score in [0, 1].
+
+        Strategy:
+        - Normalise
+        - Strongly penalise mismatched house/flat numbers
+        - Combine token overlap + character similarity
+        """
+
+        def extract_number_sequence(s: str) -> list[str]:
+            return re.findall(r"\d+[a-z]?", s)
+
+        def extract_numbers(s: str) -> set[str]:
+            return set(extract_number_sequence(s))
+
+        def tokenise(s: str) -> set[str]:
+            return set(s.split())
+
+        def extract_building_number(s: str) -> Optional[str]:
+            """
+            Extract the main building number (NOT flat/unit).
+            Assumes formats like:
+            - '42 moreton road'
+            - 'flat 3 42 moreton road'
+            """
+            tokens = s.split()
+
+            # remove flat/unit context
+            cleaned: list[Any] = []
+            skip_next = False
+            for t in tokens:
+                if t in ("flat", "apt", "apartment", "unit"):
+                    skip_next = True
+                    continue
+                if skip_next:
+                    skip_next = False
+                    continue
+                cleaned.append(t)
+
+            # first remaining number is building number
+            for t in cleaned:
+                if re.fullmatch(r"\d+[a-z]?", t):
+                    return t
+
+            return None
+
+        a_norm = addressMatch.normalise_address(a)
+        b_norm = addressMatch.normalise_address(b)
+
+        # --- hard signal: numbers ---
+        nums_a = extract_numbers(a_norm)
+        nums_b = extract_numbers(b_norm)
+
+        if nums_a and not nums_b:
+            return 0.0
+
+        # No shared numbers at all → impossible match
+        if nums_a and nums_b and nums_a.isdisjoint(nums_b):
+            return 0.0
+
+        # 🔒 HARD GUARD: building number must match
+        bld_a = extract_building_number(a_norm)
+        bld_b = extract_building_number(b_norm)
+
+        if bld_a and bld_b and bld_a != bld_b:
+            return 0.0
+
+        # --- order-sensitive flat/building guard ---
+        seq_a = extract_number_sequence(a_norm)
+        seq_b = extract_number_sequence(b_norm)
+
+        has_flat_token_user = any(
+            tok in a_norm for tok in ("flat", "apt", "apartment", "unit")
+        )
+        has_flat_token_epc = "flat" in b_norm
+
+        if (
+            len(seq_a) == 2
+            and len(seq_b) >= 2
+            and has_flat_token_epc
+            and not has_flat_token_user
+            and seq_a != seq_b[:2]
+        ):
+            return 0.0
+
+        # --- token similarity (order-independent) ---
+        toks_a: set[str] = tokenise(a_norm)
+        toks_b: set[str] = tokenise(b_norm)
+
+        if not toks_a or not toks_b:
+            token_score = 0.0
+        else:
+            token_score = len(toks_a & toks_b) / len(toks_a | toks_b)
+
+        # --- character similarity (soft signal) ---
+        char_score: float = SequenceMatcher(None, a_norm, b_norm).ratio()
+
+        # --- weighted blend ---
+        return round(
+            0.65 * token_score + 0.35 * char_score,
+            4,
+        )
diff --git a/backend/utils/ordnance_survey.py b/backend/utils/ordnance_survey.py
new file mode 100644
index 00000000..03a0e57b
--- /dev/null
+++ b/backend/utils/ordnance_survey.py
@@ -0,0 +1,44 @@
+import urllib.parse
+import requests
+import pandas as pd
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
+    """
+    Flatten the OS Places API response results into a DataFrame.
+    Each result contains either a DPA or LPI record.
+    """
+    results = data.get("results", [])
+    rows = []
+    for r in results:
+        if "DPA" in r:
+            rows.append(r["DPA"])
+        elif "LPI" in r:
+            rows.append(r["LPI"])
+    return pd.DataFrame(rows)
+
+
+def lookup_os_places(postcode: str, api_key: str) -> dict:
+    """
+    Lookup a postcode using the OS Places API.
+    Returns the full API response data or an error dict.
+    """
+    if not api_key:
+        return {"error": "Ordnance Survey API key not specified", "status": 400}
+
+    encoded_postcode = urllib.parse.quote(postcode)
+    url = (
+        f"https://api.os.uk/search/places/v1/postcode?postcode={encoded_postcode}"
+        f"&dataset=DPA,LPI&key={api_key}"
+    )
+
+    response = requests.get(url)
+    if response.status_code != 200:
+        logger.error(f"OS Places API error for postcode {postcode}: {response.status_code}")
+        return {"error": "Failed to fetch address data", "status": response.status_code}
+
+    data = response.json()
+    return {"data": data, "status": 200}
diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py
index 4f430209..b1c3a88a 100644
--- a/sfr/principal_pitch/2_export_data.py
+++ b/sfr/principal_pitch/2_export_data.py
@@ -28,15 +28,15 @@ from sqlalchemy import func
 
 # PORTFOLIO_ID = 206
 # SCENARIOS = [389]
-PORTFOLIO_ID = 568
-SCENARIOS = [
-    1059,
-]
+PORTFOLIO_ID = 404
+SCENARIOS = [819, 829, 872]
 scenario_names = {
-    1059: "EPC C - 10k budget",
+    819: "EPC C",
+    829: "EPC C - no solid floor",
+    872: "EPC C - no solid floor, refresh",
 }
 
-project_name = "manchester"
+project_name = "lincs_rural"
 
 
 def get_data(portfolio_id, scenario_ids):
@@ -330,8 +330,6 @@ for scenario_id in SCENARIOS:
     getting_works = df[df["total_retrofit_cost"] > 0]
     getting_works["predicted_post_works_epc"].value_counts()
 
-    32565 / getting_works.shape[0]
-
     df[df["predicted_post_works_sap"] == ""]
 
     # Expected columns list

From 1b3a942c308e54a23e5543c739f1acff30da95c1 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Wed, 4 Mar 2026 16:58:23 +0000
Subject: [PATCH 04/21] ordance survey logic basically finsihed

---
 backend/address2UPRN/main.py  | 24 ++++++-----
 backend/ordanceSurvey/main.py | 78 +++++++++++++++++++++++++++--------
 2 files changed, 74 insertions(+), 28 deletions(-)

diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py
index 7d52c562..53e50617 100644
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@@ -129,7 +129,9 @@ def get_uprn_candidates(
 
     user_norm = addressMatch.normalise_address(user_address)
 
-    out["lexiscore"] = out[address_column].apply(lambda x: addressMatch.levenshtein(user_norm, x))
+    out["lexiscore"] = out[address_column].apply(
+        lambda x: addressMatch.levenshtein(user_norm, x)
+    )
 
     # Normalise UPRN to string
     out[uprn_column] = out[uprn_column].astype(str).str.replace(r"\.0$", "", regex=True)
@@ -346,7 +348,7 @@ def handler(event, context, local=False):
                         {
                             "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
                             "sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d",
-                            "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-16T12:00:20.257856_7b520c0e.csv",
+                            "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-18T11:47:00.822579_f95467f5.csv",
                         }
                     )
                 }
@@ -507,9 +509,9 @@ def handler(event, context, local=False):
                             results_data.append(
                                 {
                                     **row,  # Include all original data
-                                    "uprn": uprn,
-                                    "domna_found_address": found_address,
-                                    "domna_lexiscore": score,
+                                    "address2uprn_uprn": uprn,
+                                    "address2uprn_address": found_address,
+                                    "address2uprn_lexiscore": score,
                                 }
                             )
                         else:
@@ -519,9 +521,9 @@ def handler(event, context, local=False):
                             results_data.append(
                                 {
                                     **row,  # Include all original data
-                                    "uprn": None,
-                                    "domna_found_address": None,
-                                    "domna_lexiscore": None,
+                                    "address2uprn_uprn": None,
+                                    "address2uprn_address": None,
+                                    "address2uprn_lexiscore": None,
                                 }
                             )
 
@@ -533,9 +535,9 @@ def handler(event, context, local=False):
                         results_data.append(
                             {
                                 **row,
-                                "uprn": None,
-                                "domna_found_address": None,
-                                "domna_lexiscore": None,
+                                "address2uprn_uprn": None,
+                                "address2uprn_address": None,
+                                "address2uprn_lexiscore": None,
                                 "error": str(e),
                             }
                         )
diff --git a/backend/ordanceSurvey/main.py b/backend/ordanceSurvey/main.py
index 5961aa16..4200bd24 100644
--- a/backend/ordanceSurvey/main.py
+++ b/backend/ordanceSurvey/main.py
@@ -44,10 +44,7 @@ def check_if_post_code_exists_in_db_cache(postcode):
 
         if response.get("status") != 200 or "data" not in response:
             logger.error(f"OS Places API failed for {postcode}: {response}")
-            raise RuntimeError(
-                "A postcode that doesn't exists in ordant survey and check if its real in postcode validator!!! Postcode: {postcode}"
-            )
-            return None
+            return pd.DataFrame()
 
         # Save to cache
         new_record = PostcodeSearchModel(
@@ -77,7 +74,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
         body = {
             "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
             "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
-            "s3_uri": "s3://retrofit-data-dev/ara_raw_outputs/e31f2f21-175b-4a91-a3ec-a6baa325e917/09cc7368-0850-4145-8b04-ebd84b3263c4/2026-02-18T14:00:13.228611_d2f675c3.csv",
+            "s3_uri": "s3://retrofit-data-dev/ara_raw_outputs/e31f2f21-175b-4a91-a3ec-a6baa325e917/6a427b6e-1ece-4983-b1e5-9bffccc53d1d/2026-03-04T16:48:22.339995_634c88fc.csv",
         }
 
     s3_uri: str = body.get("s3_uri", "")
@@ -91,25 +88,72 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
     # Assumption designing with address2uprn was ran first
     csv_data = read_csv_from_s3_dict(bucket, key)
     df = pd.DataFrame(csv_data)
-    df["domna_lexiscore"] = pd.to_numeric(df["domna_lexiscore"], errors="coerce")
+    df["address2uprn_lexiscore"] = pd.to_numeric(
+        df["address2uprn_lexiscore"], errors="coerce"
+    )
     needs_processing = df[
-        df["domna_lexiscore"].isna() | (df["domna_lexiscore"] < lexiscore_threshold)
+        df["address2uprn_lexiscore"].isna()
+        | (df["address2uprn_lexiscore"] < lexiscore_threshold)
     ]
 
     grouped = needs_processing.groupby("postcode_clean")
 
+    # Initialise new columns
+    df["ordnance_survey_address"] = None
+    df["ordnance_survey_uprn"] = None
+    df["ordnance_survey_lexiscore"] = None
+
     # Process each postcode group at a time
     for postcode, group in grouped:
         print(f"Processing postcode: {postcode} ({len(group)} rows)")
         valid_group = addressMatch.is_valid_postcode(postcode)
-        if valid_group:
-            postcode_cache = None
-            if postcode_cache is None:
-                postcode_cache = get_ordance_survey_record(postcode)
-                for index, row in group.iterrows():
-                    print("do something")
-            break
+        if not valid_group:
+            logger.warning(f"Postcode {postcode} is invalid, skipping")
+            for idx in group.index:
+                df.at[idx, "ordnance_survey_address"] = (
+                    "postcode not found in ordnance survey"
+                )
+                df.at[idx, "ordnance_survey_uprn"] = (
+                    "postcode not found in ordnance survey"
+                )
+                df.at[idx, "ordnance_survey_lexiscore"] = (
+                    "postcode not found in ordnance survey"
+                )
+            continue
 
-    # Add business logic to do handling
-    # TODO: Copy and do ordant survey logic
-    # TODO: Save new results to s3 ( ask Khalim if we want to save to db)
+        postcode_cache = check_if_post_code_exists_in_db_cache(postcode)
+        if postcode_cache.empty:
+            logger.warning(f"No OS Places data for {postcode}")
+            for idx in group.index:
+                df.at[idx, "ordnance_survey_address"] = (
+                    "postcode not found in ordnance survey"
+                )
+                df.at[idx, "ordnance_survey_uprn"] = (
+                    "postcode not found in ordnance survey"
+                )
+                df.at[idx, "ordnance_survey_lexiscore"] = (
+                    "postcode not found in ordnance survey"
+                )
+            continue
+
+        for idx, row in group.iterrows():
+            user_address = str(row.get("user_input", "")).strip()
+            if not user_address:
+                continue
+
+            # Score against OS Places addresses
+            scores = postcode_cache["ADDRESS"].apply(
+                lambda addr: addressMatch.score(user_address, addr)
+            )
+            best_idx = scores.idxmax()
+            best_score = scores[best_idx]
+
+            df.at[idx, "ordnance_survey_address"] = postcode_cache.at[
+                best_idx, "ADDRESS"
+            ]
+            df.at[idx, "ordnance_survey_uprn"] = postcode_cache.at[best_idx, "UPRN"]
+            df.at[idx, "ordnance_survey_lexiscore"] = best_score
+
+    # TODO: Save new results to s3 (ask Khalim if we want to save to db)
+    df.to_csv("ordnance_survey_results.csv", index=False)
+    print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")

From 24b19dbf9a9701a80a83cbfd4a03287bd71e7d84 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Wed, 4 Mar 2026 17:04:14 +0000
Subject: [PATCH 05/21] spelling mistake

---
 backend/{ordanceSurvey => ordnanceSurvey}/handler/Dockerfile      | 0
 .../{ordanceSurvey => ordnanceSurvey}/handler/requirements.txt    | 0
 backend/{ordanceSurvey => ordnanceSurvey}/main.py                 | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 rename backend/{ordanceSurvey => ordnanceSurvey}/handler/Dockerfile (100%)
 rename backend/{ordanceSurvey => ordnanceSurvey}/handler/requirements.txt (100%)
 rename backend/{ordanceSurvey => ordnanceSurvey}/main.py (100%)

diff --git a/backend/ordanceSurvey/handler/Dockerfile b/backend/ordnanceSurvey/handler/Dockerfile
similarity index 100%
rename from backend/ordanceSurvey/handler/Dockerfile
rename to backend/ordnanceSurvey/handler/Dockerfile
diff --git a/backend/ordanceSurvey/handler/requirements.txt b/backend/ordnanceSurvey/handler/requirements.txt
similarity index 100%
rename from backend/ordanceSurvey/handler/requirements.txt
rename to backend/ordnanceSurvey/handler/requirements.txt
diff --git a/backend/ordanceSurvey/main.py b/backend/ordnanceSurvey/main.py
similarity index 100%
rename from backend/ordanceSurvey/main.py
rename to backend/ordnanceSurvey/main.py

From eda2fb36c66c5b591236b656c6f6a8507d2f0477 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Wed, 4 Mar 2026 17:19:47 +0000
Subject: [PATCH 06/21] get rid of ordancy survey

---
 backend/OrdnanceSurvey.py | 131 --------------------------------------
 1 file changed, 131 deletions(-)
 delete mode 100644 backend/OrdnanceSurvey.py

diff --git a/backend/OrdnanceSurvey.py b/backend/OrdnanceSurvey.py
deleted file mode 100644
index a4d716d0..00000000
--- a/backend/OrdnanceSurvey.py
+++ /dev/null
@@ -1,131 +0,0 @@
-from functools import lru_cache
-import urllib.parse
-import requests
-from utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-class OrdnanceSuveyClient:
-
-    def __init__(self, address, postcode, api_key):
-        """
-        This class is tasked with interaction with the ordnance survey API.
-        :param address: The address for the property to search for
-        :param postcode: The postcode for the property to search for
-        """
-
-        self.address = address
-        self.postcode = postcode
-        self.full_address = ", ".join([self.address, self.postcode])
-        self.api_key = api_key
-
-        self.results = None
-
-        self.most_relevant_result = None
-        self.property_type = None
-        self.built_form = None
-        # This will be postcode and address, as returned by the ordnance survey
-        self.address_os = None
-        self.postcode_os = None
-
-    def set_places_address(self):
-        """
-        Given a response from the places api, this function will set the address and postcode of the property
-        """
-
-        if self.most_relevant_result is None:
-            raise ValueError("No results found - run get_places_api first")
-
-        self.address_os = self.most_relevant_result["ADDRESS"]
-
-        if "POSTCODE" in self.most_relevant_result:
-            self.postcode_os = self.most_relevant_result["POSTCODE"]
-        else:
-            self.postcode_os = self.most_relevant_result["POSTCODE_LOCATOR"]
-        # We strip out the postcode from the address as this is already stored separately
-        self.address_os = self.address_os.replace(self.postcode_os, "").strip()
-        # Remove trailing comma
-        self.address_os = self.address_os.rstrip(",").strip()
-        # Convert to title case
-        self.address_os = self.address_os.title()
-        # Make sure postcode is upper case
-        self.postcode_os = self.postcode_os.upper()
-
-    @lru_cache(maxsize=128)
-    def get_places_api(self, filter_by_postcode=False):
-        """
-        This method is tasked with getting the places api from the Ordnance Survey.
-        """
-
-        if not self.api_key:
-            raise ValueError("Ordnance Survey API key not specified")
-
-        encoded_address_query = urllib.parse.quote(self.full_address)
-
-        url = (
-            f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&dataset=DPA,LPI&matchprecision=10"
-            f"&key={self.api_key}"
-        )
-
-        response = requests.get(url)
-        if response.status_code == 200:
-            data = response.json()
-            res = data["results"]
-
-            if filter_by_postcode:
-                results = []
-                for r in res:
-                    if "DPA" in r:
-                        if r["DPA"]["POSTCODE"] == self.postcode:
-                            results.append(r)
-                    elif "LPI" in r:
-                        if r["LPI"]["POSTCODE_LOCATOR"] == self.postcode:
-                            results.append(r)
-                    else:
-                        raise ValueError("Could not find postcode in either DPA or LPI")
-            else:
-                results = res
-
-            self.results = results
-
-            # Extract some details about the best match
-            self.most_relevant_result = self.results[0]["DPA"] if "DPA" in self.results[0] else self.results[0]["LPI"]
-
-            self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
-            self.set_places_address()
-
-        else:
-            logger.info("Could not find any results for the provided address and postcode")
-
-        return {"status": response.status_code}
-
-    def parse_classification_code(self, classification_code: str):
-        """
-        This function will convert the classification code, returned by the OS places api, to a property type that is
-        compatible with the EPC database.
-
-        The various classifications cane be found here:
-        https://osdatahub.os.uk/docs/places/technicalSpecification
-
-        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of classifications
-        For these purposes, we do not need the full classification as this includes non-residential properties. We only
-        parse the ones of interest to us
-        :return:
-        """
-
-        value_map = {
-            # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
-            'RD': {},
-            'RD02': {'property_type': 'House', 'built_form': 'Detached'},
-            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
-            'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
-            'RD06': {'property_type': 'Flat'},
-        }
-        # Other classifications can be found in here:
-        # https://osdatahub.os.uk/docs/places/technicalSpecification in the CLASSIFICATION_CODE description.
-        # A lookup table csv can be downloaded which contains all of the codes
-
-        mapped = value_map.get(classification_code, {})
-        self.property_type = mapped.get("property_type", "")
-        self.built_form = mapped.get("built_form", "")

From 815ce010827bd46a72e22f1936146c7c68d36f2a Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 6 Mar 2026 12:51:08 +0000
Subject: [PATCH 07/21] lambda code works locally

---
 backend/address2UPRN/main.py                  | 33 +++++++---------
 backend/app/db/models/postcode_search.py      | 24 ++++++++++++
 backend/ordnanceSurvey/handler/Dockerfile     | 25 ++++++++++++
 .../ordnanceSurvey/handler/requirements.txt   | 11 ++++++
 .../local_handler/docker-compose.yml          | 11 ++++++
 .../local_handler/invoke_local_lambda.py      | 29 ++++++++++++++
 backend/ordnanceSurvey/main.py                | 38 +++++++++++++------
 7 files changed, 140 insertions(+), 31 deletions(-)
 create mode 100644 backend/app/db/models/postcode_search.py
 create mode 100644 backend/ordnanceSurvey/local_handler/docker-compose.yml
 create mode 100644 backend/ordnanceSurvey/local_handler/invoke_local_lambda.py

diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py
index 53e50617..ea588a77 100644
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@@ -436,19 +436,6 @@ def handler(event, context, local=False):
             # Process the rows
             logger.info(f"Processing {len(df)} rows for task {task_id}")
 
-            # Create user_input column by concatenating Address columns if not already present
-            if "user_input" not in df.columns:
-                df["user_input"] = (
-                    df["Address 1"].fillna("")
-                    + " "
-                    + df["Address 2"].fillna("")
-                    + " "
-                    + df["Address 3"].fillna("")
-                ).str.strip()
-                logger.info(f"Created user_input column from Address 1 and Address 2")
-            else:
-                logger.info(f"user_input column already present in data")
-
             clean_df = df.dropna(subset=["postcode_clean"])
 
             postcode_to_addresses = {
@@ -487,23 +474,29 @@ def handler(event, context, local=False):
                 # Process each address in this postcode with the same EPC data
                 for row in postcode_rows:
                     try:
-                        user_input = row.get("user_input", "")
-                        if not user_input:
+                        # Concatenate Address columns directly
+                        address2uprn_user_input = (
+                            str(row.get("Address 1", "")).strip() + " " +
+                            str(row.get("Address 2", "")).strip() + " " +
+                            str(row.get("Address 3", "")).strip()
+                        ).strip()
+
+                        if not address2uprn_user_input:
                             logger.warning(
-                                f"Skipping row with missing user_input for postcode {postcode}"
+                                f"Skipping row with missing address components for postcode {postcode}"
                             )
                             continue
 
                         # Get UPRN using the pre-fetched EPC data with all return options
                         result = get_uprn_with_epc_df(
-                            user_inputed_address=user_input, epc_df=epc_df, verbose=True
+                            user_inputed_address=address2uprn_user_input, epc_df=epc_df, verbose=True
                         )
 
                         # Parse result tuple if successful
                         if result:
                             uprn, found_address, score = result
                             logger.info(
-                                f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})"
+                                f"Found UPRN for {address2uprn_user_input} in {postcode}: {uprn} (score: {score})"
                             )
 
                             results_data.append(
@@ -516,7 +509,7 @@ def handler(event, context, local=False):
                             )
                         else:
                             logger.warning(
-                                f"No UPRN found for {user_input} in {postcode}"
+                                f"No UPRN found for {address2uprn_user_input} in {postcode}"
                             )
                             results_data.append(
                                 {
@@ -529,7 +522,7 @@ def handler(event, context, local=False):
 
                     except Exception as e:
                         logger.error(
-                            f"Error processing address {row.get('user_input', 'unknown')}: {e}"
+                            f"Error processing address {row.get('address2uprn_user_input', 'unknown')}: {e}"
                         )
                         # Still add the row with error markers
                         results_data.append(
diff --git a/backend/app/db/models/postcode_search.py b/backend/app/db/models/postcode_search.py
new file mode 100644
index 00000000..1e3e03b8
--- /dev/null
+++ b/backend/app/db/models/postcode_search.py
@@ -0,0 +1,24 @@
+import pytz
+import datetime
+from sqlalchemy import (
+    Column,
+    BigInteger,
+    Text,
+    DateTime,
+)
+from sqlalchemy.dialects.postgresql import JSONB
+from sqlalchemy.ext.declarative import declarative_base
+
+Base = declarative_base()
+
+
+class PostcodeSearchModel(Base):
+    __tablename__ = "postcode_search"
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    postcode = Column(Text, nullable=False)
+    result_data = Column(JSONB, nullable=True)
+
+    created_at = Column(
+        DateTime(timezone=True), nullable=False, default=datetime.datetime.now(pytz.utc)
+    )
diff --git a/backend/ordnanceSurvey/handler/Dockerfile b/backend/ordnanceSurvey/handler/Dockerfile
index e69de29b..6a3cbe26 100644
--- a/backend/ordnanceSurvey/handler/Dockerfile
+++ b/backend/ordnanceSurvey/handler/Dockerfile
@@ -0,0 +1,25 @@
+FROM public.ecr.aws/lambda/python:3.11
+
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+ENV DB_HOST=${DEV_DB_HOST}
+ENV DB_PORT=${DEV_DB_PORT}
+ENV DB_NAME=${DEV_DB_NAME}
+
+# Set working directory (Lambda task root)
+WORKDIR /var/task
+
+COPY backend/ordnanceSurvey/handler/requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy necessary files for database and utility imports
+COPY utils/ utils/
+COPY backend/ backend/
+COPY datatypes/ datatypes/
+
+# Lambda handler
+CMD ["backend/ordnanceSurvey/main.handler"]
+
diff --git a/backend/ordnanceSurvey/handler/requirements.txt b/backend/ordnanceSurvey/handler/requirements.txt
index e69de29b..6ef41b2d 100644
--- a/backend/ordnanceSurvey/handler/requirements.txt
+++ b/backend/ordnanceSurvey/handler/requirements.txt
@@ -0,0 +1,11 @@
+pandas==2.2.2
+numpy<2.0
+requests
+tqdm
+openpyxl
+epc-api-python==1.0.2
+boto3==1.35.44
+sqlmodel
+sqlalchemy==2.0.36
+psycopg2-binary==2.9.10
+pydantic-settings==2.6.0
\ No newline at end of file
diff --git a/backend/ordnanceSurvey/local_handler/docker-compose.yml b/backend/ordnanceSurvey/local_handler/docker-compose.yml
new file mode 100644
index 00000000..5f54e7da
--- /dev/null
+++ b/backend/ordnanceSurvey/local_handler/docker-compose.yml
@@ -0,0 +1,11 @@
+version: "3.9"
+
+services:
+  ordnance-survey-lambda:
+    build:
+      context: ../../../
+      dockerfile: backend/ordnanceSurvey/handler/Dockerfile
+    ports:
+      - "9000:8080"
+    env_file:
+      - ../../../.env
\ No newline at end of file
diff --git a/backend/ordnanceSurvey/local_handler/invoke_local_lambda.py b/backend/ordnanceSurvey/local_handler/invoke_local_lambda.py
new file mode 100644
index 00000000..c25f2d20
--- /dev/null
+++ b/backend/ordnanceSurvey/local_handler/invoke_local_lambda.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import json
+import requests
+
+HOST = "localhost"
+PORT = "9000"
+
+LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
+
+payload = {
+    "Records": [
+        {
+            "body": json.dumps(
+                {
+                    "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
+                    "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
+                    "s3_uri": "s3://retrofit-data-dev/ara_raw_outputs/e31f2f21-175b-4a91-a3ec-a6baa325e917/6a427b6e-1ece-4983-b1e5-9bffccc53d1d/2026-03-04T16:48:22.339995_634c88fc.csv",
+                    "lexiscore_column": "address2uprn_lexiscore",
+                }
+            )
+        }
+    ]
+}
+
+response = requests.post(LAMBDA_URL, json=payload)
+
+print("Status code:", response.status_code)
+print("Response:")
+print(response.text)
diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index 4200bd24..6c4f3080 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -68,17 +68,19 @@ def get_ordance_survey_record(row, cache=None):
 def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
 
     # delete this line after test
-    local = True
+    # local = True
     # Example SQS message for testing (copy and paste into SQS):
     if local is True:
         body = {
             "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
             "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
             "s3_uri": "s3://retrofit-data-dev/ara_raw_outputs/e31f2f21-175b-4a91-a3ec-a6baa325e917/6a427b6e-1ece-4983-b1e5-9bffccc53d1d/2026-03-04T16:48:22.339995_634c88fc.csv",
+            "lexiscore_column": "address2uprn_lexiscore",
         }
 
     s3_uri: str = body.get("s3_uri", "")
     lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
+    lexiscore_column: str = body.get("lexiscore_column", None)
 
     if s3_uri == "":
         raise RuntimeError("Missing s3_uri in message body")
@@ -88,13 +90,17 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
     # Assumption designing with address2uprn was ran first
     csv_data = read_csv_from_s3_dict(bucket, key)
     df = pd.DataFrame(csv_data)
-    df["address2uprn_lexiscore"] = pd.to_numeric(
-        df["address2uprn_lexiscore"], errors="coerce"
-    )
-    needs_processing = df[
-        df["address2uprn_lexiscore"].isna()
-        | (df["address2uprn_lexiscore"] < lexiscore_threshold)
-    ]
+    df = df.head(5)
+
+    # If lexiscore_column is specified, use it; otherwise process all rows
+    if lexiscore_column and lexiscore_column in df.columns:
+        df[lexiscore_column] = pd.to_numeric(df[lexiscore_column], errors="coerce")
+        needs_processing = df[
+            df[lexiscore_column].isna() | (df[lexiscore_column] < lexiscore_threshold)
+        ]
+    else:
+        # Default: process all rows
+        needs_processing = df
 
     grouped = needs_processing.groupby("postcode_clean")
 
@@ -137,13 +143,21 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
             continue
 
         for idx, row in group.iterrows():
-            user_address = str(row.get("user_input", "")).strip()
-            if not user_address:
+            # Concatenate Address columns directly
+            ordnancy_survey_user_input = (
+                str(row.get("Address 1", "")).strip()
+                + " "
+                + str(row.get("Address 2", "")).strip()
+                + " "
+                + str(row.get("Address 3", "")).strip()
+            ).strip()
+
+            if not ordnancy_survey_user_input:
                 continue
 
             # Score against OS Places addresses
             scores = postcode_cache["ADDRESS"].apply(
-                lambda addr: addressMatch.score(user_address, addr)
+                lambda addr: addressMatch.score(ordnancy_survey_user_input, addr)
             )
             best_idx = scores.idxmax()
             best_score = scores[best_idx]
@@ -157,3 +171,5 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
     # TODO: Save new results to s3 (ask Khalim if we want to save to db)
     df.to_csv("ordnance_survey_results.csv", index=False)
     print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
+
+    # TODO upload to s3 once you get confirmation from Khalim or db

From 071a67e501bb760692925e7fe30bd584b3708169 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 6 Mar 2026 13:29:25 +0000
Subject: [PATCH 08/21] ordnancesurvey deployment

---
 .github/workflows/deploy_terraform.yml        | 39 +++++++++++++
 backend/address2UPRN/main.py                  | 12 ++--
 backend/ordnanceSurvey/main.py                | 57 ++++++++++++++++++-
 .../terraform/lambda/ordnanceSurvey/main.tf   | 57 +++++++++++++++++++
 .../lambda/ordnanceSurvey/provider.tf         | 16 ++++++
 .../lambda/ordnanceSurvey/variables.tf        | 32 +++++++++++
 infrastructure/terraform/shared/main.tf       | 34 ++++++++++-
 7 files changed, 239 insertions(+), 8 deletions(-)
 create mode 100644 infrastructure/terraform/lambda/ordnanceSurvey/main.tf
 create mode 100644 infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
 create mode 100644 infrastructure/terraform/lambda/ordnanceSurvey/variables.tf

diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml
index 4c9ce44a..aac49923 100644
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@@ -242,3 +242,42 @@ jobs:
       AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
 
 
+  # ============================================================
+  # 2️⃣ Build OrdanceSurvey image and Push
+  # ============================================================
+  ordnanceSurvey_image:
+    needs: [determine_stage, shared_terraform]
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
+      dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile
+      build_context: .
+      build_args: |
+        DEV_DB_HOST=$DEV_DB_HOST
+        DEV_DB_PORT=$DEV_DB_PORT
+        DEV_DB_NAME=$DEV_DB_NAME
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
+      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
+      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+
+  # ============================================================
+  # 3️⃣ Deploy OrdanceSurvey Lambda
+  # ============================================================
+  ordnanceSurvey_lambda:
+    needs: [ordnanceSurvey_image, determine_stage]
+    uses: ./.github/workflows/_deploy_lambda.yml
+    with:
+      lambda_name: ordnanceSurvey
+      lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
+      stage: ${{ needs.determine_stage.outputs.stage }}
+      ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
+      image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
+      terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
\ No newline at end of file
diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py
index ea588a77..33cb6ff9 100644
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@@ -476,9 +476,11 @@ def handler(event, context, local=False):
                     try:
                         # Concatenate Address columns directly
                         address2uprn_user_input = (
-                            str(row.get("Address 1", "")).strip() + " " +
-                            str(row.get("Address 2", "")).strip() + " " +
-                            str(row.get("Address 3", "")).strip()
+                            str(row.get("Address 1", "")).strip()
+                            + " "
+                            + str(row.get("Address 2", "")).strip()
+                            + " "
+                            + str(row.get("Address 3", "")).strip()
                         ).strip()
 
                         if not address2uprn_user_input:
@@ -489,7 +491,9 @@ def handler(event, context, local=False):
 
                         # Get UPRN using the pre-fetched EPC data with all return options
                         result = get_uprn_with_epc_df(
-                            user_inputed_address=address2uprn_user_input, epc_df=epc_df, verbose=True
+                            user_inputed_address=address2uprn_user_input,
+                            epc_df=epc_df,
+                            verbose=True,
                         )
 
                         # Parse result tuple if successful
diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index 6c4f3080..0a0e2a8a 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -4,7 +4,7 @@ from utils.logger import setup_logger
 import logging
 from backend.utils.subtasks import subtask_handler
 from utils.s3 import (
-    # save_csv_to_s3,
+    save_csv_to_s3,
     read_csv_from_s3 as read_csv_from_s3_dict,
     parse_s3_uri,
 )
@@ -17,6 +17,9 @@ from backend.utils.ordnance_survey import (
 )
 from backend.app.config import get_settings
 from sqlalchemy import select
+from datetime import datetime
+import uuid
+import os
 
 import pandas as pd
 
@@ -64,6 +67,47 @@ def get_ordance_survey_record(row, cache=None):
     # process cache with row
 
 
+def save_results_to_s3(
+    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
+) -> bool:
+    """
+    Save results DataFrame to S3 as CSV in a parent folder structure.
+
+    :param results_df: The DataFrame containing results
+    :param task_id: The task ID (used for file naming)
+    :param sub_task_id: The subtask ID (used for file naming)
+    :param bucket_name: The S3 bucket name (defaults to env variable)
+    :return: True if successful, False otherwise
+    """
+    if bucket_name is None:
+        bucket_name = os.getenv("S3_BUCKET_NAME")
+
+    if not bucket_name:
+        logger.error(
+            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
+        )
+        return False
+
+    try:
+        # Create a filename with timestamp and UUID
+        file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}"
+        file_key = f"ara_ordnance_survey_outputs/{task_id}/{sub_task_id}/ordnanceSurvey/{file_name}.csv"
+
+        # Save to S3
+        success = save_csv_to_s3(results_df, bucket_name, file_key)
+
+        if success:
+            logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
+            return True
+        else:
+            logger.error(f"Failed to save results to S3")
+            return False
+
+    except Exception as e:
+        logger.error(f"Error saving results to S3: {str(e)}")
+        return False
+
+
 @subtask_handler()  # This assumes task_id and subtask_id is defined in event.Records.body
 def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
 
@@ -81,6 +125,8 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
     s3_uri: str = body.get("s3_uri", "")
     lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
     lexiscore_column: str = body.get("lexiscore_column", None)
+    task_id: str = body.get("task_id", "")
+    sub_task_id: str = body.get("sub_task_id", "")
 
     if s3_uri == "":
         raise RuntimeError("Missing s3_uri in message body")
@@ -168,8 +214,13 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
             df.at[idx, "ordnance_survey_uprn"] = postcode_cache.at[best_idx, "UPRN"]
             df.at[idx, "ordnance_survey_lexiscore"] = best_score
 
-    # TODO: Save new results to s3 (ask Khalim if we want to save to db)
+    # Save results locally
     df.to_csv("ordnance_survey_results.csv", index=False)
     print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
 
-    # TODO upload to s3 once you get confirmation from Khalim or db
+    # Save results to S3
+    if task_id and sub_task_id:
+        try:
+            save_results_to_s3(df, task_id, sub_task_id)
+        except Exception as s3_error:
+            logger.error(f"Failed to save results to S3: {s3_error}")
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/main.tf b/infrastructure/terraform/lambda/ordnanceSurvey/main.tf
new file mode 100644
index 00000000..baa673e1
--- /dev/null
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/main.tf
@@ -0,0 +1,57 @@
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+data "aws_secretsmanager_secret_version" "db_credentials" {
+  secret_id = "${var.stage}/assessment_model/db_credentials"
+}
+locals {
+  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
+}
+
+module "ordnance" {
+  source = "../modules/lambda_with_sqs"
+
+  name  = ordnanceSurvey #"address2uprn" for example
+  stage = var.stage
+
+  image_uri = local.image_uri
+
+  timeout = 900
+
+  # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
+  maximum_concurrency = var.maximum_concurrency
+
+  environment = merge(
+    {
+      STAGE     = var.stage
+      LOG_LEVEL = "info"
+      DB_USERNAME = local.db_credentials.db_assessment_model_username
+      DB_PASSWORD = local.db_credentials.db_assessment_model_password
+      GOOGLE_SOLAR_API_KEY = "test"
+      SAP_PREDICTIONS_BUCKET = "test"
+      CARBON_PREDICTIONS_BUCKET = "test"
+      HEAT_PREDICTIONS_BUCKET = "test"
+      HEATING_KWH_PREDICTIONS_BUCKET = "test"
+      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
+      API_KEY = "test"
+      ENVIRONMENT = "test"
+      SECRET_KEY = "test"
+      PLAN_TRIGGER_BUCKET = "test"
+      DATA_BUCKET = "test"
+      ENGINE_SQS_URL = "test"
+      ENERGY_ASSESSMENTS_BUCKET = "test"
+      S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
+    },
+  )
+}
+
+# Attach S3 read policy to the Lambda execution role
+resource "aws_iam_role_policy_attachment" "ordanceSurvey_read_and_write" {
+  role       = module.ordnance.role_name
+  policy_arn = data.terraform_remote_state.shared.outputs.ordnance_s3_read_and_write_arn
+}
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf b/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
new file mode 100644
index 00000000..37c412ce
--- /dev/null
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
@@ -0,0 +1,16 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = "~> 4.16"
+    }
+  }
+
+  backend "s3" {
+    bucket = REPLACE_ME
+    key    = "terraform.tfstate"
+    region = "eu-west-2"
+  }
+
+  required_version = ">= 1.2.0"
+}
\ No newline at end of file
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf b/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
new file mode 100644
index 00000000..e0061321
--- /dev/null
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
@@ -0,0 +1,32 @@
+variable "lambda_name" {
+  type        = string
+  description = "Logical name of the lambda (e.g. address2uprn)"
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod)"
+  type        = string
+}
+variable "ecr_repo_url" {
+  type        = string
+  description = "ECR repository URL (no tag, no digest)"
+}
+
+variable "image_digest" {
+  type        = string
+  description = "Image digest (sha256:...)"
+}
+
+variable "maximum_concurrency" {
+  type        = number
+  default     = null
+  description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
+}
+
+locals {
+  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
+}
+
+output "resolved_image_uri" {
+  value = local.image_uri
+}
diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf
index cca3394f..df519f4f 100644
--- a/infrastructure/terraform/shared/main.tf
+++ b/infrastructure/terraform/shared/main.tf
@@ -414,4 +414,36 @@ module "categorisation_registry" {
   source = "../modules/container_registry"
   name   = "categorisation"
   stage = var.stage
-}
\ No newline at end of file
+}
+
+
+################################################
+# OrdnanceSurveyAPI – Lambda
+################################################
+module "ordnance_state_bucket" {
+  source      = "../modules/tf_state_bucket"
+  bucket_name = "ordnance-terraform-state"
+
+}
+
+module "ordnance_registry" {
+  source = "../modules/container_registry"
+  name   = "ordnance"
+  stage = var.stage
+
+}
+
+# S3 policy for postcode splitter to read from retrofit data bucket
+module "ordnance_s3_read_and_write" {
+  source = "../modules/s3_iam_policy"
+
+  policy_name        = "Address2UPRNReadandWriteS3"
+  policy_description = "Allow ordnance Lambda to read and write from retrofit-data bucket"
+  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
+  actions            = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]
+  resource_paths     = ["/*"]
+}
+
+output "ordnance_s3_read_and_write_arn" {
+  value = module.ordnance_s3_read_and_write.policy_arn
+}

From a147c9111191e803473c891328542a5856998ba3 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 6 Mar 2026 13:41:54 +0000
Subject: [PATCH 09/21] get rid of redudant env

---
 .../terraform/lambda/address2UPRN/main.tf          | 13 -------------
 .../terraform/lambda/ordnanceSurvey/main.tf        | 14 +-------------
 2 files changed, 1 insertion(+), 26 deletions(-)

diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf
index 2d185497..3afc8738 100644
--- a/infrastructure/terraform/lambda/address2UPRN/main.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/main.tf
@@ -33,19 +33,6 @@ module "address2uprn" {
       LOG_LEVEL = "info"
       DB_USERNAME = local.db_credentials.db_assessment_model_username
       DB_PASSWORD = local.db_credentials.db_assessment_model_password
-      GOOGLE_SOLAR_API_KEY = "test"
-      SAP_PREDICTIONS_BUCKET = "test"
-      CARBON_PREDICTIONS_BUCKET = "test"
-      HEAT_PREDICTIONS_BUCKET = "test"
-      HEATING_KWH_PREDICTIONS_BUCKET = "test"
-      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
-      API_KEY = "test"
-      ENVIRONMENT = "test"
-      SECRET_KEY = "test"
-      PLAN_TRIGGER_BUCKET = "test"
-      DATA_BUCKET = "test"
-      ENGINE_SQS_URL = "test"
-      ENERGY_ASSESSMENTS_BUCKET = "test"
       S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
     },
   )
diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/main.tf b/infrastructure/terraform/lambda/ordnanceSurvey/main.tf
index baa673e1..3af33ad7 100644
--- a/infrastructure/terraform/lambda/ordnanceSurvey/main.tf
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/main.tf
@@ -32,20 +32,8 @@ module "ordnance" {
       LOG_LEVEL = "info"
       DB_USERNAME = local.db_credentials.db_assessment_model_username
       DB_PASSWORD = local.db_credentials.db_assessment_model_password
-      GOOGLE_SOLAR_API_KEY = "test"
-      SAP_PREDICTIONS_BUCKET = "test"
-      CARBON_PREDICTIONS_BUCKET = "test"
-      HEAT_PREDICTIONS_BUCKET = "test"
-      HEATING_KWH_PREDICTIONS_BUCKET = "test"
-      HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
-      API_KEY = "test"
-      ENVIRONMENT = "test"
-      SECRET_KEY = "test"
-      PLAN_TRIGGER_BUCKET = "test"
-      DATA_BUCKET = "test"
-      ENGINE_SQS_URL = "test"
-      ENERGY_ASSESSMENTS_BUCKET = "test"
       S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
+      ORDNANCE_SURVEY_API_KEY:= "Reminder to add This somehow, ask if we are doing aws secret method or github secret method"
     },
   )
 }

From ac344be09408e890be56ba348e18beb9919a5d1a Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 6 Mar 2026 13:44:57 +0000
Subject: [PATCH 10/21] re add

---
 OrdnanceSurvey.py | 139 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 OrdnanceSurvey.py

diff --git a/OrdnanceSurvey.py b/OrdnanceSurvey.py
new file mode 100644
index 00000000..1ae66152
--- /dev/null
+++ b/OrdnanceSurvey.py
@@ -0,0 +1,139 @@
+from functools import lru_cache
+import urllib.parse
+import requests
+from utils.logger import setup_logger
+
+logger = setup_logger()
+
+
+class OrdnanceSuveyClient:
+
+    def __init__(self, address, postcode, api_key):
+        """
+        This class is tasked with interaction with the ordnance survey API.
+        :param address: The address for the property to search for
+        :param postcode: The postcode for the property to search for
+        """
+
+        self.address = address
+        self.postcode = postcode
+        self.full_address = ", ".join([self.address, self.postcode])
+        self.api_key = api_key
+
+        self.results = None
+
+        self.most_relevant_result = None
+        self.property_type = None
+        self.built_form = None
+        # This will be postcode and address, as returned by the ordnance survey
+        self.address_os = None
+        self.postcode_os = None
+
+    def set_places_address(self):
+        """
+        Given a response from the places api, this function will set the address and postcode of the property
+        """
+
+        if self.most_relevant_result is None:
+            raise ValueError("No results found - run get_places_api first")
+
+        self.address_os = self.most_relevant_result["ADDRESS"]
+
+        if "POSTCODE" in self.most_relevant_result:
+            self.postcode_os = self.most_relevant_result["POSTCODE"]
+        else:
+            self.postcode_os = self.most_relevant_result["POSTCODE_LOCATOR"]
+        # We strip out the postcode from the address as this is already stored separately
+        self.address_os = self.address_os.replace(self.postcode_os, "").strip()
+        # Remove trailing comma
+        self.address_os = self.address_os.rstrip(",").strip()
+        # Convert to title case
+        self.address_os = self.address_os.title()
+        # Make sure postcode is upper case
+        self.postcode_os = self.postcode_os.upper()
+
+    @lru_cache(maxsize=128)
+    def get_places_api(self, filter_by_postcode=False):
+        """
+        This method is tasked with getting the places api from the Ordnance Survey.
+        """
+
+        if not self.api_key:
+            raise ValueError("Ordnance Survey API key not specified")
+
+        encoded_address_query = urllib.parse.quote(self.full_address)
+
+        url = (
+            f"https://api.os.uk/search/places/v1/find?query={encoded_address_query}&dataset=DPA,LPI&matchprecision=10"
+            f"&key={self.api_key}"
+        )
+
+        response = requests.get(url)
+        if response.status_code == 200:
+            data = response.json()
+            res = data["results"]
+
+            if filter_by_postcode:
+                results = []
+                for r in res:
+                    if "DPA" in r:
+                        if r["DPA"]["POSTCODE"] == self.postcode:
+                            results.append(r)
+                    elif "LPI" in r:
+                        if r["LPI"]["POSTCODE_LOCATOR"] == self.postcode:
+                            results.append(r)
+                    else:
+                        raise ValueError("Could not find postcode in either DPA or LPI")
+            else:
+                results = res
+
+            self.results = results
+
+            # Extract some details about the best match
+            self.most_relevant_result = (
+                self.results[0]["DPA"]
+                if "DPA" in self.results[0]
+                else self.results[0]["LPI"]
+            )
+
+            self.parse_classification_code(
+                self.most_relevant_result["CLASSIFICATION_CODE"]
+            )
+            self.set_places_address()
+
+        else:
+            logger.info(
+                "Could not find any results for the provided address and postcode"
+            )
+
+        return {"status": response.status_code}
+
+    def parse_classification_code(self, classification_code: str):
+        """
+        This function will convert the classification code, returned by the OS places api, to a property type that is
+        compatible with the EPC database.
+
+        The various classifications cane be found here:
+        https://osdatahub.os.uk/docs/places/technicalSpecification
+
+        Under LPI Output, CLASSIFICATION_CODE is described, and a link is provided to the full table of classifications
+        For these purposes, we do not need the full classification as this includes non-residential properties. We only
+        parse the ones of interest to us
+        :return:
+        """
+
+        value_map = {
+            # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
+            "RD": {},
+            "RD02": {"property_type": "House", "built_form": "Detached"},
+            "RD03": {"property_type": "House", "built_form": "Semi-Detached"},
+            "RD04": {"property_type": "House", "built_form": "Mid-Terrace"},
+            "RD06": {"property_type": "Flat"},
+        }
+        # Other classifications can be found in here:
+        # https://osdatahub.os.uk/docs/places/technicalSpecification in the CLASSIFICATION_CODE description.
+        # A lookup table csv can be downloaded which contains all of the codes
+
+        mapped = value_map.get(classification_code, {})
+        self.property_type = mapped.get("property_type", "")
+        self.built_form = mapped.get("built_form", "")

From 9852eb631bd4c1a2a0c878abca6b63c61be3d534 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 6 Mar 2026 13:49:05 +0000
Subject: [PATCH 11/21] added ordnance state

---
 infrastructure/terraform/lambda/ordnanceSurvey/provider.tf | 2 +-
 infrastructure/terraform/shared/main.tf                    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf b/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
index 37c412ce..b7f453f1 100644
--- a/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/provider.tf
@@ -7,7 +7,7 @@ terraform {
   }
 
   backend "s3" {
-    bucket = REPLACE_ME
+    bucket = "ordnance-terraform-state"
     key    = "terraform.tfstate"
     region = "eu-west-2"
   }
diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf
index 7e46e787..905fbc78 100644
--- a/infrastructure/terraform/shared/main.tf
+++ b/infrastructure/terraform/shared/main.tf
@@ -472,7 +472,7 @@ module "ordnance_registry" {
 module "ordnance_s3_read_and_write" {
   source = "../modules/s3_iam_policy"
 
-  policy_name        = "Address2UPRNReadandWriteS3"
+  policy_name        = "OrdnanceSurveyReadandWriteS3"
   policy_description = "Allow ordnance Lambda to read and write from retrofit-data bucket"
   bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
   actions            = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]

From 58843a5409209d23abc99d0273a19b4948ca802f Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 6 Mar 2026 13:51:47 +0000
Subject: [PATCH 12/21] put batch size as well

---
 infrastructure/terraform/lambda/ordnanceSurvey/variables.tf | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf b/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
index e0061321..e7646811 100644
--- a/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
+++ b/infrastructure/terraform/lambda/ordnanceSurvey/variables.tf
@@ -23,6 +23,11 @@ variable "maximum_concurrency" {
   description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit."
 }
 
+variable "batch_size" {
+  type    = number
+  default = 1
+}
+
 locals {
   image_uri = "${var.ecr_repo_url}@${var.image_digest}"
 }

From 8163a97b97d43fb3cc6d9ca020e82835b67e3d52 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 6 Mar 2026 13:57:33 +0000
Subject: [PATCH 13/21] get rid of df.head and control the inputs instead

---
 backend/ordnanceSurvey/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index 0a0e2a8a..bf8cfdaf 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -136,7 +136,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
     # Assumption designing with address2uprn was ran first
     csv_data = read_csv_from_s3_dict(bucket, key)
     df = pd.DataFrame(csv_data)
-    df = df.head(5)
+    # df = df.head(5)
 
     # If lexiscore_column is specified, use it; otherwise process all rows
     if lexiscore_column and lexiscore_column in df.columns:

From ef45ed62f8542899b6ea0c61978a9cf2156db097 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 6 Mar 2026 14:53:51 +0000
Subject: [PATCH 14/21] ordance survey init file

---
 backend/ordnanceSurvey/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 backend/ordnanceSurvey/__init__.py

diff --git a/backend/ordnanceSurvey/__init__.py b/backend/ordnanceSurvey/__init__.py
new file mode 100644
index 00000000..e69de29b

From 19ab0ad7574b076b01d47d516f4cab47695cb79c Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 6 Mar 2026 14:58:49 +0000
Subject: [PATCH 15/21] deleted the wrong folder aded back the origional

---
 .../OrdnanceSurvey.py                         | 24 +++++++------------
 1 file changed, 8 insertions(+), 16 deletions(-)
 rename OrdnanceSurvey.py => backend/OrdnanceSurvey.py (86%)

diff --git a/OrdnanceSurvey.py b/backend/OrdnanceSurvey.py
similarity index 86%
rename from OrdnanceSurvey.py
rename to backend/OrdnanceSurvey.py
index 1ae66152..a4d716d0 100644
--- a/OrdnanceSurvey.py
+++ b/backend/OrdnanceSurvey.py
@@ -90,21 +90,13 @@ class OrdnanceSuveyClient:
             self.results = results
 
             # Extract some details about the best match
-            self.most_relevant_result = (
-                self.results[0]["DPA"]
-                if "DPA" in self.results[0]
-                else self.results[0]["LPI"]
-            )
+            self.most_relevant_result = self.results[0]["DPA"] if "DPA" in self.results[0] else self.results[0]["LPI"]
 
-            self.parse_classification_code(
-                self.most_relevant_result["CLASSIFICATION_CODE"]
-            )
+            self.parse_classification_code(self.most_relevant_result["CLASSIFICATION_CODE"])
             self.set_places_address()
 
         else:
-            logger.info(
-                "Could not find any results for the provided address and postcode"
-            )
+            logger.info("Could not find any results for the provided address and postcode")
 
         return {"status": response.status_code}
 
@@ -124,11 +116,11 @@ class OrdnanceSuveyClient:
 
         value_map = {
             # In the OS api, "RD" is a "Dwelling" however this is not valid property type in the EPC database
-            "RD": {},
-            "RD02": {"property_type": "House", "built_form": "Detached"},
-            "RD03": {"property_type": "House", "built_form": "Semi-Detached"},
-            "RD04": {"property_type": "House", "built_form": "Mid-Terrace"},
-            "RD06": {"property_type": "Flat"},
+            'RD': {},
+            'RD02': {'property_type': 'House', 'built_form': 'Detached'},
+            'RD03': {'property_type': 'House', 'built_form': 'Semi-Detached'},
+            'RD04': {'property_type': 'House', 'built_form': 'Mid-Terrace'},
+            'RD06': {'property_type': 'Flat'},
         }
         # Other classifications can be found in here:
         # https://osdatahub.os.uk/docs/places/technicalSpecification in the CLASSIFICATION_CODE description.

From 124a34597a5e5a373b757e7b2cc36e6238e6578d Mon Sep 17 00:00:00 2001
From: Jun-te Kim <junte.kim@mealcraft.com>
Date: Mon, 9 Mar 2026 12:15:22 +0000
Subject: [PATCH 16/21] save so i can run it from mealcraft

---
 .devcontainer/backend/post-install.sh | 24 +++++-----
 backend/app/config.py                 |  1 +
 backend/ordnanceSurvey/helpers.py     | 66 +++++++++++++++++++++++++++
 backend/ordnanceSurvey/main.py        | 21 +++------
 backend/ordnanceSurvey/types.py       | 44 ++++++++++++++++++
 backend/utils/ordnance_survey.py      | 44 ------------------
 6 files changed, 129 insertions(+), 71 deletions(-)
 create mode 100644 backend/ordnanceSurvey/helpers.py
 create mode 100644 backend/ordnanceSurvey/types.py
 delete mode 100644 backend/utils/ordnance_survey.py

diff --git a/.devcontainer/backend/post-install.sh b/.devcontainer/backend/post-install.sh
index 48fbfde1..20c699d6 100644
--- a/.devcontainer/backend/post-install.sh
+++ b/.devcontainer/backend/post-install.sh
@@ -1,14 +1,14 @@
-mkdir -p ~/.ipython/profile_default/startup
+# mkdir -p ~/.ipython/profile_default/startup
 
-cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
-from dotenv import load_dotenv
-import os
+# cat << 'EOF' > ~/.ipython/profile_default/startup/00-load-env.py
+# from dotenv import load_dotenv
+# import os
 
-# Adjust path as needed
-env_path = "/workspaces/model/backend/.env"
-if os.path.exists(env_path):
-    load_dotenv(env_path)
-    print("✔ Loaded .env into Jupyter kernel")
-else:
-    print("⚠ No .env file found to load")
-EOF
+# # Adjust path as needed
+# env_path = "/workspaces/model/backend/.env"
+# if os.path.exists(env_path):
+#     load_dotenv(env_path)
+#     print("✔ Loaded .env into Jupyter kernel")
+# else:
+#     print("⚠ No .env file found to load")
+# EOF
diff --git a/backend/app/config.py b/backend/app/config.py
index b5b29137..e87f8374 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -64,6 +64,7 @@ class Settings(BaseSettings):
     ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
 
     ORDNANCE_SURVEY_API_KEY: str = "changeme"
+    PLAN_TRIGGER_BUCKET: str = "changeme"
 
     # Optional AWS creds (only required in local)
     AWS_ACCESS_KEY_ID: Optional[str] = None
diff --git a/backend/ordnanceSurvey/helpers.py b/backend/ordnanceSurvey/helpers.py
new file mode 100644
index 00000000..d59060b8
--- /dev/null
+++ b/backend/ordnanceSurvey/helpers.py
@@ -0,0 +1,66 @@
+import urllib.parse
+from pydantic import ValidationError
+import requests
+import pandas as pd
+from utils.logger import setup_logger
+from backend.ordnanceSurvey.types import PostcodeResponse
+
+logger = setup_logger()
+
+
+def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
+    """
+    Flatten the OS Places API response results into a DataFrame.
+    Each result contains either a DPA or LPI record.
+    """
+    results = data.get("results", [])
+    rows = []
+    for r in results:
+        if "DPA" in r:
+            rows.append(r["DPA"])
+        elif "LPI" in r:
+            rows.append(r["LPI"])
+    return pd.DataFrame(rows)
+
+
+import urllib.parse
+import requests
+import logging
+from pydantic import ValidationError
+
+logger = logging.getLogger(__name__)
+
+
+def lookup_os_places(postcode: str, api_key: str) -> PostcodeResponse:
+    """
+    Lookup a postcode using the OS Places API.
+    Returns a validated PostcodeResponse.
+    Raises exceptions on failure.
+    """
+    if not api_key:
+        raise ValueError("Ordnance Survey API key not specified")
+
+    encoded_postcode = urllib.parse.quote(postcode)
+
+    url = (
+        f"https://api.os.uk/search/places/v1/postcode?postcode={encoded_postcode}"
+        f"&dataset=DPA,LPI&key={api_key}"
+    )
+
+    response = requests.get(url)
+
+    if response.status_code != 200:
+        logger.error(
+            "OS Places API error for postcode %s: %s",
+            postcode,
+            response.status_code,
+        )
+        raise RuntimeError(f"OS Places lookup failed for postcode {postcode}")
+
+    try:
+        raw = response.json()
+        return PostcodeResponse.model_validate(raw)
+
+    except ValidationError as e:
+        logger.error("OS Places response validation failed: %s", e)
+        raise RuntimeError("Invalid response format from OS Places API") from e
diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index bf8cfdaf..835910f5 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -1,5 +1,6 @@
 from typing import Any
 import json
+from backend.ordnanceSurvey.types import PostcodeResponse
 from utils.logger import setup_logger
 import logging
 from backend.utils.subtasks import subtask_handler
@@ -11,7 +12,7 @@ from utils.s3 import (
 from backend.utils.addressMatch import addressMatch
 from backend.app.db.connection import get_db_session
 from backend.app.db.models.postcode_search import PostcodeSearchModel
-from backend.utils.ordnance_survey import (
+from backend.ordnanceSurvey.helpers import (
     lookup_os_places,
     os_places_results_to_dataframe,
 )
@@ -27,6 +28,7 @@ logger: logging.Logger = setup_logger()
 
 
 def check_if_post_code_exists_in_db_cache(postcode):
+    postcode = "SE22 9AL"
 
     with get_db_session() as session:
         result = (
@@ -43,16 +45,12 @@ def check_if_post_code_exists_in_db_cache(postcode):
 
         # Cache miss — fetch from OS Places API
         api_key = get_settings().ORDNANCE_SURVEY_API_KEY
-        response = lookup_os_places(postcode, api_key)
-
-        if response.get("status") != 200 or "data" not in response:
-            logger.error(f"OS Places API failed for {postcode}: {response}")
-            return pd.DataFrame()
+        response: PostcodeResponse = lookup_os_places(postcode, api_key)
 
         # Save to cache
         new_record = PostcodeSearchModel(
             postcode=postcode,
-            result_data=response["data"],
+            result_data=response.results,
         )
         session.add(new_record)
         session.commit()
@@ -60,13 +58,6 @@ def check_if_post_code_exists_in_db_cache(postcode):
         return os_places_results_to_dataframe(response["data"])
 
 
-def get_ordance_survey_record(row, cache=None):
-    if cache is None:
-        cache = check_if_post_code_exists_in_db_cache(postcode)
-
-    # process cache with row
-
-
 def save_results_to_s3(
     results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
 ) -> bool:
@@ -100,7 +91,7 @@ def save_results_to_s3(
             logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
             return True
         else:
-            logger.error(f"Failed to save results to S3")
+            logger.error(f"Failed to save results to S3 {bucket_name}/{file_key}")
             return False
 
     except Exception as e:
diff --git a/backend/ordnanceSurvey/types.py b/backend/ordnanceSurvey/types.py
new file mode 100644
index 00000000..0631ff67
--- /dev/null
+++ b/backend/ordnanceSurvey/types.py
@@ -0,0 +1,44 @@
+from pydantic import BaseModel
+from typing import List
+
+
+class OrdnanceSurveyResponse(BaseModel):
+    RPC: str
+    UPRN: str
+    MATCH: int
+    UDPRN: str
+    STATUS: str
+    ADDRESS: str
+    LANGUAGE: str
+    POSTCODE: str
+    POST_TOWN: str
+    WARD_CODE: str
+    ENTRY_DATE: str
+    COUNTRY_CODE: str
+    X_COORDINATE: int
+    Y_COORDINATE: int
+    BUILDING_NAME: str
+    BLPU_STATE_CODE: str
+    BLPU_STATE_DATE: str
+    LAST_UPDATE_DATE: str
+    MATCH_DESCRIPTION: str
+    THOROUGHFARE_NAME: str
+    CLASSIFICATION_CODE: str
+    LOGICAL_STATUS_CODE: str
+    POSTAL_ADDRESS_CODE: str
+    LOCAL_CUSTODIAN_CODE: int
+    DELIVERY_POINT_SUFFIX: str
+    TOPOGRAPHY_LAYER_TOID: str
+    COUNTRY_CODE_DESCRIPTION: str
+    BLPU_STATE_CODE_DESCRIPTION: str
+    CLASSIFICATION_CODE_DESCRIPTION: str
+    POSTAL_ADDRESS_CODE_DESCRIPTION: str
+    LOCAL_CUSTODIAN_CODE_DESCRIPTION: str
+
+
+class Result(BaseModel):
+    DPA: OrdnanceSurveyResponse
+
+
+class PostcodeResponse(BaseModel):
+    results: List[Result]
diff --git a/backend/utils/ordnance_survey.py b/backend/utils/ordnance_survey.py
deleted file mode 100644
index 03a0e57b..00000000
--- a/backend/utils/ordnance_survey.py
+++ /dev/null
@@ -1,44 +0,0 @@
-import urllib.parse
-import requests
-import pandas as pd
-from utils.logger import setup_logger
-
-logger = setup_logger()
-
-
-def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
-    """
-    Flatten the OS Places API response results into a DataFrame.
-    Each result contains either a DPA or LPI record.
-    """
-    results = data.get("results", [])
-    rows = []
-    for r in results:
-        if "DPA" in r:
-            rows.append(r["DPA"])
-        elif "LPI" in r:
-            rows.append(r["LPI"])
-    return pd.DataFrame(rows)
-
-
-def lookup_os_places(postcode: str, api_key: str) -> dict:
-    """
-    Lookup a postcode using the OS Places API.
-    Returns the full API response data or an error dict.
-    """
-    if not api_key:
-        return {"error": "Ordnance Survey API key not specified", "status": 400}
-
-    encoded_postcode = urllib.parse.quote(postcode)
-    url = (
-        f"https://api.os.uk/search/places/v1/postcode?postcode={encoded_postcode}"
-        f"&dataset=DPA,LPI&key={api_key}"
-    )
-
-    response = requests.get(url)
-    if response.status_code != 200:
-        logger.error(f"OS Places API error for postcode {postcode}: {response.status_code}")
-        return {"error": "Failed to fetch address data", "status": response.status_code}
-
-    data = response.json()
-    return {"data": data, "status": 200}

From 1b9c26a2b62cb32e3bffdfbc1035954acfc6bebb Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 9 Mar 2026 12:32:20 +0000
Subject: [PATCH 17/21] back to origional format

---
 backend/app/config.py             |  1 -
 backend/ordnanceSurvey/helpers.py | 32 +++++-----------------
 backend/ordnanceSurvey/main.py    | 21 ++++++++++-----
 backend/ordnanceSurvey/types.py   | 44 -------------------------------
 4 files changed, 22 insertions(+), 76 deletions(-)
 delete mode 100644 backend/ordnanceSurvey/types.py

diff --git a/backend/app/config.py b/backend/app/config.py
index e87f8374..b5b29137 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -64,7 +64,6 @@ class Settings(BaseSettings):
     ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
 
     ORDNANCE_SURVEY_API_KEY: str = "changeme"
-    PLAN_TRIGGER_BUCKET: str = "changeme"
 
     # Optional AWS creds (only required in local)
     AWS_ACCESS_KEY_ID: Optional[str] = None
diff --git a/backend/ordnanceSurvey/helpers.py b/backend/ordnanceSurvey/helpers.py
index d59060b8..fcaa148a 100644
--- a/backend/ordnanceSurvey/helpers.py
+++ b/backend/ordnanceSurvey/helpers.py
@@ -23,44 +23,26 @@ def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
     return pd.DataFrame(rows)
 
 
-import urllib.parse
-import requests
-import logging
-from pydantic import ValidationError
-
-logger = logging.getLogger(__name__)
-
-
-def lookup_os_places(postcode: str, api_key: str) -> PostcodeResponse:
+def lookup_os_places(postcode: str, api_key: str) -> dict:
     """
     Lookup a postcode using the OS Places API.
-    Returns a validated PostcodeResponse.
-    Raises exceptions on failure.
+    Returns the full API response data or an error dict.
     """
     if not api_key:
-        raise ValueError("Ordnance Survey API key not specified")
+        return {"error": "Ordnance Survey API key not specified", "status": 400}
 
     encoded_postcode = urllib.parse.quote(postcode)
-
     url = (
         f"https://api.os.uk/search/places/v1/postcode?postcode={encoded_postcode}"
         f"&dataset=DPA,LPI&key={api_key}"
     )
 
     response = requests.get(url)
-
     if response.status_code != 200:
         logger.error(
-            "OS Places API error for postcode %s: %s",
-            postcode,
-            response.status_code,
+            f"OS Places API error for postcode {postcode}: {response.status_code}"
         )
-        raise RuntimeError(f"OS Places lookup failed for postcode {postcode}")
+        return {"error": "Failed to fetch address data", "status": response.status_code}
 
-    try:
-        raw = response.json()
-        return PostcodeResponse.model_validate(raw)
-
-    except ValidationError as e:
-        logger.error("OS Places response validation failed: %s", e)
-        raise RuntimeError("Invalid response format from OS Places API") from e
+    data = response.json()
+    return {"data": data, "status": 200}
diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index 835910f5..bf8cfdaf 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -1,6 +1,5 @@
 from typing import Any
 import json
-from backend.ordnanceSurvey.types import PostcodeResponse
 from utils.logger import setup_logger
 import logging
 from backend.utils.subtasks import subtask_handler
@@ -12,7 +11,7 @@ from utils.s3 import (
 from backend.utils.addressMatch import addressMatch
 from backend.app.db.connection import get_db_session
 from backend.app.db.models.postcode_search import PostcodeSearchModel
-from backend.ordnanceSurvey.helpers import (
+from backend.utils.ordnance_survey import (
     lookup_os_places,
     os_places_results_to_dataframe,
 )
@@ -28,7 +27,6 @@ logger: logging.Logger = setup_logger()
 
 
 def check_if_post_code_exists_in_db_cache(postcode):
-    postcode = "SE22 9AL"
 
     with get_db_session() as session:
         result = (
@@ -45,12 +43,16 @@ def check_if_post_code_exists_in_db_cache(postcode):
 
         # Cache miss — fetch from OS Places API
         api_key = get_settings().ORDNANCE_SURVEY_API_KEY
-        response: PostcodeResponse = lookup_os_places(postcode, api_key)
+        response = lookup_os_places(postcode, api_key)
+
+        if response.get("status") != 200 or "data" not in response:
+            logger.error(f"OS Places API failed for {postcode}: {response}")
+            return pd.DataFrame()
 
         # Save to cache
         new_record = PostcodeSearchModel(
             postcode=postcode,
-            result_data=response.results,
+            result_data=response["data"],
         )
         session.add(new_record)
         session.commit()
@@ -58,6 +60,13 @@ def check_if_post_code_exists_in_db_cache(postcode):
         return os_places_results_to_dataframe(response["data"])
 
 
+def get_ordance_survey_record(row, cache=None):
+    if cache is None:
+        cache = check_if_post_code_exists_in_db_cache(postcode)
+
+    # process cache with row
+
+
 def save_results_to_s3(
     results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
 ) -> bool:
@@ -91,7 +100,7 @@ def save_results_to_s3(
             logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}")
             return True
         else:
-            logger.error(f"Failed to save results to S3 {bucket_name}/{file_key}")
+            logger.error(f"Failed to save results to S3")
             return False
 
     except Exception as e:
diff --git a/backend/ordnanceSurvey/types.py b/backend/ordnanceSurvey/types.py
deleted file mode 100644
index 0631ff67..00000000
--- a/backend/ordnanceSurvey/types.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from pydantic import BaseModel
-from typing import List
-
-
-class OrdnanceSurveyResponse(BaseModel):
-    RPC: str
-    UPRN: str
-    MATCH: int
-    UDPRN: str
-    STATUS: str
-    ADDRESS: str
-    LANGUAGE: str
-    POSTCODE: str
-    POST_TOWN: str
-    WARD_CODE: str
-    ENTRY_DATE: str
-    COUNTRY_CODE: str
-    X_COORDINATE: int
-    Y_COORDINATE: int
-    BUILDING_NAME: str
-    BLPU_STATE_CODE: str
-    BLPU_STATE_DATE: str
-    LAST_UPDATE_DATE: str
-    MATCH_DESCRIPTION: str
-    THOROUGHFARE_NAME: str
-    CLASSIFICATION_CODE: str
-    LOGICAL_STATUS_CODE: str
-    POSTAL_ADDRESS_CODE: str
-    LOCAL_CUSTODIAN_CODE: int
-    DELIVERY_POINT_SUFFIX: str
-    TOPOGRAPHY_LAYER_TOID: str
-    COUNTRY_CODE_DESCRIPTION: str
-    BLPU_STATE_CODE_DESCRIPTION: str
-    CLASSIFICATION_CODE_DESCRIPTION: str
-    POSTAL_ADDRESS_CODE_DESCRIPTION: str
-    LOCAL_CUSTODIAN_CODE_DESCRIPTION: str
-
-
-class Result(BaseModel):
-    DPA: OrdnanceSurveyResponse
-
-
-class PostcodeResponse(BaseModel):
-    results: List[Result]

From 4d013f329592569b06c0cabc9d4d0e8899283d76 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 9 Mar 2026 13:23:20 +0000
Subject: [PATCH 18/21] dan's pr

---
 backend/address2UPRN/main.py   | 10 +++++-----
 backend/ordnanceSurvey/main.py | 20 +++++++++-----------
 backend/utils/addressMatch.py  |  8 ++++----
 3 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py
index 33cb6ff9..4c22db44 100644
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@@ -14,7 +14,7 @@ from utils.s3 import (
 )
 from datetime import datetime
 
-from backend.utils.addressMatch import addressMatch
+from backend.utils.addressMatch import AddressMatch
 
 logger = setup_logger()
 
@@ -35,7 +35,7 @@ def score_addresses(
     if column not in df.columns:
         raise ValueError(f"Missing column: {column}")
 
-    return df[column].apply(lambda x: addressMatch.score(user_address, x))
+    return df[column].apply(lambda x: AddressMatch.score(user_address, x))
 
 
 def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
@@ -127,10 +127,10 @@ def get_uprn_candidates(
 
     out = df.copy()
 
-    user_norm = addressMatch.normalise_address(user_address)
+    user_norm = AddressMatch.normalise_address(user_address)
 
     out["lexiscore"] = out[address_column].apply(
-        lambda x: addressMatch.levenshtein(user_norm, x)
+        lambda x: AddressMatch.levenshtein(user_norm, x)
     )
 
     # Normalise UPRN to string
@@ -455,7 +455,7 @@ def handler(event, context, local=False):
                 )
 
                 # Validate postcode before processing
-                if not addressMatch.is_valid_postcode(postcode):
+                if not AddressMatch.is_valid_postcode(postcode):
                     logger.warning(f"Postcode {postcode} is invalid, skipping")
                     continue
 
diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index bf8cfdaf..5c8373a1 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -1,4 +1,4 @@
-from typing import Any
+from typing import Any, Optional
 import json
 from utils.logger import setup_logger
 import logging
@@ -8,7 +8,7 @@ from utils.s3 import (
     read_csv_from_s3 as read_csv_from_s3_dict,
     parse_s3_uri,
 )
-from backend.utils.addressMatch import addressMatch
+from backend.utils.addressMatch import AddressMatch
 from backend.app.db.connection import get_db_session
 from backend.app.db.models.postcode_search import PostcodeSearchModel
 from backend.utils.ordnance_survey import (
@@ -124,7 +124,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
 
     s3_uri: str = body.get("s3_uri", "")
     lexiscore_threshold: float = body.get("lexiscore_threshold", 0.5)
-    lexiscore_column: str = body.get("lexiscore_column", None)
+    lexiscore_column: Optional[str] = body.get("lexiscore_column", None)
     task_id: str = body.get("task_id", "")
     sub_task_id: str = body.get("sub_task_id", "")
 
@@ -158,7 +158,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
     # Process each postcode group at a time
     for postcode, group in grouped:
         print(f"Processing postcode: {postcode} ({len(group)} rows)")
-        valid_group = addressMatch.is_valid_postcode(postcode)
+        valid_group = AddressMatch.is_valid_postcode(postcode)
         if not valid_group:
             logger.warning(f"Postcode {postcode} is invalid, skipping")
             for idx in group.index:
@@ -203,7 +203,7 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
 
             # Score against OS Places addresses
             scores = postcode_cache["ADDRESS"].apply(
-                lambda addr: addressMatch.score(ordnancy_survey_user_input, addr)
+                lambda addr: AddressMatch.score(ordnancy_survey_user_input, addr)
             )
             best_idx = scores.idxmax()
             best_score = scores[best_idx]
@@ -215,12 +215,10 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
             df.at[idx, "ordnance_survey_lexiscore"] = best_score
 
     # Save results locally
-    df.to_csv("ordnance_survey_results.csv", index=False)
-    print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
+    if local:
+        df.to_csv("ordnance_survey_results.csv", index=False)
+        print(f"Results saved to ordnance_survey_results.csv ({len(df)} rows)")
 
     # Save results to S3
     if task_id and sub_task_id:
-        try:
-            save_results_to_s3(df, task_id, sub_task_id)
-        except Exception as s3_error:
-            logger.error(f"Failed to save results to S3: {s3_error}")
+        save_results_to_s3(df, task_id, sub_task_id)
diff --git a/backend/utils/addressMatch.py b/backend/utils/addressMatch.py
index b09c1672..411bb07c 100644
--- a/backend/utils/addressMatch.py
+++ b/backend/utils/addressMatch.py
@@ -4,13 +4,13 @@ from difflib import SequenceMatcher
 import requests
 
 
-class addressMatch:
+class AddressMatch:
     def __init__(self):
         return None
 
     @staticmethod
     def score(a: str, b: str) -> float:
-        score: float = addressMatch.levenshtein(a, b)
+        score: float = AddressMatch.levenshtein(a, b)
 
         return score
 
@@ -143,8 +143,8 @@ class addressMatch:
 
             return None
 
-        a_norm = addressMatch.normalise_address(a)
-        b_norm = addressMatch.normalise_address(b)
+        a_norm = AddressMatch.normalise_address(a)
+        b_norm = AddressMatch.normalise_address(b)
 
         # --- hard signal: numbers ---
         nums_a = extract_numbers(a_norm)

From 147982cb7c9939f5237522169eb38f0d4cada53b Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 9 Mar 2026 13:28:16 +0000
Subject: [PATCH 19/21] optional none

---
 backend/address2UPRN/main.py   | 7 ++++++-
 backend/ordnanceSurvey/main.py | 5 ++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py
index 4c22db44..d0ba36e6 100644
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@@ -1,3 +1,5 @@
+from typing import Optional
+
 from epc_api.client import EpcClient
 import os
 from urllib.parse import urlencode
@@ -295,7 +297,10 @@ def resolve_uprns_for_postcode_group(
 
 
 def save_results_to_s3(
-    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
+    results_df: pd.DataFrame,
+    task_id: str,
+    sub_task_id: str,
+    bucket_name: Optional[str] = None,
 ) -> bool:
     """
     Save results DataFrame to S3 as CSV.
diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py
index 5c8373a1..70b45079 100644
--- a/backend/ordnanceSurvey/main.py
+++ b/backend/ordnanceSurvey/main.py
@@ -68,7 +68,10 @@ def get_ordance_survey_record(row, cache=None):
 
 
 def save_results_to_s3(
-    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
+    results_df: pd.DataFrame,
+    task_id: str,
+    sub_task_id: str,
+    bucket_name: Optional[str] = None,
 ) -> bool:
     """
     Save results DataFrame to S3 as CSV in a parent folder structure.

From cd822a3eedf6bbce7705d9622ea2c7e74ec6e6fa Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 9 Mar 2026 15:26:01 +0000
Subject: [PATCH 20/21] deploy terraform

---
 .github/workflows/deploy_terraform.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml
index 0fae2110..bde7eb21 100644
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@@ -316,3 +316,7 @@ jobs:
       stage: ${{ needs.determine_stage.outputs.stage }}
       ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
       image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

From 4cf0194b7ec5fbe8cf2a0aff57d3c957df853d17 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 9 Mar 2026 15:36:33 +0000
Subject: [PATCH 21/21] missing bracket

---
 infrastructure/terraform/shared/main.tf | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf
index 905fbc78..4cf5ac46 100644
--- a/infrastructure/terraform/shared/main.tf
+++ b/infrastructure/terraform/shared/main.tf
@@ -481,6 +481,8 @@ module "ordnance_s3_read_and_write" {
 
 output "ordnance_s3_read_and_write_arn" {
   value = module.ordnance_s3_read_and_write.policy_arn
+}
+
 ################################################
 # Engine – Lambda ECR
 ################################################