From aea7251107ed1e0136b83e5ba6421b71ab0ee98b Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Thu, 21 May 2026 14:21:50 +0000
Subject: [PATCH 01/29] added files for landlord_overrides

---
 applications/landlord_overrides/Dockerfile    | 34 +++++++++++++++++++
 applications/landlord_overrides/handler.py    |  9 +++++
 .../local_handler/.env.local.example          |  5 +++
 .../local_handler/docker-compose.yml          |  9 +++++
 .../local_handler/invoke_local_lambda.py      | 16 +++++++++
 .../local_handler/run_local.sh                | 12 +++++++
 .../landlord_overrides/requirements.txt       |  4 +++
 7 files changed, 89 insertions(+)
 create mode 100644 applications/landlord_overrides/Dockerfile
 create mode 100644 applications/landlord_overrides/handler.py
 create mode 100644 applications/landlord_overrides/local_handler/.env.local.example
 create mode 100644 applications/landlord_overrides/local_handler/docker-compose.yml
 create mode 100755 applications/landlord_overrides/local_handler/invoke_local_lambda.py
 create mode 100755 applications/landlord_overrides/local_handler/run_local.sh
 create mode 100644 applications/landlord_overrides/requirements.txt

diff --git a/applications/landlord_overrides/Dockerfile b/applications/landlord_overrides/Dockerfile
new file mode 100644
index 00000000..ef19f379
--- /dev/null
+++ b/applications/landlord_overrides/Dockerfile
@@ -0,0 +1,34 @@
+FROM public.ecr.aws/lambda/python:3.11
+
+# Postgres host/port/database are baked into the image at build time from
+# the deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets),
+# mirroring backend/postcode_splitter/handler/Dockerfile. They map onto the
+# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT
+# baked in -- Terraform injects those as Lambda env vars from Secrets Manager.
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+ENV POSTGRES_HOST=${DEV_DB_HOST}
+ENV POSTGRES_PORT=${DEV_DB_PORT}
+ENV POSTGRES_DATABASE=${DEV_DB_NAME}
+
+WORKDIR /var/task
+
+COPY applications/postcode_splitter/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the layered source the handler imports from. The new splitter pulls
+# only DDD-shaped packages — no pandas, no legacy backend/.
+COPY domain/ domain/
+COPY infrastructure/ infrastructure/
+COPY orchestration/ orchestration/
+COPY repositories/ repositories/
+COPY utilities/ utilities/
+COPY applications/ applications/
+
+# Place the handler at the Lambda task root so the runtime can resolve
+# ``main.handler`` without an extra package prefix.
+COPY applications/landlord_overrides/handler.py /var/task/main.py
+
+CMD ["main.handler"]
diff --git a/applications/landlord_overrides/handler.py b/applications/landlord_overrides/handler.py
new file mode 100644
index 00000000..f998da1d
--- /dev/null
+++ b/applications/landlord_overrides/handler.py
@@ -0,0 +1,9 @@
+from typing import Any
+
+
+def handler(
+    body: dict[str, Any],
+    context: Any,
+) -> dict[str, list[str]]:
+    print("hello world")
+    return {"hello world": ["hello world"]}
diff --git a/applications/landlord_overrides/local_handler/.env.local.example b/applications/landlord_overrides/local_handler/.env.local.example
new file mode 100644
index 00000000..a78a797f
--- /dev/null
+++ b/applications/landlord_overrides/local_handler/.env.local.example
@@ -0,0 +1,5 @@
+POSTGRES_HOST=
+POSTGRES_PORT=5432
+POSTGRES_USERNAME=
+POSTGRES_PASSWORD=
+POSTGRES_DATABASE=
\ No newline at end of file
diff --git a/applications/landlord_overrides/local_handler/docker-compose.yml b/applications/landlord_overrides/local_handler/docker-compose.yml
new file mode 100644
index 00000000..d217ded6
--- /dev/null
+++ b/applications/landlord_overrides/local_handler/docker-compose.yml
@@ -0,0 +1,9 @@
+services:
+  landlord_overrides:
+    build:
+      context: ../../../
+      dockerfile: applications/landlord_overrides/Dockerfile
+    ports:
+      - "9002:8080"
+    env_file:
+      - .env.local
diff --git a/applications/landlord_overrides/local_handler/invoke_local_lambda.py b/applications/landlord_overrides/local_handler/invoke_local_lambda.py
new file mode 100755
index 00000000..4514495f
--- /dev/null
+++ b/applications/landlord_overrides/local_handler/invoke_local_lambda.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+import json
+import requests
+
+HOST = "localhost"
+PORT = "9002"
+
+LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
+
+payload = {"Records": [{"body": json.dumps({})}]}
+
+response = requests.post(LAMBDA_URL, json=payload)
+
+print("Status code:", response.status_code)
+print("Response:")
+print(response.text)
diff --git a/applications/landlord_overrides/local_handler/run_local.sh b/applications/landlord_overrides/local_handler/run_local.sh
new file mode 100755
index 00000000..345b60ee
--- /dev/null
+++ b/applications/landlord_overrides/local_handler/run_local.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "$(dirname "$0")"
+
+if [ ! -f .env.local ]; then
+  cp .env.local.example .env.local
+  echo "Created .env.local from the template — fill it in, then re-run." >&2
+  exit 1
+fi
+
+docker compose build --no-cache
+docker compose up --force-recreate
diff --git a/applications/landlord_overrides/requirements.txt b/applications/landlord_overrides/requirements.txt
new file mode 100644
index 00000000..6a85a255
--- /dev/null
+++ b/applications/landlord_overrides/requirements.txt
@@ -0,0 +1,4 @@
+boto3
+pydantic
+sqlmodel
+psycopg2-binary

From 68809a68c12cd411c0a9b26df39ca95025001f13 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Thu, 21 May 2026 14:26:05 +0000
Subject: [PATCH 02/29] renamed to landlord description overrides

---
 .../Dockerfile                                                  | 2 +-
 .../handler.py                                                  | 0
 .../local_handler/.env.local.example                            | 0
 .../local_handler/docker-compose.yml                            | 2 +-
 .../local_handler/invoke_local_lambda.py                        | 0
 .../local_handler/run_local.sh                                  | 0
 .../requirements.txt                                            | 0
 7 files changed, 2 insertions(+), 2 deletions(-)
 rename applications/{landlord_overrides => landlord_description_overrides}/Dockerfile (93%)
 rename applications/{landlord_overrides => landlord_description_overrides}/handler.py (100%)
 rename applications/{landlord_overrides => landlord_description_overrides}/local_handler/.env.local.example (100%)
 rename applications/{landlord_overrides => landlord_description_overrides}/local_handler/docker-compose.yml (64%)
 rename applications/{landlord_overrides => landlord_description_overrides}/local_handler/invoke_local_lambda.py (100%)
 rename applications/{landlord_overrides => landlord_description_overrides}/local_handler/run_local.sh (100%)
 rename applications/{landlord_overrides => landlord_description_overrides}/requirements.txt (100%)

diff --git a/applications/landlord_overrides/Dockerfile b/applications/landlord_description_overrides/Dockerfile
similarity index 93%
rename from applications/landlord_overrides/Dockerfile
rename to applications/landlord_description_overrides/Dockerfile
index ef19f379..e2456b81 100644
--- a/applications/landlord_overrides/Dockerfile
+++ b/applications/landlord_description_overrides/Dockerfile
@@ -29,6 +29,6 @@ COPY applications/ applications/
 
 # Place the handler at the Lambda task root so the runtime can resolve
 # ``main.handler`` without an extra package prefix.
-COPY applications/landlord_overrides/handler.py /var/task/main.py
+COPY applications/landlord_description_overrides/handler.py /var/task/main.py
 
 CMD ["main.handler"]
diff --git a/applications/landlord_overrides/handler.py b/applications/landlord_description_overrides/handler.py
similarity index 100%
rename from applications/landlord_overrides/handler.py
rename to applications/landlord_description_overrides/handler.py
diff --git a/applications/landlord_overrides/local_handler/.env.local.example b/applications/landlord_description_overrides/local_handler/.env.local.example
similarity index 100%
rename from applications/landlord_overrides/local_handler/.env.local.example
rename to applications/landlord_description_overrides/local_handler/.env.local.example
diff --git a/applications/landlord_overrides/local_handler/docker-compose.yml b/applications/landlord_description_overrides/local_handler/docker-compose.yml
similarity index 64%
rename from applications/landlord_overrides/local_handler/docker-compose.yml
rename to applications/landlord_description_overrides/local_handler/docker-compose.yml
index d217ded6..6ead2e33 100644
--- a/applications/landlord_overrides/local_handler/docker-compose.yml
+++ b/applications/landlord_description_overrides/local_handler/docker-compose.yml
@@ -2,7 +2,7 @@ services:
   landlord_overrides:
     build:
       context: ../../../
-      dockerfile: applications/landlord_overrides/Dockerfile
+      dockerfile: applications/landlord_description_overrides/Dockerfile
     ports:
       - "9002:8080"
     env_file:
diff --git a/applications/landlord_overrides/local_handler/invoke_local_lambda.py b/applications/landlord_description_overrides/local_handler/invoke_local_lambda.py
similarity index 100%
rename from applications/landlord_overrides/local_handler/invoke_local_lambda.py
rename to applications/landlord_description_overrides/local_handler/invoke_local_lambda.py
diff --git a/applications/landlord_overrides/local_handler/run_local.sh b/applications/landlord_description_overrides/local_handler/run_local.sh
similarity index 100%
rename from applications/landlord_overrides/local_handler/run_local.sh
rename to applications/landlord_description_overrides/local_handler/run_local.sh
diff --git a/applications/landlord_overrides/requirements.txt b/applications/landlord_description_overrides/requirements.txt
similarity index 100%
rename from applications/landlord_overrides/requirements.txt
rename to applications/landlord_description_overrides/requirements.txt

From 4830f82b589da75760aafd1f1c878bd02b956f31 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Thu, 21 May 2026 16:32:15 +0000
Subject: [PATCH 03/29] test: add failing tests for
 get_col_to_description_mappings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drive the contract for
LandlordDescriptionOverridesOrchestrator.get_col_to_description_mappings:
given a list of UserAddress objects that share the same
landlord_additional_info keys, return each key mapped to the list of
values found across all addresses.

Tests are red — the method still raises NotImplementedError.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 ...lord_description_overrides_orchestrator.py | 19 +++++
 ...lord_description_overrides_orchestrator.py | 69 +++++++++++++++++++
 2 files changed, 88 insertions(+)
 create mode 100644 orchestration/landlord_description_overrides_orchestrator.py
 create mode 100644 tests/orchestration/test_landlord_description_overrides_orchestrator.py

diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
new file mode 100644
index 00000000..fb3fc61b
--- /dev/null
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@@ -0,0 +1,19 @@
+from repositories.user_address.user_address_repository import UserAddressRepository
+from domain.addresses.user_address import UserAddress
+
+
+class LandlordDescriptionOverridesOrchestrator:
+    def __init__(self, user_address_repo: UserAddressRepository) -> None:
+        self._user_address_repo = user_address_repo
+
+    def get_user_address(
+        self,
+        input_s3_uri: str,
+    ) -> list[UserAddress]:
+        return self._user_address_repo.load_batch(input_s3_uri)
+
+    def get_col_to_description_mappings(
+        self, list_of_user_address: list[UserAddress]
+    ) -> dict[str, list[str]]:
+
+        raise NotImplementedError()
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
new file mode 100644
index 00000000..5660bf78
--- /dev/null
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -0,0 +1,69 @@
+from __future__ import annotations
+
+from domain.addresses.user_address import UserAddress
+from domain.postcode import Postcode
+from orchestration.landlord_description_overrides_orchestrator import (
+    LandlordDescriptionOverridesOrchestrator,
+)
+from repositories.user_address.user_address_repository import UserAddressRepository
+
+
+class _StubUserAddressRepository(UserAddressRepository):
+    """``get_col_to_description_mappings`` never touches the repo."""
+
+    def load_batch(self, s3_uri: str) -> list[UserAddress]:
+        raise NotImplementedError()
+
+    def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
+        raise NotImplementedError()
+
+
+def _make_user_address(landlord_additional_info: dict[str, str]) -> UserAddress:
+    return UserAddress(
+        user_address="1 High St",
+        postcode=Postcode("AA1 1AA"),
+        landlord_additional_info=landlord_additional_info,
+    )
+
+
+def _orchestrator() -> LandlordDescriptionOverridesOrchestrator:
+    return LandlordDescriptionOverridesOrchestrator(
+        user_address_repo=_StubUserAddressRepository()
+    )
+
+
+def test_collects_every_value_per_shared_key() -> None:
+    # arrange: every address carries the same keys, all values distinct.
+    addresses = [
+        _make_user_address({"description": "cosy", "condition": "new"}),
+        _make_user_address({"description": "spacious", "condition": "worn"}),
+        _make_user_address({"description": "bright", "condition": "fair"}),
+    ]
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert
+    assert mappings == {
+        "description": ["cosy", "spacious", "bright"],
+        "condition": ["new", "worn", "fair"],
+    }
+
+
+def test_empty_address_list_yields_empty_mapping() -> None:
+    # arrange / act
+    mappings = _orchestrator().get_col_to_description_mappings([])
+
+    # assert
+    assert mappings == {}
+
+
+def test_single_address_yields_single_value_per_key() -> None:
+    # arrange
+    addresses = [_make_user_address({"description": "cosy"})]
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert
+    assert mappings == {"description": ["cosy"]}

From b14f98788e05b6c4964817be27fc83f35725b4e5 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Thu, 21 May 2026 16:32:50 +0000
Subject: [PATCH 04/29] wire handler to orchestrator; rename source_row to
 landlord_additional_info

---
 .../landlord_description_overrides/handler.py | 38 ++++++++++++++++++-
 asset_list/app.py                             | 13 +++----
 domain/addresses/user_address.py              |  4 +-
 .../user_address_csv_s3_repository.py         |  7 +++-
 tests/domain/addresses/test_user_address.py   | 14 +++----
 .../test_user_address_csv_s3_repository.py    | 20 +++++++---
 6 files changed, 72 insertions(+), 24 deletions(-)

diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
index f998da1d..003bd4d3 100644
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@@ -1,9 +1,45 @@
 from typing import Any
+import boto3
+from orchestration.landlord_description_overrides_orchestrator import (
+    LandlordDescriptionOverridesOrchestrator,
+)
+from infrastructure.csv_s3_client import CsvS3Client
+from repositories.user_address.user_address_csv_s3_repository import (
+    UserAddressCsvS3Repository,
+)
 
 
 def handler(
     body: dict[str, Any],
     context: Any,
 ) -> dict[str, list[str]]:
-    print("hello world")
+
+    s3_uri = "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv"
+    bucket = "retrofit-data-dev"
+
+    # boto3.client is overloaded per-service in the installed stubs; cast
+    # to Any so the strict-mode checker treats it as opaque.
+    boto3_client: Any = (
+        boto3.client
+    )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    boto_s3: Any = boto3_client("s3")
+
+    csv_client = CsvS3Client(boto_s3, bucket)
+    user_address_repo = UserAddressCsvS3Repository(csv_client, bucket)
+
+    orchestrator = LandlordDescriptionOverridesOrchestrator(
+        user_address_repo=user_address_repo,
+    )
+
+    list_of_user_address = orchestrator.get_user_address(input_s3_uri=s3_uri)
+
+    for each_user_address in list_of_user_address:
+        print(each_user_address.landlord_additional_info.keys())
+        break
+
+    # Read csv of user input
+    # get the column and unique variations of each description
+    # { walls: "wall variation 1", "wall varition 2"}
+    # Call chatgpt(input from landlord, our way of understanding the mapping) Retrun -> lanlordMapped
+
     return {"hello world": ["hello world"]}
diff --git a/asset_list/app.py b/asset_list/app.py
index 424f4df6..aef410e6 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -79,17 +79,17 @@ def app():
     """
 
     data_folder = "/workspaces/model/asset_list"
-    data_filename = "hyde.xlsx"
-    sheet_name = "AddressProfilingResults"
-    postcode_column = "Postcode"
-    address1_column = "Address"
+    data_filename = "asset_list (8).xlsx"
+    sheet_name = "Standardised Asset List"
+    postcode_column = "postcode"
+    address1_column = "domna_address_1"
     address1_method = None
-    fulladdress_column = "Postcode"
+    fulladdress_column = "domna_address_1"
     address_cols_to_concat = []
     missing_postcodes_method = None
     landlord_year_built = None
     landlord_os_uprn = None
-    landlord_property_type = "Property Type"  # Good to include if landlord gave
+    landlord_property_type = "landlord_property_id"  # Good to include if landlord gave
     landlord_built_form = None  # Good to include if landlord gave
     landlord_wall_construction = None
     landlord_roof_construction = None
@@ -468,4 +468,3 @@ def app():
                 asset_list.duplicated_addresses.to_excel(
                     writer, sheet_name="Duplicate Properties", index=False
                 )
-
diff --git a/domain/addresses/user_address.py b/domain/addresses/user_address.py
index 9a28751b..b6deb2e4 100644
--- a/domain/addresses/user_address.py
+++ b/domain/addresses/user_address.py
@@ -15,4 +15,6 @@ class UserAddress:
     user_address: str
     postcode: Postcode
     internal_reference: Optional[str] = None
-    source_row: dict[str, str] = field(default_factory=_empty_source_row, compare=False)
+    landlord_additional_info: dict[str, str] = field(
+        default_factory=_empty_source_row, compare=False
+    )
diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/user_address/user_address_csv_s3_repository.py
index 058fd5a5..0b54d360 100644
--- a/repositories/user_address/user_address_csv_s3_repository.py
+++ b/repositories/user_address/user_address_csv_s3_repository.py
@@ -43,14 +43,17 @@ class UserAddressCsvS3Repository(UserAddressRepository):
                     user_address=user_address,
                     postcode=Postcode(postcode),
                     internal_reference=internal_reference,
-                    source_row=row,
+                    landlord_additional_info=row,
                 )
             )
         return addresses
 
     def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
         rows: list[dict[str, str]] = [
-            {**addr.source_row, _POSTCODE_CLEAN_COLUMN: str(addr.postcode)}
+            {
+                **addr.landlord_additional_info,
+                _POSTCODE_CLEAN_COLUMN: str(addr.postcode),
+            }
             for addr in addresses
         ]
 
diff --git a/tests/domain/addresses/test_user_address.py b/tests/domain/addresses/test_user_address.py
index 8d092df3..21e5050d 100644
--- a/tests/domain/addresses/test_user_address.py
+++ b/tests/domain/addresses/test_user_address.py
@@ -17,9 +17,7 @@ def test_user_address_preserves_user_address_verbatim() -> None:
     # The free-text user_address string is intentionally NOT normalised --
     # only the postcode is canonicalised, and that happens inside Postcode.
     # act
-    addr = UserAddress(
-        user_address="  1 The   Street  ", postcode=Postcode("SW1A1AA")
-    )
+    addr = UserAddress(user_address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.user_address == "  1 The   Street  "
 
@@ -64,7 +62,7 @@ def test_user_address_source_row_defaults_to_empty_dict() -> None:
     # act
     addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
-    assert addr.source_row == {}
+    assert addr.landlord_additional_info == {}
 
 
 def test_user_address_carries_source_row() -> None:
@@ -74,10 +72,10 @@ def test_user_address_carries_source_row() -> None:
     addr = UserAddress(
         user_address="1 The Street",
         postcode=Postcode("SW1A 1AA"),
-        source_row=row,
+        landlord_additional_info=row,
     )
     # assert
-    assert addr.source_row == row
+    assert addr.landlord_additional_info == row
 
 
 def test_user_address_equality_ignores_source_row() -> None:
@@ -87,12 +85,12 @@ def test_user_address_equality_ignores_source_row() -> None:
     a = UserAddress(
         user_address="1 The Street",
         postcode=Postcode("SW1A1AA"),
-        source_row={"x": "1"},
+        landlord_additional_info={"x": "1"},
     )
     b = UserAddress(
         user_address="1 The Street",
         postcode=Postcode("SW1A1AA"),
-        source_row={"y": "2"},
+        landlord_additional_info={"y": "2"},
     )
     # act / assert
     assert a == b
diff --git a/tests/repositories/user_address/test_user_address_csv_s3_repository.py b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
index 9ffb250a..0f630923 100644
--- a/tests/repositories/user_address/test_user_address_csv_s3_repository.py
+++ b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
@@ -124,7 +124,7 @@ def test_load_batch_captures_full_source_row(
     addresses = repo.load_batch(uri)
 
     # assert
-    assert addresses[0].source_row == row
+    assert addresses[0].landlord_additional_info == row
 
 
 def test_load_batch_raises_when_postcode_column_absent(
@@ -154,7 +154,9 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
 
     # act
     saved_uri = repo.save_batch(addresses, "tasks/passthrough")
-    saved_rows = repo._csv_client.read_rows(saved_uri)  # pyright: ignore[reportPrivateUsage]
+    saved_rows = repo._csv_client.read_rows(
+        saved_uri
+    )  # pyright: ignore[reportPrivateUsage]
 
     # assert
     assert len(saved_rows) == 1
@@ -174,7 +176,10 @@ def test_save_batch_returns_uri_under_path_prefix(
         UserAddress(
             user_address="1 High Street",
             postcode=Postcode("SW1A 1AA"),
-            source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
+            landlord_additional_info={
+                "Address 1": "1 High Street",
+                "postcode": "SW1A 1AA",
+            },
         ),
     ]
 
@@ -207,7 +212,9 @@ def test_save_then_reload_round_trip_preserves_columns(
 
     # act
     saved_uri = repo.save_batch(addresses, "tasks/round-trip")
-    saved_rows = repo._csv_client.read_rows(saved_uri)  # pyright: ignore[reportPrivateUsage]
+    saved_rows = repo._csv_client.read_rows(
+        saved_uri
+    )  # pyright: ignore[reportPrivateUsage]
 
     # assert
     # Original columns come back verbatim; postcode_clean is the only addition.
@@ -225,7 +232,10 @@ def test_save_batch_uses_unique_filename_per_call(
         UserAddress(
             user_address="1 High Street",
             postcode=Postcode("SW1A 1AA"),
-            source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
+            landlord_additional_info={
+                "Address 1": "1 High Street",
+                "postcode": "SW1A 1AA",
+            },
         ),
     ]
 

From c833a3c91b5a1615d418694f779ab4c721d1a3e5 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Thu, 21 May 2026 16:33:54 +0000
Subject: [PATCH 05/29] feat: implement get_col_to_description_mappings

Collect, per shared landlord_additional_info key, the list of values
across all UserAddress entries. Preserves first-seen key order and
input order of values.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../landlord_description_overrides_orchestrator.py         | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
index fb3fc61b..0751975a 100644
--- a/orchestration/landlord_description_overrides_orchestrator.py
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@@ -15,5 +15,8 @@ class LandlordDescriptionOverridesOrchestrator:
     def get_col_to_description_mappings(
         self, list_of_user_address: list[UserAddress]
     ) -> dict[str, list[str]]:
-
-        raise NotImplementedError()
+        mappings: dict[str, list[str]] = {}
+        for user_address in list_of_user_address:
+            for key, value in user_address.landlord_additional_info.items():
+                mappings.setdefault(key, []).append(value)
+        return mappings

From 8baa4c82aace31092bb9940a8888512589ec7439 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Thu, 21 May 2026 16:57:14 +0000
Subject: [PATCH 06/29] dedupe duplicate CSV headers; collect variants as sets

---
 .../landlord_description_overrides/handler.py | 11 ++++---
 infrastructure/csv_s3_client.py               | 31 ++++++++++++++++-
 ...lord_description_overrides_orchestrator.py |  6 ++--
 tests/infrastructure/test_csv_s3_client.py    | 33 +++++++++++++++++++
 ...lord_description_overrides_orchestrator.py | 21 ++++++++++--
 5 files changed, 91 insertions(+), 11 deletions(-)

diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
index 003bd4d3..65297dac 100644
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@@ -7,6 +7,7 @@ from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_csv_s3_repository import (
     UserAddressCsvS3Repository,
 )
+from domain.addresses.user_address import UserAddress
 
 
 def handler(
@@ -31,11 +32,13 @@ def handler(
         user_address_repo=user_address_repo,
     )
 
-    list_of_user_address = orchestrator.get_user_address(input_s3_uri=s3_uri)
+    list_of_user_address: list[UserAddress] = orchestrator.get_user_address(
+        input_s3_uri=s3_uri
+    )
 
-    for each_user_address in list_of_user_address:
-        print(each_user_address.landlord_additional_info.keys())
-        break
+    col_to_desc_map = orchestrator.get_col_to_description_mappings(
+        list_of_user_address=list_of_user_address
+    )
 
     # Read csv of user input
     # get the column and unique variations of each description
diff --git a/infrastructure/csv_s3_client.py b/infrastructure/csv_s3_client.py
index 8af8de73..d058ba53 100644
--- a/infrastructure/csv_s3_client.py
+++ b/infrastructure/csv_s3_client.py
@@ -5,6 +5,30 @@ from infrastructure.s3_client import S3Client
 from infrastructure.s3_uri import parse_s3_uri
 
 
+def _dedupe_fieldnames(fieldnames: list[str]) -> list[str]:
+    """Disambiguate repeated CSV headers by appending an index.
+
+    The first occurrence keeps its name; each later one becomes
+    ``name_1``, ``name_2`` … so duplicate columns survive as distinct
+    keys instead of collapsing onto one (last-wins) dict entry.
+    """
+    deduped: list[str] = []
+    counts: dict[str, int] = {}
+    for name in fieldnames:
+        if name not in counts:
+            counts[name] = 0
+            deduped.append(name)
+            continue
+        counts[name] += 1
+        candidate = f"{name}_{counts[name]}"
+        while candidate in counts:
+            counts[name] += 1
+            candidate = f"{name}_{counts[name]}"
+        counts[candidate] = 0
+        deduped.append(candidate)
+    return deduped
+
+
 class CsvS3Client(S3Client):
     def read_rows(self, s3_uri: str) -> list[dict[str, str]]:
         bucket, key = parse_s3_uri(s3_uri)
@@ -19,7 +43,12 @@ class CsvS3Client(S3Client):
             # Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8.
             text = raw.decode("cp1252")
 
-        reader = csv.DictReader(StringIO(text))
+        buffer = StringIO(text)
+        header = next(csv.reader(buffer), None)
+        if header is None:
+            return []
+        fieldnames = _dedupe_fieldnames(header)
+        reader = csv.DictReader(buffer, fieldnames=fieldnames)
         return [dict(row) for row in reader]
 
     def save_rows(self, rows: list[dict[str, str]], key: str) -> str:
diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
index 0751975a..7f3c3396 100644
--- a/orchestration/landlord_description_overrides_orchestrator.py
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@@ -14,9 +14,9 @@ class LandlordDescriptionOverridesOrchestrator:
 
     def get_col_to_description_mappings(
         self, list_of_user_address: list[UserAddress]
-    ) -> dict[str, list[str]]:
-        mappings: dict[str, list[str]] = {}
+    ) -> dict[str, set[str]]:
+        mappings: dict[str, set[str]] = {}
         for user_address in list_of_user_address:
             for key, value in user_address.landlord_additional_info.items():
-                mappings.setdefault(key, []).append(value)
+                mappings.setdefault(key, set()).add(value)
         return mappings
diff --git a/tests/infrastructure/test_csv_s3_client.py b/tests/infrastructure/test_csv_s3_client.py
index 30e27164..e7ec7eab 100644
--- a/tests/infrastructure/test_csv_s3_client.py
+++ b/tests/infrastructure/test_csv_s3_client.py
@@ -49,3 +49,36 @@ def test_read_rows_rejects_wrong_bucket(csv_client: CsvS3Client) -> None:
     # act / assert
     with pytest.raises(ValueError, match="does not match client bucket"):
         csv_client.read_rows("s3://other-bucket/uploads/addresses.csv")
+
+
+def test_read_rows_indexes_duplicate_column_names(csv_client: CsvS3Client) -> None:
+    # arrange: the Hyde export has two columns both headed "Walls" — a
+    # description and a score. Without disambiguation csv.DictReader would
+    # collapse them onto one key and the description would be lost.
+    raw = "Address 1,Walls,Roofs,Walls\n1 High St,Cavity: Filled,Pitched 300mm,9.6\n"
+    uri = csv_client.put_object("uploads/dup.csv", raw.encode("utf-8"))
+
+    # act
+    rows = csv_client.read_rows(uri)
+
+    # assert: the first occurrence keeps its name, the second gets an index.
+    assert rows == [
+        {
+            "Address 1": "1 High St",
+            "Walls": "Cavity: Filled",
+            "Roofs": "Pitched 300mm",
+            "Walls_1": "9.6",
+        }
+    ]
+
+
+def test_read_rows_indexes_each_repeat_of_a_column(csv_client: CsvS3Client) -> None:
+    # arrange: three columns sharing one header.
+    raw = "Walls,Walls,Walls\nfirst,second,third\n"
+    uri = csv_client.put_object("uploads/triple.csv", raw.encode("utf-8"))
+
+    # act
+    rows = csv_client.read_rows(uri)
+
+    # assert
+    assert rows == [{"Walls": "first", "Walls_1": "second", "Walls_2": "third"}]
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index 5660bf78..4f241423 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -45,11 +45,26 @@ def test_collects_every_value_per_shared_key() -> None:
 
     # assert
     assert mappings == {
-        "description": ["cosy", "spacious", "bright"],
-        "condition": ["new", "worn", "fair"],
+        "description": {"cosy", "spacious", "bright"},
+        "condition": {"new", "worn", "fair"},
     }
 
 
+def test_repeated_values_collapse_to_one_variant() -> None:
+    # arrange: two addresses share the same wall description.
+    addresses = [
+        _make_user_address({"description": "cosy"}),
+        _make_user_address({"description": "cosy"}),
+        _make_user_address({"description": "bright"}),
+    ]
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert: a set keeps one entry per distinct variant.
+    assert mappings == {"description": {"cosy", "bright"}}
+
+
 def test_empty_address_list_yields_empty_mapping() -> None:
     # arrange / act
     mappings = _orchestrator().get_col_to_description_mappings([])
@@ -66,4 +81,4 @@ def test_single_address_yields_single_value_per_key() -> None:
     mappings = _orchestrator().get_col_to_description_mappings(addresses)
 
     # assert
-    assert mappings == {"description": ["cosy"]}
+    assert mappings == {"description": {"cosy"}}

From 94cbf5f5166df1dff1030e6788243197036d13e0 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Thu, 21 May 2026 16:59:57 +0000
Subject: [PATCH 07/29] changed useraddress landlordasset list

---
 .../landlord_description_overrides/handler.py |  4 +--
 domain/addresses/postcode_batching.py         | 14 +++++-----
 domain/addresses/user_address.py              |  2 +-
 ...lord_description_overrides_orchestrator.py |  9 ++++---
 .../user_address_csv_s3_repository.py         | 10 +++----
 .../user_address/user_address_repository.py   |  8 +++---
 .../addresses/test_postcode_batching.py       | 10 +++----
 tests/domain/addresses/test_user_address.py   | 26 ++++++++++---------
 ...lord_description_overrides_orchestrator.py | 25 ++++++++++++++----
 .../test_user_address_csv_s3_repository.py    |  6 ++---
 10 files changed, 66 insertions(+), 48 deletions(-)

diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
index 65297dac..2655beb9 100644
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@@ -7,7 +7,7 @@ from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_csv_s3_repository import (
     UserAddressCsvS3Repository,
 )
-from domain.addresses.user_address import UserAddress
+from domain.addresses.user_address import LandlordAssetList
 
 
 def handler(
@@ -32,7 +32,7 @@ def handler(
         user_address_repo=user_address_repo,
     )
 
-    list_of_user_address: list[UserAddress] = orchestrator.get_user_address(
+    list_of_user_address: list[LandlordAssetList] = orchestrator.get_user_address(
         input_s3_uri=s3_uri
     )
 
diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py
index 44e4d967..d4d04b00 100644
--- a/domain/addresses/postcode_batching.py
+++ b/domain/addresses/postcode_batching.py
@@ -2,21 +2,21 @@ from __future__ import annotations
 
 from collections.abc import Iterable, Iterator
 
-from domain.addresses.user_address import UserAddress
+from domain.addresses.user_address import LandlordAssetList
 from domain.postcode import Postcode
 
 
 def iter_postcode_grouped_batches(
-    addresses: Iterable[UserAddress],
+    addresses: Iterable[LandlordAssetList],
     *,
     max_batch_size: int = 500,
-) -> Iterator[list[UserAddress]]:
+) -> Iterator[list[LandlordAssetList]]:
     if max_batch_size < 1:
         raise ValueError("max_batch_size must be >= 1")
 
     groups = _group_by_postcode_in_order(addresses)
 
-    buffer: list[UserAddress] = []
+    buffer: list[LandlordAssetList] = []
     for group in groups.values():
         group_len = len(group)
 
@@ -43,9 +43,9 @@ def iter_postcode_grouped_batches(
 
 
 def _group_by_postcode_in_order(
-    addresses: Iterable[UserAddress],
-) -> dict[Postcode, list[UserAddress]]:
-    groups: dict[Postcode, list[UserAddress]] = {}
+    addresses: Iterable[LandlordAssetList],
+) -> dict[Postcode, list[LandlordAssetList]]:
+    groups: dict[Postcode, list[LandlordAssetList]] = {}
     for address in addresses:
         groups.setdefault(address.postcode, []).append(address)
     return groups
diff --git a/domain/addresses/user_address.py b/domain/addresses/user_address.py
index b6deb2e4..c93f46e5 100644
--- a/domain/addresses/user_address.py
+++ b/domain/addresses/user_address.py
@@ -11,7 +11,7 @@ def _empty_source_row() -> dict[str, str]:
 
 
 @dataclass(frozen=True)
-class UserAddress:
+class LandlordAssetList:
     user_address: str
     postcode: Postcode
     internal_reference: Optional[str] = None
diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
index 7f3c3396..9321994d 100644
--- a/orchestration/landlord_description_overrides_orchestrator.py
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@@ -1,5 +1,5 @@
 from repositories.user_address.user_address_repository import UserAddressRepository
-from domain.addresses.user_address import UserAddress
+from domain.addresses.user_address import LandlordAssetList
 
 
 class LandlordDescriptionOverridesOrchestrator:
@@ -9,14 +9,15 @@ class LandlordDescriptionOverridesOrchestrator:
     def get_user_address(
         self,
         input_s3_uri: str,
-    ) -> list[UserAddress]:
+    ) -> list[LandlordAssetList]:
         return self._user_address_repo.load_batch(input_s3_uri)
 
     def get_col_to_description_mappings(
-        self, list_of_user_address: list[UserAddress]
+        self, list_of_user_address: list[LandlordAssetList]
     ) -> dict[str, set[str]]:
         mappings: dict[str, set[str]] = {}
         for user_address in list_of_user_address:
             for key, value in user_address.landlord_additional_info.items():
-                mappings.setdefault(key, set()).add(value)
+                # Lower-case so case-only typos collapse to one variant.
+                mappings.setdefault(key, set()).add(value.lower())
         return mappings
diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/user_address/user_address_csv_s3_repository.py
index 0b54d360..612a52ec 100644
--- a/repositories/user_address/user_address_csv_s3_repository.py
+++ b/repositories/user_address/user_address_csv_s3_repository.py
@@ -4,7 +4,7 @@ import uuid
 from datetime import datetime, timezone
 from typing import Optional
 
-from domain.addresses.user_address import UserAddress
+from domain.addresses.user_address import LandlordAssetList
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_repository import UserAddressRepository
@@ -20,14 +20,14 @@ class UserAddressCsvS3Repository(UserAddressRepository):
         self._csv_client = csv_client
         self._bucket = bucket
 
-    def load_batch(self, s3_uri: str) -> list[UserAddress]:
+    def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
         rows = self._csv_client.read_rows(s3_uri)
         if rows and _POSTCODE_COLUMN not in rows[0]:
             raise ValueError(
                 f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
                 f"columns present: {sorted(rows[0])}"
             )
-        addresses: list[UserAddress] = []
+        addresses: list[LandlordAssetList] = []
         for row in rows:
             parts = [
                 row[col].strip()
@@ -39,7 +39,7 @@ class UserAddressCsvS3Repository(UserAddressRepository):
             raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
             internal_reference: Optional[str] = raw_ref or None
             addresses.append(
-                UserAddress(
+                LandlordAssetList(
                     user_address=user_address,
                     postcode=Postcode(postcode),
                     internal_reference=internal_reference,
@@ -48,7 +48,7 @@ class UserAddressCsvS3Repository(UserAddressRepository):
             )
         return addresses
 
-    def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
+    def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
         rows: list[dict[str, str]] = [
             {
                 **addr.landlord_additional_info,
diff --git a/repositories/user_address/user_address_repository.py b/repositories/user_address/user_address_repository.py
index b2c0f866..b89247c5 100644
--- a/repositories/user_address/user_address_repository.py
+++ b/repositories/user_address/user_address_repository.py
@@ -2,12 +2,14 @@ from __future__ import annotations
 
 from abc import ABC, abstractmethod
 
-from domain.addresses.user_address import UserAddress
+from domain.addresses.user_address import LandlordAssetList
 
 
 class UserAddressRepository(ABC):
     @abstractmethod
-    def load_batch(self, s3_uri: str) -> list[UserAddress]: ...
+    def load_batch(self, s3_uri: str) -> list[LandlordAssetList]: ...
 
     @abstractmethod
-    def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str: ...
+    def save_batch(
+        self, addresses: list[LandlordAssetList], path_prefix: str
+    ) -> str: ...
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py
index 8ffcf1b5..82e5ced7 100644
--- a/tests/domain/addresses/test_postcode_batching.py
+++ b/tests/domain/addresses/test_postcode_batching.py
@@ -1,13 +1,13 @@
 import pytest
 
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from domain.addresses.user_address import UserAddress
+from domain.addresses.user_address import LandlordAssetList
 from domain.postcode import Postcode
 
 
-def _addrs(postcode: str, n: int) -> list[UserAddress]:
+def _addrs(postcode: str, n: int) -> list[LandlordAssetList]:
     return [
-        UserAddress(
+        LandlordAssetList(
             user_address=f"{i} {postcode} Street", postcode=Postcode(postcode)
         )
         for i in range(n)
@@ -74,9 +74,7 @@ def test_oversize_group_flushes_existing_buffer_first() -> None:
     big = _addrs("BB2 2BB", 7)
     tail = _addrs("CC3 3CC", 1)
     # act
-    batches = list(
-        iter_postcode_grouped_batches(small + big + tail, max_batch_size=5)
-    )
+    batches = list(iter_postcode_grouped_batches(small + big + tail, max_batch_size=5))
     # assert
     assert len(batches) == 3
     assert [str(a.postcode) for a in batches[0]] == ["AA11AA", "AA11AA"]
diff --git a/tests/domain/addresses/test_user_address.py b/tests/domain/addresses/test_user_address.py
index 21e5050d..39c52283 100644
--- a/tests/domain/addresses/test_user_address.py
+++ b/tests/domain/addresses/test_user_address.py
@@ -2,13 +2,13 @@ import dataclasses
 
 import pytest
 
-from domain.addresses.user_address import UserAddress
+from domain.addresses.user_address import LandlordAssetList
 from domain.postcode import Postcode
 
 
 def test_user_address_holds_postcode_value_object() -> None:
     # act
-    addr = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
     # assert
     assert addr.postcode == Postcode("SW1A1AA")
 
@@ -17,21 +17,23 @@ def test_user_address_preserves_user_address_verbatim() -> None:
     # The free-text user_address string is intentionally NOT normalised --
     # only the postcode is canonicalised, and that happens inside Postcode.
     # act
-    addr = UserAddress(user_address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
+    addr = LandlordAssetList(
+        user_address="  1 The   Street  ", postcode=Postcode("SW1A1AA")
+    )
     # assert
     assert addr.user_address == "  1 The   Street  "
 
 
 def test_user_address_internal_reference_defaults_to_none() -> None:
     # act
-    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.internal_reference is None
 
 
 def test_user_address_internal_reference_accepted() -> None:
     # act
-    addr = UserAddress(
+    addr = LandlordAssetList(
         user_address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         internal_reference="cust-42",
@@ -42,7 +44,7 @@ def test_user_address_internal_reference_accepted() -> None:
 
 def test_user_address_is_frozen() -> None:
     # arrange
-    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     with pytest.raises(dataclasses.FrozenInstanceError):
         addr.postcode = Postcode("OTHER")  # type: ignore[misc]
@@ -52,15 +54,15 @@ def test_user_address_equality_uses_canonical_postcode() -> None:
     # Postcode sanitises eagerly, so addresses built from different surface
     # forms of the same postcode compare equal.
     # arrange
-    a = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
-    b = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    a = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    b = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     assert a == b
 
 
 def test_user_address_source_row_defaults_to_empty_dict() -> None:
     # act
-    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.landlord_additional_info == {}
 
@@ -69,7 +71,7 @@ def test_user_address_carries_source_row() -> None:
     # arrange
     row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
     # act
-    addr = UserAddress(
+    addr = LandlordAssetList(
         user_address="1 The Street",
         postcode=Postcode("SW1A 1AA"),
         landlord_additional_info=row,
@@ -82,12 +84,12 @@ def test_user_address_equality_ignores_source_row() -> None:
     # source_row is excluded from equality (and hashing): identity stays
     # defined by the parsed fields.
     # arrange
-    a = UserAddress(
+    a = LandlordAssetList(
         user_address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         landlord_additional_info={"x": "1"},
     )
-    b = UserAddress(
+    b = LandlordAssetList(
         user_address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         landlord_additional_info={"y": "2"},
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index 4f241423..c7197071 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from domain.addresses.user_address import UserAddress
+from domain.addresses.user_address import LandlordAssetList
 from domain.postcode import Postcode
 from orchestration.landlord_description_overrides_orchestrator import (
     LandlordDescriptionOverridesOrchestrator,
@@ -11,15 +11,15 @@ from repositories.user_address.user_address_repository import UserAddressReposit
 class _StubUserAddressRepository(UserAddressRepository):
     """``get_col_to_description_mappings`` never touches the repo."""
 
-    def load_batch(self, s3_uri: str) -> list[UserAddress]:
+    def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
         raise NotImplementedError()
 
-    def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
+    def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
         raise NotImplementedError()
 
 
-def _make_user_address(landlord_additional_info: dict[str, str]) -> UserAddress:
-    return UserAddress(
+def _make_user_address(landlord_additional_info: dict[str, str]) -> LandlordAssetList:
+    return LandlordAssetList(
         user_address="1 High St",
         postcode=Postcode("AA1 1AA"),
         landlord_additional_info=landlord_additional_info,
@@ -65,6 +65,21 @@ def test_repeated_values_collapse_to_one_variant() -> None:
     assert mappings == {"description": {"cosy", "bright"}}
 
 
+def test_case_only_variants_collapse_to_one() -> None:
+    # arrange: the same description typed with inconsistent casing.
+    addresses = [
+        _make_user_address({"description": "Cosy"}),
+        _make_user_address({"description": "cosy"}),
+        _make_user_address({"description": "COSY"}),
+    ]
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert: lower-casing folds the casing typos into one variant.
+    assert mappings == {"description": {"cosy"}}
+
+
 def test_empty_address_list_yields_empty_mapping() -> None:
     # arrange / act
     mappings = _orchestrator().get_col_to_description_mappings([])
diff --git a/tests/repositories/user_address/test_user_address_csv_s3_repository.py b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
index 0f630923..9d53b35b 100644
--- a/tests/repositories/user_address/test_user_address_csv_s3_repository.py
+++ b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
@@ -3,7 +3,7 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws
 
-from domain.addresses.user_address import UserAddress
+from domain.addresses.user_address import LandlordAssetList
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_csv_s3_repository import (
@@ -173,7 +173,7 @@ def test_save_batch_returns_uri_under_path_prefix(
 ) -> None:
     # arrange
     addresses = [
-        UserAddress(
+        LandlordAssetList(
             user_address="1 High Street",
             postcode=Postcode("SW1A 1AA"),
             landlord_additional_info={
@@ -229,7 +229,7 @@ def test_save_batch_uses_unique_filename_per_call(
 ) -> None:
     # arrange
     addresses = [
-        UserAddress(
+        LandlordAssetList(
             user_address="1 High Street",
             postcode=Postcode("SW1A 1AA"),
             landlord_additional_info={

From acb306f7b9dc7a67fc9b3d371df08abdbf471961 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 07:34:50 +0000
Subject: [PATCH 08/29] asset list from landlord

---
 .../landlord_description_overrides/handler.py |  4 +-
 .../{user_address.py => asset_list.py}        |  8 +--
 domain/addresses/postcode_batching.py         | 14 ++---
 ...lord_description_overrides_orchestrator.py |  8 +--
 .../user_address_csv_s3_repository.py         | 18 +++----
 .../user_address/user_address_repository.py   |  8 ++-
 .../addresses/test_postcode_batching.py       |  8 ++-
 tests/domain/addresses/test_user_address.py   | 52 +++++++++----------
 ...lord_description_overrides_orchestrator.py | 14 ++---
 .../test_user_address_csv_s3_repository.py    | 28 +++++-----
 10 files changed, 78 insertions(+), 84 deletions(-)
 rename domain/addresses/{user_address.py => asset_list.py} (68%)

diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
index 2655beb9..2691d6d2 100644
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@@ -7,7 +7,7 @@ from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_csv_s3_repository import (
     UserAddressCsvS3Repository,
 )
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.user_address import AssetList
 
 
 def handler(
@@ -32,7 +32,7 @@ def handler(
         user_address_repo=user_address_repo,
     )
 
-    list_of_user_address: list[LandlordAssetList] = orchestrator.get_user_address(
+    list_of_user_address: list[AssetList] = orchestrator.get_user_address(
         input_s3_uri=s3_uri
     )
 
diff --git a/domain/addresses/user_address.py b/domain/addresses/asset_list.py
similarity index 68%
rename from domain/addresses/user_address.py
rename to domain/addresses/asset_list.py
index c93f46e5..1332aa2e 100644
--- a/domain/addresses/user_address.py
+++ b/domain/addresses/asset_list.py
@@ -11,10 +11,10 @@ def _empty_source_row() -> dict[str, str]:
 
 
 @dataclass(frozen=True)
-class LandlordAssetList:
-    user_address: str
+class AssetList:
+    address: str
     postcode: Postcode
-    internal_reference: Optional[str] = None
-    landlord_additional_info: dict[str, str] = field(
+    org_reference: Optional[str] = None
+    additional_info: dict[str, str] = field(
         default_factory=_empty_source_row, compare=False
     )
diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py
index d4d04b00..fe63605e 100644
--- a/domain/addresses/postcode_batching.py
+++ b/domain/addresses/postcode_batching.py
@@ -2,21 +2,21 @@ from __future__ import annotations
 
 from collections.abc import Iterable, Iterator
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.user_address import AssetList
 from domain.postcode import Postcode
 
 
 def iter_postcode_grouped_batches(
-    addresses: Iterable[LandlordAssetList],
+    addresses: Iterable[AssetList],
     *,
     max_batch_size: int = 500,
-) -> Iterator[list[LandlordAssetList]]:
+) -> Iterator[list[AssetList]]:
     if max_batch_size < 1:
         raise ValueError("max_batch_size must be >= 1")
 
     groups = _group_by_postcode_in_order(addresses)
 
-    buffer: list[LandlordAssetList] = []
+    buffer: list[AssetList] = []
     for group in groups.values():
         group_len = len(group)
 
@@ -43,9 +43,9 @@ def iter_postcode_grouped_batches(
 
 
 def _group_by_postcode_in_order(
-    addresses: Iterable[LandlordAssetList],
-) -> dict[Postcode, list[LandlordAssetList]]:
-    groups: dict[Postcode, list[LandlordAssetList]] = {}
+    addresses: Iterable[AssetList],
+) -> dict[Postcode, list[AssetList]]:
+    groups: dict[Postcode, list[AssetList]] = {}
     for address in addresses:
         groups.setdefault(address.postcode, []).append(address)
     return groups
diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
index 9321994d..18132667 100644
--- a/orchestration/landlord_description_overrides_orchestrator.py
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@@ -1,5 +1,5 @@
 from repositories.user_address.user_address_repository import UserAddressRepository
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.user_address import AssetList
 
 
 class LandlordDescriptionOverridesOrchestrator:
@@ -9,15 +9,15 @@ class LandlordDescriptionOverridesOrchestrator:
     def get_user_address(
         self,
         input_s3_uri: str,
-    ) -> list[LandlordAssetList]:
+    ) -> list[AssetList]:
         return self._user_address_repo.load_batch(input_s3_uri)
 
     def get_col_to_description_mappings(
-        self, list_of_user_address: list[LandlordAssetList]
+        self, list_of_user_address: list[AssetList]
     ) -> dict[str, set[str]]:
         mappings: dict[str, set[str]] = {}
         for user_address in list_of_user_address:
-            for key, value in user_address.landlord_additional_info.items():
+            for key, value in user_address.additional_info.items():
                 # Lower-case so case-only typos collapse to one variant.
                 mappings.setdefault(key, set()).add(value.lower())
         return mappings
diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/user_address/user_address_csv_s3_repository.py
index 612a52ec..adbbfe3e 100644
--- a/repositories/user_address/user_address_csv_s3_repository.py
+++ b/repositories/user_address/user_address_csv_s3_repository.py
@@ -4,7 +4,7 @@ import uuid
 from datetime import datetime, timezone
 from typing import Optional
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.user_address import AssetList
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_repository import UserAddressRepository
@@ -20,14 +20,14 @@ class UserAddressCsvS3Repository(UserAddressRepository):
         self._csv_client = csv_client
         self._bucket = bucket
 
-    def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
+    def load_batch(self, s3_uri: str) -> list[AssetList]:
         rows = self._csv_client.read_rows(s3_uri)
         if rows and _POSTCODE_COLUMN not in rows[0]:
             raise ValueError(
                 f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
                 f"columns present: {sorted(rows[0])}"
             )
-        addresses: list[LandlordAssetList] = []
+        addresses: list[AssetList] = []
         for row in rows:
             parts = [
                 row[col].strip()
@@ -39,19 +39,19 @@ class UserAddressCsvS3Repository(UserAddressRepository):
             raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
             internal_reference: Optional[str] = raw_ref or None
             addresses.append(
-                LandlordAssetList(
-                    user_address=user_address,
+                AssetList(
+                    address=user_address,
                     postcode=Postcode(postcode),
-                    internal_reference=internal_reference,
-                    landlord_additional_info=row,
+                    org_reference=internal_reference,
+                    additional_info=row,
                 )
             )
         return addresses
 
-    def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
+    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
         rows: list[dict[str, str]] = [
             {
-                **addr.landlord_additional_info,
+                **addr.additional_info,
                 _POSTCODE_CLEAN_COLUMN: str(addr.postcode),
             }
             for addr in addresses
diff --git a/repositories/user_address/user_address_repository.py b/repositories/user_address/user_address_repository.py
index b89247c5..eafd0e1d 100644
--- a/repositories/user_address/user_address_repository.py
+++ b/repositories/user_address/user_address_repository.py
@@ -2,14 +2,12 @@ from __future__ import annotations
 
 from abc import ABC, abstractmethod
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.user_address import AssetList
 
 
 class UserAddressRepository(ABC):
     @abstractmethod
-    def load_batch(self, s3_uri: str) -> list[LandlordAssetList]: ...
+    def load_batch(self, s3_uri: str) -> list[AssetList]: ...
 
     @abstractmethod
-    def save_batch(
-        self, addresses: list[LandlordAssetList], path_prefix: str
-    ) -> str: ...
+    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str: ...
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py
index 82e5ced7..4aaeef10 100644
--- a/tests/domain/addresses/test_postcode_batching.py
+++ b/tests/domain/addresses/test_postcode_batching.py
@@ -1,15 +1,13 @@
 import pytest
 
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.user_address import AssetList
 from domain.postcode import Postcode
 
 
-def _addrs(postcode: str, n: int) -> list[LandlordAssetList]:
+def _addrs(postcode: str, n: int) -> list[AssetList]:
     return [
-        LandlordAssetList(
-            user_address=f"{i} {postcode} Street", postcode=Postcode(postcode)
-        )
+        AssetList(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
         for i in range(n)
     ]
 
diff --git a/tests/domain/addresses/test_user_address.py b/tests/domain/addresses/test_user_address.py
index 39c52283..be065995 100644
--- a/tests/domain/addresses/test_user_address.py
+++ b/tests/domain/addresses/test_user_address.py
@@ -2,13 +2,13 @@ import dataclasses
 
 import pytest
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.user_address import AssetList
 from domain.postcode import Postcode
 
 
 def test_user_address_holds_postcode_value_object() -> None:
     # act
-    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    addr = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))
     # assert
     assert addr.postcode == Postcode("SW1A1AA")
 
@@ -17,34 +17,32 @@ def test_user_address_preserves_user_address_verbatim() -> None:
     # The free-text user_address string is intentionally NOT normalised --
     # only the postcode is canonicalised, and that happens inside Postcode.
     # act
-    addr = LandlordAssetList(
-        user_address="  1 The   Street  ", postcode=Postcode("SW1A1AA")
-    )
+    addr = AssetList(address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
     # assert
-    assert addr.user_address == "  1 The   Street  "
+    assert addr.address == "  1 The   Street  "
 
 
 def test_user_address_internal_reference_defaults_to_none() -> None:
     # act
-    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
-    assert addr.internal_reference is None
+    assert addr.org_reference is None
 
 
 def test_user_address_internal_reference_accepted() -> None:
     # act
-    addr = LandlordAssetList(
-        user_address="1 The Street",
+    addr = AssetList(
+        address="1 The Street",
         postcode=Postcode("SW1A1AA"),
-        internal_reference="cust-42",
+        org_reference="cust-42",
     )
     # assert
-    assert addr.internal_reference == "cust-42"
+    assert addr.org_reference == "cust-42"
 
 
 def test_user_address_is_frozen() -> None:
     # arrange
-    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     with pytest.raises(dataclasses.FrozenInstanceError):
         addr.postcode = Postcode("OTHER")  # type: ignore[misc]
@@ -54,45 +52,45 @@ def test_user_address_equality_uses_canonical_postcode() -> None:
     # Postcode sanitises eagerly, so addresses built from different surface
     # forms of the same postcode compare equal.
     # arrange
-    a = LandlordAssetList(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
-    b = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    a = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    b = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     assert a == b
 
 
 def test_user_address_source_row_defaults_to_empty_dict() -> None:
     # act
-    addr = LandlordAssetList(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
-    assert addr.landlord_additional_info == {}
+    assert addr.additional_info == {}
 
 
 def test_user_address_carries_source_row() -> None:
     # arrange
     row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
     # act
-    addr = LandlordAssetList(
-        user_address="1 The Street",
+    addr = AssetList(
+        address="1 The Street",
         postcode=Postcode("SW1A 1AA"),
-        landlord_additional_info=row,
+        additional_info=row,
     )
     # assert
-    assert addr.landlord_additional_info == row
+    assert addr.additional_info == row
 
 
 def test_user_address_equality_ignores_source_row() -> None:
     # source_row is excluded from equality (and hashing): identity stays
     # defined by the parsed fields.
     # arrange
-    a = LandlordAssetList(
-        user_address="1 The Street",
+    a = AssetList(
+        address="1 The Street",
         postcode=Postcode("SW1A1AA"),
-        landlord_additional_info={"x": "1"},
+        additional_info={"x": "1"},
     )
-    b = LandlordAssetList(
-        user_address="1 The Street",
+    b = AssetList(
+        address="1 The Street",
         postcode=Postcode("SW1A1AA"),
-        landlord_additional_info={"y": "2"},
+        additional_info={"y": "2"},
     )
     # act / assert
     assert a == b
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index c7197071..26cf46b4 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.user_address import AssetList
 from domain.postcode import Postcode
 from orchestration.landlord_description_overrides_orchestrator import (
     LandlordDescriptionOverridesOrchestrator,
@@ -11,18 +11,18 @@ from repositories.user_address.user_address_repository import UserAddressReposit
 class _StubUserAddressRepository(UserAddressRepository):
     """``get_col_to_description_mappings`` never touches the repo."""
 
-    def load_batch(self, s3_uri: str) -> list[LandlordAssetList]:
+    def load_batch(self, s3_uri: str) -> list[AssetList]:
         raise NotImplementedError()
 
-    def save_batch(self, addresses: list[LandlordAssetList], path_prefix: str) -> str:
+    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
         raise NotImplementedError()
 
 
-def _make_user_address(landlord_additional_info: dict[str, str]) -> LandlordAssetList:
-    return LandlordAssetList(
-        user_address="1 High St",
+def _make_user_address(landlord_additional_info: dict[str, str]) -> AssetList:
+    return AssetList(
+        address="1 High St",
         postcode=Postcode("AA1 1AA"),
-        landlord_additional_info=landlord_additional_info,
+        additional_info=landlord_additional_info,
     )
 
 
diff --git a/tests/repositories/user_address/test_user_address_csv_s3_repository.py b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
index 9d53b35b..dc97f0e3 100644
--- a/tests/repositories/user_address/test_user_address_csv_s3_repository.py
+++ b/tests/repositories/user_address/test_user_address_csv_s3_repository.py
@@ -3,7 +3,7 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws
 
-from domain.addresses.user_address import LandlordAssetList
+from domain.addresses.user_address import AssetList
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.user_address.user_address_csv_s3_repository import (
@@ -50,9 +50,9 @@ def test_load_batch_parses_address_postcode_and_reference(
     # assert
     assert len(addresses) == 1
     address = addresses[0]
-    assert address.user_address == "1 High Street, Flat 2, Townville"
+    assert address.address == "1 High Street, Flat 2, Townville"
     assert address.postcode == Postcode("SW1A1AA")
-    assert address.internal_reference == "REF-001"
+    assert address.org_reference == "REF-001"
 
 
 def test_load_batch_uses_only_address_1_when_others_missing(
@@ -75,9 +75,9 @@ def test_load_batch_uses_only_address_1_when_others_missing(
 
     # assert
     assert len(addresses) == 1
-    assert addresses[0].user_address == "10 Cardiff Road"
+    assert addresses[0].address == "10 Cardiff Road"
     assert addresses[0].postcode == Postcode("CF101AA")
-    assert addresses[0].internal_reference == "REF-002"
+    assert addresses[0].org_reference == "REF-002"
 
 
 def test_load_batch_handles_missing_internal_reference(
@@ -100,9 +100,9 @@ def test_load_batch_handles_missing_internal_reference(
 
     # assert
     assert len(addresses) == 1
-    assert addresses[0].user_address == "5 Park Lane"
+    assert addresses[0].address == "5 Park Lane"
     assert addresses[0].postcode == Postcode("M11AA")
-    assert addresses[0].internal_reference is None
+    assert addresses[0].org_reference is None
 
 
 def test_load_batch_captures_full_source_row(
@@ -124,7 +124,7 @@ def test_load_batch_captures_full_source_row(
     addresses = repo.load_batch(uri)
 
     # assert
-    assert addresses[0].landlord_additional_info == row
+    assert addresses[0].additional_info == row
 
 
 def test_load_batch_raises_when_postcode_column_absent(
@@ -173,10 +173,10 @@ def test_save_batch_returns_uri_under_path_prefix(
 ) -> None:
     # arrange
     addresses = [
-        LandlordAssetList(
-            user_address="1 High Street",
+        AssetList(
+            address="1 High Street",
             postcode=Postcode("SW1A 1AA"),
-            landlord_additional_info={
+            additional_info={
                 "Address 1": "1 High Street",
                 "postcode": "SW1A 1AA",
             },
@@ -229,10 +229,10 @@ def test_save_batch_uses_unique_filename_per_call(
 ) -> None:
     # arrange
     addresses = [
-        LandlordAssetList(
-            user_address="1 High Street",
+        AssetList(
+            address="1 High Street",
             postcode=Postcode("SW1A 1AA"),
-            landlord_additional_info={
+            additional_info={
                 "Address 1": "1 High Street",
                 "postcode": "SW1A 1AA",
             },

From cf14a4e3aaf151c6a472b55483855ffc9ca4aca0 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 08:14:46 +0000
Subject: [PATCH 09/29] rename to SAL and AssetList and RawAddresses

---
 .../landlord_description_overrides/Dockerfile | 34 ---------
 .../landlord_description_overrides/handler.py | 48 ------------
 .../local_handler/.env.local.example          |  5 --
 .../local_handler/docker-compose.yml          |  9 ---
 .../local_handler/invoke_local_lambda.py      | 16 ----
 .../local_handler/run_local.sh                | 12 ---
 .../requirements.txt                          |  4 -
 applications/postcode_splitter/handler.py     | 12 +--
 domain/addresses/postcode_batching.py         | 20 ++---
 .../{asset_list.py => raw_address.py}         |  8 +-
 ...lord_description_overrides_orchestrator.py | 23 ------
 .../postcode_splitter_orchestrator.py         | 10 +--
 .../{user_address => raw_address}/__init__.py |  0
 .../raw_address_csv_s3_repository.py}         | 18 ++---
 .../raw_address/raw_address_repository.py     | 13 ++++
 .../user_address/user_address_repository.py   | 13 ----
 .../addresses/test_postcode_batching.py       | 14 ++--
 ...st_user_address.py => test_raw_address.py} | 44 +++++------
 ...lord_description_overrides_orchestrator.py | 62 ++++++++-------
 .../test_postcode_splitter_orchestrator.py    | 38 ++++------
 .../{user_address => raw_address}/__init__.py |  0
 .../{user_address => raw_address}/conftest.py |  0
 .../test_raw_address_csv_s3_repository.py}    | 76 ++++++++++---------
 23 files changed, 169 insertions(+), 310 deletions(-)
 delete mode 100644 applications/landlord_description_overrides/Dockerfile
 delete mode 100644 applications/landlord_description_overrides/handler.py
 delete mode 100644 applications/landlord_description_overrides/local_handler/.env.local.example
 delete mode 100644 applications/landlord_description_overrides/local_handler/docker-compose.yml
 delete mode 100755 applications/landlord_description_overrides/local_handler/invoke_local_lambda.py
 delete mode 100755 applications/landlord_description_overrides/local_handler/run_local.sh
 delete mode 100644 applications/landlord_description_overrides/requirements.txt
 rename domain/addresses/{asset_list.py => raw_address.py} (67%)
 delete mode 100644 orchestration/landlord_description_overrides_orchestrator.py
 rename repositories/{user_address => raw_address}/__init__.py (100%)
 rename repositories/{user_address/user_address_csv_s3_repository.py => raw_address/raw_address_csv_s3_repository.py} (80%)
 create mode 100644 repositories/raw_address/raw_address_repository.py
 delete mode 100644 repositories/user_address/user_address_repository.py
 rename tests/domain/addresses/{test_user_address.py => test_raw_address.py} (55%)
 rename tests/repositories/{user_address => raw_address}/__init__.py (100%)
 rename tests/repositories/{user_address => raw_address}/conftest.py (100%)
 rename tests/repositories/{user_address/test_user_address_csv_s3_repository.py => raw_address/test_raw_address_csv_s3_repository.py} (80%)

diff --git a/applications/landlord_description_overrides/Dockerfile b/applications/landlord_description_overrides/Dockerfile
deleted file mode 100644
index e2456b81..00000000
--- a/applications/landlord_description_overrides/Dockerfile
+++ /dev/null
@@ -1,34 +0,0 @@
-FROM public.ecr.aws/lambda/python:3.11
-
-# Postgres host/port/database are baked into the image at build time from
-# the deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets),
-# mirroring backend/postcode_splitter/handler/Dockerfile. They map onto the
-# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT
-# baked in -- Terraform injects those as Lambda env vars from Secrets Manager.
-ARG DEV_DB_HOST
-ARG DEV_DB_PORT
-ARG DEV_DB_NAME
-
-ENV POSTGRES_HOST=${DEV_DB_HOST}
-ENV POSTGRES_PORT=${DEV_DB_PORT}
-ENV POSTGRES_DATABASE=${DEV_DB_NAME}
-
-WORKDIR /var/task
-
-COPY applications/postcode_splitter/requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
-
-# Copy the layered source the handler imports from. The new splitter pulls
-# only DDD-shaped packages — no pandas, no legacy backend/.
-COPY domain/ domain/
-COPY infrastructure/ infrastructure/
-COPY orchestration/ orchestration/
-COPY repositories/ repositories/
-COPY utilities/ utilities/
-COPY applications/ applications/
-
-# Place the handler at the Lambda task root so the runtime can resolve
-# ``main.handler`` without an extra package prefix.
-COPY applications/landlord_description_overrides/handler.py /var/task/main.py
-
-CMD ["main.handler"]
diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
deleted file mode 100644
index 2691d6d2..00000000
--- a/applications/landlord_description_overrides/handler.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from typing import Any
-import boto3
-from orchestration.landlord_description_overrides_orchestrator import (
-    LandlordDescriptionOverridesOrchestrator,
-)
-from infrastructure.csv_s3_client import CsvS3Client
-from repositories.user_address.user_address_csv_s3_repository import (
-    UserAddressCsvS3Repository,
-)
-from domain.addresses.user_address import AssetList
-
-
-def handler(
-    body: dict[str, Any],
-    context: Any,
-) -> dict[str, list[str]]:
-
-    s3_uri = "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv"
-    bucket = "retrofit-data-dev"
-
-    # boto3.client is overloaded per-service in the installed stubs; cast
-    # to Any so the strict-mode checker treats it as opaque.
-    boto3_client: Any = (
-        boto3.client
-    )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
-    boto_s3: Any = boto3_client("s3")
-
-    csv_client = CsvS3Client(boto_s3, bucket)
-    user_address_repo = UserAddressCsvS3Repository(csv_client, bucket)
-
-    orchestrator = LandlordDescriptionOverridesOrchestrator(
-        user_address_repo=user_address_repo,
-    )
-
-    list_of_user_address: list[AssetList] = orchestrator.get_user_address(
-        input_s3_uri=s3_uri
-    )
-
-    col_to_desc_map = orchestrator.get_col_to_description_mappings(
-        list_of_user_address=list_of_user_address
-    )
-
-    # Read csv of user input
-    # get the column and unique variations of each description
-    # { walls: "wall variation 1", "wall varition 2"}
-    # Call chatgpt(input from landlord, our way of understanding the mapping) Retrun -> lanlordMapped
-
-    return {"hello world": ["hello world"]}
diff --git a/applications/landlord_description_overrides/local_handler/.env.local.example b/applications/landlord_description_overrides/local_handler/.env.local.example
deleted file mode 100644
index a78a797f..00000000
--- a/applications/landlord_description_overrides/local_handler/.env.local.example
+++ /dev/null
@@ -1,5 +0,0 @@
-POSTGRES_HOST=
-POSTGRES_PORT=5432
-POSTGRES_USERNAME=
-POSTGRES_PASSWORD=
-POSTGRES_DATABASE=
\ No newline at end of file
diff --git a/applications/landlord_description_overrides/local_handler/docker-compose.yml b/applications/landlord_description_overrides/local_handler/docker-compose.yml
deleted file mode 100644
index 6ead2e33..00000000
--- a/applications/landlord_description_overrides/local_handler/docker-compose.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-services:
-  landlord_overrides:
-    build:
-      context: ../../../
-      dockerfile: applications/landlord_description_overrides/Dockerfile
-    ports:
-      - "9002:8080"
-    env_file:
-      - .env.local
diff --git a/applications/landlord_description_overrides/local_handler/invoke_local_lambda.py b/applications/landlord_description_overrides/local_handler/invoke_local_lambda.py
deleted file mode 100755
index 4514495f..00000000
--- a/applications/landlord_description_overrides/local_handler/invoke_local_lambda.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/usr/bin/env python3
-import json
-import requests
-
-HOST = "localhost"
-PORT = "9002"
-
-LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
-
-payload = {"Records": [{"body": json.dumps({})}]}
-
-response = requests.post(LAMBDA_URL, json=payload)
-
-print("Status code:", response.status_code)
-print("Response:")
-print(response.text)
diff --git a/applications/landlord_description_overrides/local_handler/run_local.sh b/applications/landlord_description_overrides/local_handler/run_local.sh
deleted file mode 100755
index 345b60ee..00000000
--- a/applications/landlord_description_overrides/local_handler/run_local.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-cd "$(dirname "$0")"
-
-if [ ! -f .env.local ]; then
-  cp .env.local.example .env.local
-  echo "Created .env.local from the template — fill it in, then re-run." >&2
-  exit 1
-fi
-
-docker compose build --no-cache
-docker compose up --force-recreate
diff --git a/applications/landlord_description_overrides/requirements.txt b/applications/landlord_description_overrides/requirements.txt
deleted file mode 100644
index 6a85a255..00000000
--- a/applications/landlord_description_overrides/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-boto3
-pydantic
-sqlmodel
-psycopg2-binary
diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py
index 9fb3ca6a..1f453858 100644
--- a/applications/postcode_splitter/handler.py
+++ b/applications/postcode_splitter/handler.py
@@ -12,8 +12,8 @@ from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from infrastructure.csv_s3_client import CsvS3Client
 from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
 from orchestration.task_orchestrator import TaskOrchestrator
-from repositories.user_address.user_address_csv_s3_repository import (
-    UserAddressCsvS3Repository,
+from repositories.raw_address.raw_address_csv_s3_repository import (
+    RawAddressCsvS3Repository,
 )
 from utilities.aws_lambda.subtask_handler import subtask_handler
 
@@ -29,17 +29,19 @@ def handler(
 
     # boto3.client is overloaded per-service in the installed stubs; cast
     # to Any so the strict-mode checker treats it as opaque.
-    boto3_client: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    boto3_client: Any = (
+        boto3.client
+    )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
     boto_s3: Any = boto3_client("s3")
     boto_sqs: Any = boto3_client("sqs")
 
     csv_client = CsvS3Client(boto_s3, bucket)
-    user_address_repo = UserAddressCsvS3Repository(csv_client, bucket)
+    raw_address_repo = RawAddressCsvS3Repository(csv_client, bucket)
     queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
 
     splitter = PostcodeSplitterOrchestrator(
         task_orchestrator=task_orchestrator,
-        user_address_repo=user_address_repo,
+        raw_address_repo=raw_address_repo,
         queue_client=queue_client,
     )
 
diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py
index fe63605e..dd7203b1 100644
--- a/domain/addresses/postcode_batching.py
+++ b/domain/addresses/postcode_batching.py
@@ -2,21 +2,21 @@ from __future__ import annotations
 
 from collections.abc import Iterable, Iterator
 
-from domain.addresses.user_address import AssetList
+from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
 
 
 def iter_postcode_grouped_batches(
-    addresses: Iterable[AssetList],
+    addresses: Iterable[RawAddress],
     *,
     max_batch_size: int = 500,
-) -> Iterator[list[AssetList]]:
+) -> Iterator[AddressList]:
     if max_batch_size < 1:
         raise ValueError("max_batch_size must be >= 1")
 
     groups = _group_by_postcode_in_order(addresses)
 
-    buffer: list[AssetList] = []
+    buffer: AddressList = AddressList([])
     for group in groups.values():
         group_len = len(group)
 
@@ -26,14 +26,14 @@ def iter_postcode_grouped_batches(
         if group_len >= max_batch_size:
             if buffer:
                 yield buffer
-                buffer = []
+                buffer = AddressList([])
             yield group
             continue
 
         # Adding this group would overflow: flush buffer before appending.
         if len(buffer) + group_len > max_batch_size:
             yield buffer
-            buffer = []
+            buffer = AddressList([])
 
         buffer.extend(group)
 
@@ -43,9 +43,9 @@ def iter_postcode_grouped_batches(
 
 
 def _group_by_postcode_in_order(
-    addresses: Iterable[AssetList],
-) -> dict[Postcode, list[AssetList]]:
-    groups: dict[Postcode, list[AssetList]] = {}
+    addresses: Iterable[RawAddress],
+) -> dict[Postcode, AddressList]:
+    groups: dict[Postcode, AddressList] = {}
     for address in addresses:
-        groups.setdefault(address.postcode, []).append(address)
+        groups.setdefault(address.postcode, AddressList([])).append(address)
     return groups
diff --git a/domain/addresses/asset_list.py b/domain/addresses/raw_address.py
similarity index 67%
rename from domain/addresses/asset_list.py
rename to domain/addresses/raw_address.py
index 1332aa2e..f9a2789e 100644
--- a/domain/addresses/asset_list.py
+++ b/domain/addresses/raw_address.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from dataclasses import dataclass, field
-from typing import Optional
+from typing import Optional, NewType
 
 from domain.postcode import Postcode
 
@@ -11,10 +11,14 @@ def _empty_source_row() -> dict[str, str]:
 
 
 @dataclass(frozen=True)
-class AssetList:
+class RawAddress:
     address: str
     postcode: Postcode
     org_reference: Optional[str] = None
     additional_info: dict[str, str] = field(
         default_factory=_empty_source_row, compare=False
     )
+
+
+# A batch of raw, pre-standardisation addresses as supplied by a landlord.
+AddressList = NewType("AddressList", list[RawAddress])
diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
deleted file mode 100644
index 18132667..00000000
--- a/orchestration/landlord_description_overrides_orchestrator.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from repositories.user_address.user_address_repository import UserAddressRepository
-from domain.addresses.user_address import AssetList
-
-
-class LandlordDescriptionOverridesOrchestrator:
-    def __init__(self, user_address_repo: UserAddressRepository) -> None:
-        self._user_address_repo = user_address_repo
-
-    def get_user_address(
-        self,
-        input_s3_uri: str,
-    ) -> list[AssetList]:
-        return self._user_address_repo.load_batch(input_s3_uri)
-
-    def get_col_to_description_mappings(
-        self, list_of_user_address: list[AssetList]
-    ) -> dict[str, set[str]]:
-        mappings: dict[str, set[str]] = {}
-        for user_address in list_of_user_address:
-            for key, value in user_address.additional_info.items():
-                # Lower-case so case-only typos collapse to one variant.
-                mappings.setdefault(key, set()).add(value.lower())
-        return mappings
diff --git a/orchestration/postcode_splitter_orchestrator.py b/orchestration/postcode_splitter_orchestrator.py
index 36f4b515..f7ea520c 100644
--- a/orchestration/postcode_splitter_orchestrator.py
+++ b/orchestration/postcode_splitter_orchestrator.py
@@ -5,19 +5,19 @@ from uuid import UUID
 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from orchestration.task_orchestrator import TaskOrchestrator
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from repositories.user_address.user_address_repository import UserAddressRepository
+from repositories.raw_address.raw_address_repository import RawAddressRepository
 
 
 class PostcodeSplitterOrchestrator:
     def __init__(
         self,
         task_orchestrator: TaskOrchestrator,
-        user_address_repo: UserAddressRepository,
+        raw_address_repo: RawAddressRepository,
         queue_client: Address2UprnQueueClient,
         max_batch_size: int = 500,
     ) -> None:
         self._task_orchestrator = task_orchestrator
-        self._user_address_repo = user_address_repo
+        self._raw_address_repo = raw_address_repo
         self._queue_client = queue_client
         self._max_batch_size = max_batch_size
 
@@ -28,7 +28,7 @@ class PostcodeSplitterOrchestrator:
         parent_subtask_id: UUID,
         input_s3_uri: str,
     ) -> list[UUID]:
-        addresses = self._user_address_repo.load_batch(input_s3_uri)
+        addresses = self._raw_address_repo.load_batch(input_s3_uri)
         path_prefix = (
             f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}"
         )
@@ -37,7 +37,7 @@ class PostcodeSplitterOrchestrator:
         for batch in iter_postcode_grouped_batches(
             addresses, max_batch_size=self._max_batch_size
         ):
-            batch_uri = self._user_address_repo.save_batch(batch, path_prefix)
+            batch_uri = self._raw_address_repo.save_batch(batch, path_prefix)
             child = self._task_orchestrator.create_child_subtask(
                 parent_task_id,
                 inputs={
diff --git a/repositories/user_address/__init__.py b/repositories/raw_address/__init__.py
similarity index 100%
rename from repositories/user_address/__init__.py
rename to repositories/raw_address/__init__.py
diff --git a/repositories/user_address/user_address_csv_s3_repository.py b/repositories/raw_address/raw_address_csv_s3_repository.py
similarity index 80%
rename from repositories/user_address/user_address_csv_s3_repository.py
rename to repositories/raw_address/raw_address_csv_s3_repository.py
index adbbfe3e..5b47438d 100644
--- a/repositories/user_address/user_address_csv_s3_repository.py
+++ b/repositories/raw_address/raw_address_csv_s3_repository.py
@@ -4,10 +4,10 @@ import uuid
 from datetime import datetime, timezone
 from typing import Optional
 
-from domain.addresses.user_address import AssetList
+from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.user_address.user_address_repository import UserAddressRepository
+from repositories.raw_address.raw_address_repository import RawAddressRepository
 
 _ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
 _POSTCODE_COLUMN: str = "postcode"
@@ -15,32 +15,32 @@ _INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"
 _POSTCODE_CLEAN_COLUMN: str = "postcode_clean"
 
 
-class UserAddressCsvS3Repository(UserAddressRepository):
+class RawAddressCsvS3Repository(RawAddressRepository):
     def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
         self._csv_client = csv_client
         self._bucket = bucket
 
-    def load_batch(self, s3_uri: str) -> list[AssetList]:
+    def load_batch(self, s3_uri: str) -> AddressList:
         rows = self._csv_client.read_rows(s3_uri)
         if rows and _POSTCODE_COLUMN not in rows[0]:
             raise ValueError(
                 f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
                 f"columns present: {sorted(rows[0])}"
             )
-        addresses: list[AssetList] = []
+        addresses: AddressList = AddressList([])
         for row in rows:
             parts = [
                 row[col].strip()
                 for col in _ADDRESS_COLUMNS
                 if col in row and row[col].strip()
             ]
-            user_address = ", ".join(parts)
+            raw_address = ", ".join(parts)
             postcode = row.get(_POSTCODE_COLUMN, "")
             raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
             internal_reference: Optional[str] = raw_ref or None
             addresses.append(
-                AssetList(
-                    address=user_address,
+                RawAddress(
+                    address=raw_address,
                     postcode=Postcode(postcode),
                     org_reference=internal_reference,
                     additional_info=row,
@@ -48,7 +48,7 @@ class UserAddressCsvS3Repository(UserAddressRepository):
             )
         return addresses
 
-    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
+    def save_batch(self, addresses: AddressList, path_prefix: str) -> str:
         rows: list[dict[str, str]] = [
             {
                 **addr.additional_info,
diff --git a/repositories/raw_address/raw_address_repository.py b/repositories/raw_address/raw_address_repository.py
new file mode 100644
index 00000000..c79d6c4a
--- /dev/null
+++ b/repositories/raw_address/raw_address_repository.py
@@ -0,0 +1,13 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+from domain.addresses.raw_address import AddressList
+
+
+class RawAddressRepository(ABC):
+    @abstractmethod
+    def load_batch(self, s3_uri: str) -> AddressList: ...
+
+    @abstractmethod
+    def save_batch(self, addresses: AddressList, path_prefix: str) -> str: ...
diff --git a/repositories/user_address/user_address_repository.py b/repositories/user_address/user_address_repository.py
deleted file mode 100644
index eafd0e1d..00000000
--- a/repositories/user_address/user_address_repository.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-
-from domain.addresses.user_address import AssetList
-
-
-class UserAddressRepository(ABC):
-    @abstractmethod
-    def load_batch(self, s3_uri: str) -> list[AssetList]: ...
-
-    @abstractmethod
-    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str: ...
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py
index 4aaeef10..c7bb2d00 100644
--- a/tests/domain/addresses/test_postcode_batching.py
+++ b/tests/domain/addresses/test_postcode_batching.py
@@ -1,15 +1,17 @@
 import pytest
 
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from domain.addresses.user_address import AssetList
+from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
 
 
-def _addrs(postcode: str, n: int) -> list[AssetList]:
-    return [
-        AssetList(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
-        for i in range(n)
-    ]
+def _addrs(postcode: str, n: int) -> AddressList:
+    return AddressList(
+        [
+            RawAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
+            for i in range(n)
+        ]
+    )
 
 
 def test_empty_input_yields_no_batches() -> None:
diff --git a/tests/domain/addresses/test_user_address.py b/tests/domain/addresses/test_raw_address.py
similarity index 55%
rename from tests/domain/addresses/test_user_address.py
rename to tests/domain/addresses/test_raw_address.py
index be065995..0309b45e 100644
--- a/tests/domain/addresses/test_user_address.py
+++ b/tests/domain/addresses/test_raw_address.py
@@ -2,36 +2,36 @@ import dataclasses
 
 import pytest
 
-from domain.addresses.user_address import AssetList
+from domain.addresses.raw_address import RawAddress
 from domain.postcode import Postcode
 
 
-def test_user_address_holds_postcode_value_object() -> None:
+def test_raw_address_holds_postcode_value_object() -> None:
     # act
-    addr = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    addr = RawAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
     # assert
     assert addr.postcode == Postcode("SW1A1AA")
 
 
-def test_user_address_preserves_user_address_verbatim() -> None:
-    # The free-text user_address string is intentionally NOT normalised --
+def test_raw_address_preserves_raw_address_verbatim() -> None:
+    # The free-text raw_address string is intentionally NOT normalised --
     # only the postcode is canonicalised, and that happens inside Postcode.
     # act
-    addr = AssetList(address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
+    addr = RawAddress(address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.address == "  1 The   Street  "
 
 
-def test_user_address_internal_reference_defaults_to_none() -> None:
+def test_raw_address_internal_reference_defaults_to_none() -> None:
     # act
-    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.org_reference is None
 
 
-def test_user_address_internal_reference_accepted() -> None:
+def test_raw_address_internal_reference_accepted() -> None:
     # act
-    addr = AssetList(
+    addr = RawAddress(
         address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         org_reference="cust-42",
@@ -40,36 +40,36 @@ def test_user_address_internal_reference_accepted() -> None:
     assert addr.org_reference == "cust-42"
 
 
-def test_user_address_is_frozen() -> None:
+def test_raw_address_is_frozen() -> None:
     # arrange
-    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     with pytest.raises(dataclasses.FrozenInstanceError):
         addr.postcode = Postcode("OTHER")  # type: ignore[misc]
 
 
-def test_user_address_equality_uses_canonical_postcode() -> None:
+def test_raw_address_equality_uses_canonical_postcode() -> None:
     # Postcode sanitises eagerly, so addresses built from different surface
     # forms of the same postcode compare equal.
     # arrange
-    a = AssetList(address="1 The Street", postcode=Postcode("sw1a 1aa"))
-    b = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    a = RawAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    b = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     assert a == b
 
 
-def test_user_address_source_row_defaults_to_empty_dict() -> None:
+def test_raw_address_source_row_defaults_to_empty_dict() -> None:
     # act
-    addr = AssetList(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.additional_info == {}
 
 
-def test_user_address_carries_source_row() -> None:
+def test_raw_address_carries_source_row() -> None:
     # arrange
     row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
     # act
-    addr = AssetList(
+    addr = RawAddress(
         address="1 The Street",
         postcode=Postcode("SW1A 1AA"),
         additional_info=row,
@@ -78,16 +78,16 @@ def test_user_address_carries_source_row() -> None:
     assert addr.additional_info == row
 
 
-def test_user_address_equality_ignores_source_row() -> None:
+def test_raw_address_equality_ignores_source_row() -> None:
     # source_row is excluded from equality (and hashing): identity stays
     # defined by the parsed fields.
     # arrange
-    a = AssetList(
+    a = RawAddress(
         address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         additional_info={"x": "1"},
     )
-    b = AssetList(
+    b = RawAddress(
         address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         additional_info={"y": "2"},
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index 26cf46b4..58790cc6 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -1,44 +1,44 @@
 from __future__ import annotations
 
-from domain.addresses.user_address import AssetList
+from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
 from orchestration.landlord_description_overrides_orchestrator import (
-    LandlordDescriptionOverridesOrchestrator,
+    SALOrchestrator,
 )
-from repositories.user_address.user_address_repository import UserAddressRepository
+from repositories.raw_address.raw_address_repository import RawAddressRepository
 
 
-class _StubUserAddressRepository(UserAddressRepository):
+class _StubRawAddressRepository(RawAddressRepository):
     """``get_col_to_description_mappings`` never touches the repo."""
 
-    def load_batch(self, s3_uri: str) -> list[AssetList]:
+    def load_batch(self, s3_uri: str) -> AddressList:
         raise NotImplementedError()
 
-    def save_batch(self, addresses: list[AssetList], path_prefix: str) -> str:
+    def save_batch(self, addresses: AddressList, path_prefix: str) -> str:
         raise NotImplementedError()
 
 
-def _make_user_address(landlord_additional_info: dict[str, str]) -> AssetList:
-    return AssetList(
+def _make_raw_address(landlord_additional_info: dict[str, str]) -> RawAddress:
+    return RawAddress(
         address="1 High St",
         postcode=Postcode("AA1 1AA"),
         additional_info=landlord_additional_info,
     )
 
 
-def _orchestrator() -> LandlordDescriptionOverridesOrchestrator:
-    return LandlordDescriptionOverridesOrchestrator(
-        user_address_repo=_StubUserAddressRepository()
-    )
+def _orchestrator() -> SALOrchestrator:
+    return SALOrchestrator(raw_address_repo=_StubRawAddressRepository())
 
 
 def test_collects_every_value_per_shared_key() -> None:
     # arrange: every address carries the same keys, all values distinct.
-    addresses = [
-        _make_user_address({"description": "cosy", "condition": "new"}),
-        _make_user_address({"description": "spacious", "condition": "worn"}),
-        _make_user_address({"description": "bright", "condition": "fair"}),
-    ]
+    addresses = AddressList(
+        [
+            _make_raw_address({"description": "cosy", "condition": "new"}),
+            _make_raw_address({"description": "spacious", "condition": "worn"}),
+            _make_raw_address({"description": "bright", "condition": "fair"}),
+        ]
+    )
 
     # act
     mappings = _orchestrator().get_col_to_description_mappings(addresses)
@@ -52,11 +52,13 @@ def test_collects_every_value_per_shared_key() -> None:
 
 def test_repeated_values_collapse_to_one_variant() -> None:
     # arrange: two addresses share the same wall description.
-    addresses = [
-        _make_user_address({"description": "cosy"}),
-        _make_user_address({"description": "cosy"}),
-        _make_user_address({"description": "bright"}),
-    ]
+    addresses = AddressList(
+        [
+            _make_raw_address({"description": "cosy"}),
+            _make_raw_address({"description": "cosy"}),
+            _make_raw_address({"description": "bright"}),
+        ]
+    )
 
     # act
     mappings = _orchestrator().get_col_to_description_mappings(addresses)
@@ -67,11 +69,13 @@ def test_repeated_values_collapse_to_one_variant() -> None:
 
 def test_case_only_variants_collapse_to_one() -> None:
     # arrange: the same description typed with inconsistent casing.
-    addresses = [
-        _make_user_address({"description": "Cosy"}),
-        _make_user_address({"description": "cosy"}),
-        _make_user_address({"description": "COSY"}),
-    ]
+    addresses = AddressList(
+        [
+            _make_raw_address({"description": "Cosy"}),
+            _make_raw_address({"description": "cosy"}),
+            _make_raw_address({"description": "COSY"}),
+        ]
+    )
 
     # act
     mappings = _orchestrator().get_col_to_description_mappings(addresses)
@@ -82,7 +86,7 @@ def test_case_only_variants_collapse_to_one() -> None:
 
 def test_empty_address_list_yields_empty_mapping() -> None:
     # arrange / act
-    mappings = _orchestrator().get_col_to_description_mappings([])
+    mappings = _orchestrator().get_col_to_description_mappings(AddressList([]))
 
     # assert
     assert mappings == {}
@@ -90,7 +94,7 @@ def test_empty_address_list_yields_empty_mapping() -> None:
 
 def test_single_address_yields_single_value_per_key() -> None:
     # arrange
-    addresses = [_make_user_address({"description": "cosy"})]
+    addresses = AddressList([_make_raw_address({"description": "cosy"})])
 
     # act
     mappings = _orchestrator().get_col_to_description_mappings(addresses)
diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py
index a718ffbc..36039fca 100644
--- a/tests/orchestration/test_postcode_splitter_orchestrator.py
+++ b/tests/orchestration/test_postcode_splitter_orchestrator.py
@@ -18,8 +18,8 @@ from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchest
 from orchestration.task_orchestrator import TaskOrchestrator
 from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
 from repositories.tasks.task_postgres_repository import TaskPostgresRepository
-from repositories.user_address.user_address_csv_s3_repository import (
-    UserAddressCsvS3Repository,
+from repositories.raw_address.raw_address_csv_s3_repository import (
+    RawAddressCsvS3Repository,
 )
 
 BUCKET = "splitter-bucket"
@@ -27,7 +27,9 @@ REGION = "us-east-1"
 
 
 def _make_boto_client(service_name: str) -> Any:
-    factory: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    factory: Any = (
+        boto3.client
+    )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
     return factory(service_name, region_name=REGION)
 
 
@@ -62,7 +64,7 @@ class Harness:
     csv_client: CsvS3Client
     boto_sqs: Any
     queue_url: str
-    repo: UserAddressCsvS3Repository
+    repo: RawAddressCsvS3Repository
 
 
 @pytest.fixture
@@ -76,7 +78,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
         queue_url = cast(str, queue["QueueUrl"])
 
         csv_client = CsvS3Client(boto_s3, BUCKET)
-        repo = UserAddressCsvS3Repository(csv_client, BUCKET)
+        repo = RawAddressCsvS3Repository(csv_client, BUCKET)
         queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
 
         # DB: ephemeral PostgreSQL TaskOrchestrator
@@ -89,7 +91,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
 
             splitter = PostcodeSplitterOrchestrator(
                 task_orchestrator=task_orchestrator,
-                user_address_repo=repo,
+                raw_address_repo=repo,
                 queue_client=queue_client,
                 max_batch_size=3,
             )
@@ -169,10 +171,8 @@ def test_split_and_dispatch_creates_three_children_for_fixture(
     harness: Harness,
 ) -> None:
     # arrange
-    parent_task, parent_subtask = (
-        harness.task_orchestrator.create_task_with_subtask(
-            task_source="manual:postcode-splitter-int"
-        )
+    parent_task, parent_subtask = harness.task_orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
     )
     input_uri = _upload_fixture_csv(harness.csv_client)
 
@@ -197,10 +197,8 @@ def test_split_and_dispatch_persists_child_inputs_with_task_id_and_s3_uri(
     harness: Harness,
 ) -> None:
     # arrange
-    parent_task, parent_subtask = (
-        harness.task_orchestrator.create_task_with_subtask(
-            task_source="manual:postcode-splitter-int"
-        )
+    parent_task, parent_subtask = harness.task_orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
     )
     input_uri = _upload_fixture_csv(harness.csv_client)
 
@@ -230,10 +228,8 @@ def test_split_and_dispatch_publishes_one_message_per_child_with_matching_ids(
     harness: Harness,
 ) -> None:
     # arrange
-    parent_task, parent_subtask = (
-        harness.task_orchestrator.create_task_with_subtask(
-            task_source="manual:postcode-splitter-int"
-        )
+    parent_task, parent_subtask = harness.task_orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
     )
     input_uri = _upload_fixture_csv(harness.csv_client)
 
@@ -267,10 +263,8 @@ def test_split_and_dispatch_returns_child_ids_in_dispatch_order(
     harness: Harness,
 ) -> None:
     # arrange
-    parent_task, parent_subtask = (
-        harness.task_orchestrator.create_task_with_subtask(
-            task_source="manual:postcode-splitter-int"
-        )
+    parent_task, parent_subtask = harness.task_orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
     )
     input_uri = _upload_fixture_csv(harness.csv_client)
 
diff --git a/tests/repositories/user_address/__init__.py b/tests/repositories/raw_address/__init__.py
similarity index 100%
rename from tests/repositories/user_address/__init__.py
rename to tests/repositories/raw_address/__init__.py
diff --git a/tests/repositories/user_address/conftest.py b/tests/repositories/raw_address/conftest.py
similarity index 100%
rename from tests/repositories/user_address/conftest.py
rename to tests/repositories/raw_address/conftest.py
diff --git a/tests/repositories/user_address/test_user_address_csv_s3_repository.py b/tests/repositories/raw_address/test_raw_address_csv_s3_repository.py
similarity index 80%
rename from tests/repositories/user_address/test_user_address_csv_s3_repository.py
rename to tests/repositories/raw_address/test_raw_address_csv_s3_repository.py
index dc97f0e3..09fc8fc5 100644
--- a/tests/repositories/user_address/test_user_address_csv_s3_repository.py
+++ b/tests/repositories/raw_address/test_raw_address_csv_s3_repository.py
@@ -3,11 +3,11 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws
 
-from domain.addresses.user_address import AssetList
+from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.user_address.user_address_csv_s3_repository import (
-    UserAddressCsvS3Repository,
+from repositories.raw_address.raw_address_csv_s3_repository import (
+    RawAddressCsvS3Repository,
 )
 from tests.infrastructure import make_boto_client
 
@@ -15,22 +15,22 @@ BUCKET = "user-address-bucket"
 
 
 @pytest.fixture
-def repo() -> Iterator[UserAddressCsvS3Repository]:
+def repo() -> Iterator[RawAddressCsvS3Repository]:
     with mock_aws():
         boto_client = make_boto_client("s3")
         boto_client.create_bucket(Bucket=BUCKET)
         csv_client = CsvS3Client(boto_client, BUCKET)
-        yield UserAddressCsvS3Repository(csv_client, BUCKET)
+        yield RawAddressCsvS3Repository(csv_client, BUCKET)
 
 
 def _upload_csv(
-    repo: UserAddressCsvS3Repository, rows: list[dict[str, str]], key: str
+    repo: RawAddressCsvS3Repository, rows: list[dict[str, str]], key: str
 ) -> str:
     return repo._csv_client.save_rows(rows, key)  # pyright: ignore[reportPrivateUsage]
 
 
 def test_load_batch_parses_address_postcode_and_reference(
-    repo: UserAddressCsvS3Repository,
+    repo: RawAddressCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -56,7 +56,7 @@ def test_load_batch_parses_address_postcode_and_reference(
 
 
 def test_load_batch_uses_only_address_1_when_others_missing(
-    repo: UserAddressCsvS3Repository,
+    repo: RawAddressCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -81,7 +81,7 @@ def test_load_batch_uses_only_address_1_when_others_missing(
 
 
 def test_load_batch_handles_missing_internal_reference(
-    repo: UserAddressCsvS3Repository,
+    repo: RawAddressCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -106,10 +106,10 @@ def test_load_batch_handles_missing_internal_reference(
 
 
 def test_load_batch_captures_full_source_row(
-    repo: UserAddressCsvS3Repository,
+    repo: RawAddressCsvS3Repository,
 ) -> None:
     # A raw EPC-export-shaped row: the splitter must preserve every column,
-    # not just the ones it parses into UserAddress fields.
+    # not just the ones it parses into RawAddress fields.
     # arrange
     row = {
         "Asset Reference": "511",
@@ -128,7 +128,7 @@ def test_load_batch_captures_full_source_row(
 
 
 def test_load_batch_raises_when_postcode_column_absent(
-    repo: UserAddressCsvS3Repository,
+    repo: RawAddressCsvS3Repository,
 ) -> None:
     # arrange
     rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}]
@@ -140,7 +140,7 @@ def test_load_batch_raises_when_postcode_column_absent(
 
 
 def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
-    repo: UserAddressCsvS3Repository,
+    repo: RawAddressCsvS3Repository,
 ) -> None:
     # arrange
     row = {
@@ -169,19 +169,21 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
 
 
 def test_save_batch_returns_uri_under_path_prefix(
-    repo: UserAddressCsvS3Repository,
+    repo: RawAddressCsvS3Repository,
 ) -> None:
     # arrange
-    addresses = [
-        AssetList(
-            address="1 High Street",
-            postcode=Postcode("SW1A 1AA"),
-            additional_info={
-                "Address 1": "1 High Street",
-                "postcode": "SW1A 1AA",
-            },
-        ),
-    ]
+    addresses = AddressList(
+        [
+            RawAddress(
+                address="1 High Street",
+                postcode=Postcode("SW1A 1AA"),
+                additional_info={
+                    "Address 1": "1 High Street",
+                    "postcode": "SW1A 1AA",
+                },
+            ),
+        ]
+    )
 
     # act
     uri = repo.save_batch(addresses, "tasks/abc/batches")
@@ -192,7 +194,7 @@ def test_save_batch_returns_uri_under_path_prefix(
 
 
 def test_save_then_reload_round_trip_preserves_columns(
-    repo: UserAddressCsvS3Repository,
+    repo: RawAddressCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -225,19 +227,21 @@ def test_save_then_reload_round_trip_preserves_columns(
 
 
 def test_save_batch_uses_unique_filename_per_call(
-    repo: UserAddressCsvS3Repository,
+    repo: RawAddressCsvS3Repository,
 ) -> None:
     # arrange
-    addresses = [
-        AssetList(
-            address="1 High Street",
-            postcode=Postcode("SW1A 1AA"),
-            additional_info={
-                "Address 1": "1 High Street",
-                "postcode": "SW1A 1AA",
-            },
-        ),
-    ]
+    addresses = AddressList(
+        [
+            RawAddress(
+                address="1 High Street",
+                postcode=Postcode("SW1A 1AA"),
+                additional_info={
+                    "Address 1": "1 High Street",
+                    "postcode": "SW1A 1AA",
+                },
+            ),
+        ]
+    )
 
     # act
     uri_1 = repo.save_batch(addresses, "tasks/uniqueness")

From 5b677dedbec75d85faa0dad510be4c15d91b7741 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 08:15:11 +0000
Subject: [PATCH 10/29] Add SAL application scaffold and SALOrchestrator

---
 applications/SAL/Dockerfile                   | 34 ++++++++++++++
 applications/SAL/handler.py                   | 46 +++++++++++++++++++
 .../SAL/local_handler/.env.local.example      |  5 ++
 .../SAL/local_handler/docker-compose.yml      |  9 ++++
 .../SAL/local_handler/invoke_local_lambda.py  | 16 +++++++
 applications/SAL/local_handler/run_local.sh   | 12 +++++
 applications/SAL/requirements.txt             |  4 ++
 orchestration/sal_orchestrator.py             | 23 ++++++++++
 8 files changed, 149 insertions(+)
 create mode 100644 applications/SAL/Dockerfile
 create mode 100644 applications/SAL/handler.py
 create mode 100644 applications/SAL/local_handler/.env.local.example
 create mode 100644 applications/SAL/local_handler/docker-compose.yml
 create mode 100755 applications/SAL/local_handler/invoke_local_lambda.py
 create mode 100755 applications/SAL/local_handler/run_local.sh
 create mode 100644 applications/SAL/requirements.txt
 create mode 100644 orchestration/sal_orchestrator.py

diff --git a/applications/SAL/Dockerfile b/applications/SAL/Dockerfile
new file mode 100644
index 00000000..e2456b81
--- /dev/null
+++ b/applications/SAL/Dockerfile
@@ -0,0 +1,34 @@
+FROM public.ecr.aws/lambda/python:3.11
+
+# Postgres host/port/database are baked into the image at build time from
+# the deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets),
+# mirroring backend/postcode_splitter/handler/Dockerfile. They map onto the
+# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT
+# baked in -- Terraform injects those as Lambda env vars from Secrets Manager.
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+ENV POSTGRES_HOST=${DEV_DB_HOST}
+ENV POSTGRES_PORT=${DEV_DB_PORT}
+ENV POSTGRES_DATABASE=${DEV_DB_NAME}
+
+WORKDIR /var/task
+
+COPY applications/postcode_splitter/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the layered source the handler imports from. The new splitter pulls
+# only DDD-shaped packages — no pandas, no legacy backend/.
+COPY domain/ domain/
+COPY infrastructure/ infrastructure/
+COPY orchestration/ orchestration/
+COPY repositories/ repositories/
+COPY utilities/ utilities/
+COPY applications/ applications/
+
+# Place the handler at the Lambda task root so the runtime can resolve
+# ``main.handler`` without an extra package prefix.
+COPY applications/landlord_description_overrides/handler.py /var/task/main.py
+
+CMD ["main.handler"]
diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py
new file mode 100644
index 00000000..73dffd5a
--- /dev/null
+++ b/applications/SAL/handler.py
@@ -0,0 +1,46 @@
+from typing import Any
+import boto3
+from orchestration.landlord_description_overrides_orchestrator import (
+    SALOrchestrator,
+)
+from infrastructure.csv_s3_client import CsvS3Client
+from repositories.raw_address.raw_address_csv_s3_repository import (
+    RawAddressCsvS3Repository,
+)
+from domain.addresses.raw_address import AddressList
+
+
+def handler(
+    body: dict[str, Any],
+    context: Any,
+) -> dict[str, list[str]]:
+
+    s3_uri = "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv"
+    bucket = "retrofit-data-dev"
+
+    # boto3.client is overloaded per-service in the installed stubs; cast
+    # to Any so the strict-mode checker treats it as opaque.
+    boto3_client: Any = boto3.client  # noqa
+    boto_s3: Any = boto3_client("s3")
+
+    csv_client = CsvS3Client(boto_s3, bucket)
+    raw_address_repo = RawAddressCsvS3Repository(csv_client, bucket)
+
+    orchestrator = SALOrchestrator(
+        raw_address_repo=raw_address_repo,
+    )
+
+    list_of_raw_address: AddressList = orchestrator.get_raw_addresses(
+        input_s3_uri=s3_uri
+    )
+
+    col_to_desc_map = orchestrator.get_col_to_description_mappings(
+        list_of_raw_address=list_of_raw_address
+    )
+
+    # Read csv of user input
+    # get the column and unique variations of each description
+    # {"walls": {"wall variation 1", "wall variation 2"}}
+    # Call ChatGPT(input from landlord, our way of understanding the mapping); return -> landlordMapped
+
+    return {"hello world": ["hello world"]}
diff --git a/applications/SAL/local_handler/.env.local.example b/applications/SAL/local_handler/.env.local.example
new file mode 100644
index 00000000..a78a797f
--- /dev/null
+++ b/applications/SAL/local_handler/.env.local.example
@@ -0,0 +1,5 @@
+POSTGRES_HOST=
+POSTGRES_PORT=5432
+POSTGRES_USERNAME=
+POSTGRES_PASSWORD=
+POSTGRES_DATABASE=
\ No newline at end of file
diff --git a/applications/SAL/local_handler/docker-compose.yml b/applications/SAL/local_handler/docker-compose.yml
new file mode 100644
index 00000000..6ead2e33
--- /dev/null
+++ b/applications/SAL/local_handler/docker-compose.yml
@@ -0,0 +1,9 @@
+services:
+  landlord_overrides:
+    build:
+      context: ../../../
+      dockerfile: applications/landlord_description_overrides/Dockerfile
+    ports:
+      - "9002:8080"
+    env_file:
+      - .env.local
diff --git a/applications/SAL/local_handler/invoke_local_lambda.py b/applications/SAL/local_handler/invoke_local_lambda.py
new file mode 100755
index 00000000..4514495f
--- /dev/null
+++ b/applications/SAL/local_handler/invoke_local_lambda.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+import json
+import requests
+
+HOST = "localhost"
+PORT = "9002"
+
+LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
+
+payload = {"Records": [{"body": json.dumps({})}]}
+
+response = requests.post(LAMBDA_URL, json=payload)
+
+print("Status code:", response.status_code)
+print("Response:")
+print(response.text)
diff --git a/applications/SAL/local_handler/run_local.sh b/applications/SAL/local_handler/run_local.sh
new file mode 100755
index 00000000..345b60ee
--- /dev/null
+++ b/applications/SAL/local_handler/run_local.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "$(dirname "$0")"
+
+if [ ! -f .env.local ]; then
+  cp .env.local.example .env.local
+  echo "Created .env.local from the template — fill it in, then re-run." >&2
+  exit 1
+fi
+
+docker compose build --no-cache
+docker compose up --force-recreate
diff --git a/applications/SAL/requirements.txt b/applications/SAL/requirements.txt
new file mode 100644
index 00000000..6a85a255
--- /dev/null
+++ b/applications/SAL/requirements.txt
@@ -0,0 +1,4 @@
+boto3
+pydantic
+sqlmodel
+psycopg2-binary
diff --git a/orchestration/sal_orchestrator.py b/orchestration/sal_orchestrator.py
new file mode 100644
index 00000000..e9584aa1
--- /dev/null
+++ b/orchestration/sal_orchestrator.py
@@ -0,0 +1,23 @@
+from repositories.raw_address.raw_address_repository import RawAddressRepository
+from domain.addresses.raw_address import AddressList
+
+
+class SALOrchestrator:
+    def __init__(self, raw_address_repo: RawAddressRepository) -> None:
+        self._raw_address_repo = raw_address_repo
+
+    def get_raw_addresses(
+        self,
+        input_s3_uri: str,
+    ) -> AddressList:
+        return self._raw_address_repo.load_batch(input_s3_uri)
+
+    def get_col_to_description_mappings(
+        self, list_of_raw_address: AddressList
+    ) -> dict[str, set[str]]:
+        mappings: dict[str, set[str]] = {}
+        for raw_address in list_of_raw_address:
+            for key, value in raw_address.additional_info.items():
+                # Lower-case so case-only typos collapse to one variant.
+                mappings.setdefault(key, set()).add(value.lower())
+        return mappings

From 84098e28ff5937c012a18f72bd7217339f91d33c Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 08:17:37 +0000
Subject: [PATCH 11/29] Rename RawAddress repositories to RawAddressList repositories

---
 applications/SAL/handler.py                   |  6 ++---
 .../postcode_splitter_orchestrator.py         |  4 +--
 orchestration/sal_orchestrator.py             |  4 +--
 ... => raw_address_list_csv_s3_repository.py} |  4 +--
 ...tory.py => raw_address_list_repository.py} |  2 +-
 ...lord_description_overrides_orchestrator.py |  6 ++---
 .../test_postcode_splitter_orchestrator.py    |  6 ++---
 .../test_raw_address_csv_s3_repository.py     | 26 +++++++++----------
 8 files changed, 29 insertions(+), 29 deletions(-)
 rename repositories/raw_address/{raw_address_csv_s3_repository.py => raw_address_list_csv_s3_repository.py} (96%)
 rename repositories/raw_address/{raw_address_repository.py => raw_address_list_repository.py} (89%)

diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py
index 73dffd5a..c975a039 100644
--- a/applications/SAL/handler.py
+++ b/applications/SAL/handler.py
@@ -1,11 +1,11 @@
 from typing import Any
 import boto3
-from orchestration.landlord_description_overrides_orchestrator import (
+from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.raw_address.raw_address_csv_s3_repository import (
-    RawAddressCsvS3Repository,
+    RawAddressListCsvS3Repository,
 )
 from domain.addresses.raw_address import AddressList
 
@@ -24,7 +24,7 @@ def handler(
     boto_s3: Any = boto3_client("s3")
 
     csv_client = CsvS3Client(boto_s3, bucket)
-    raw_address_repo = RawAddressCsvS3Repository(csv_client, bucket)
+    raw_address_repo = RawAddressListCsvS3Repository(csv_client, bucket)
 
     orchestrator = SALOrchestrator(
         raw_address_repo=raw_address_repo,
diff --git a/orchestration/postcode_splitter_orchestrator.py b/orchestration/postcode_splitter_orchestrator.py
index f7ea520c..d1530e9f 100644
--- a/orchestration/postcode_splitter_orchestrator.py
+++ b/orchestration/postcode_splitter_orchestrator.py
@@ -5,14 +5,14 @@ from uuid import UUID
 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from orchestration.task_orchestrator import TaskOrchestrator
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from repositories.raw_address.raw_address_repository import RawAddressRepository
+from repositories.raw_address.raw_address_repository import RawAddressListRepository
 
 
 class PostcodeSplitterOrchestrator:
     def __init__(
         self,
         task_orchestrator: TaskOrchestrator,
-        raw_address_repo: RawAddressRepository,
+        raw_address_repo: RawAddressListRepository,
         queue_client: Address2UprnQueueClient,
         max_batch_size: int = 500,
     ) -> None:
diff --git a/orchestration/sal_orchestrator.py b/orchestration/sal_orchestrator.py
index e9584aa1..1154befc 100644
--- a/orchestration/sal_orchestrator.py
+++ b/orchestration/sal_orchestrator.py
@@ -1,9 +1,9 @@
-from repositories.raw_address.raw_address_repository import RawAddressRepository
+from repositories.raw_address.raw_address_repository import RawAddressListRepository
 from domain.addresses.raw_address import AddressList
 
 
 class SALOrchestrator:
-    def __init__(self, raw_address_repo: RawAddressRepository) -> None:
+    def __init__(self, raw_address_repo: RawAddressListRepository) -> None:
         self._raw_address_repo = raw_address_repo
 
     def get_raw_addresses(
diff --git a/repositories/raw_address/raw_address_csv_s3_repository.py b/repositories/raw_address/raw_address_list_csv_s3_repository.py
similarity index 96%
rename from repositories/raw_address/raw_address_csv_s3_repository.py
rename to repositories/raw_address/raw_address_list_csv_s3_repository.py
index 5b47438d..b0c2eec7 100644
--- a/repositories/raw_address/raw_address_csv_s3_repository.py
+++ b/repositories/raw_address/raw_address_list_csv_s3_repository.py
@@ -7,7 +7,7 @@ from typing import Optional
 from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.raw_address.raw_address_repository import RawAddressRepository
+from repositories.raw_address.raw_address_repository import RawAddressListRepository
 
 _ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
 _POSTCODE_COLUMN: str = "postcode"
@@ -15,7 +15,7 @@ _INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"
 _POSTCODE_CLEAN_COLUMN: str = "postcode_clean"
 
 
-class RawAddressCsvS3Repository(RawAddressRepository):
+class RawAddressListCsvS3Repository(RawAddressListRepository):
     def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
         self._csv_client = csv_client
         self._bucket = bucket
diff --git a/repositories/raw_address/raw_address_repository.py b/repositories/raw_address/raw_address_list_repository.py
similarity index 89%
rename from repositories/raw_address/raw_address_repository.py
rename to repositories/raw_address/raw_address_list_repository.py
index c79d6c4a..8abb96be 100644
--- a/repositories/raw_address/raw_address_repository.py
+++ b/repositories/raw_address/raw_address_list_repository.py
@@ -5,7 +5,7 @@ from abc import ABC, abstractmethod
 from domain.addresses.raw_address import AddressList
 
 
-class RawAddressRepository(ABC):
+class RawAddressListRepository(ABC):
     @abstractmethod
     def load_batch(self, s3_uri: str) -> AddressList: ...
 
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index 58790cc6..bb79df6c 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -2,13 +2,13 @@ from __future__ import annotations
 
 from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
-from orchestration.landlord_description_overrides_orchestrator import (
+from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
-from repositories.raw_address.raw_address_repository import RawAddressRepository
+from repositories.raw_address.raw_address_repository import RawAddressListRepository
 
 
-class _StubRawAddressRepository(RawAddressRepository):
+class _StubRawAddressRepository(RawAddressListRepository):
     """``get_col_to_description_mappings`` never touches the repo."""
 
     def load_batch(self, s3_uri: str) -> AddressList:
diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py
index 36039fca..0ce81781 100644
--- a/tests/orchestration/test_postcode_splitter_orchestrator.py
+++ b/tests/orchestration/test_postcode_splitter_orchestrator.py
@@ -19,7 +19,7 @@ from orchestration.task_orchestrator import TaskOrchestrator
 from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
 from repositories.tasks.task_postgres_repository import TaskPostgresRepository
 from repositories.raw_address.raw_address_csv_s3_repository import (
-    RawAddressCsvS3Repository,
+    RawAddressListCsvS3Repository,
 )
 
 BUCKET = "splitter-bucket"
@@ -64,7 +64,7 @@ class Harness:
     csv_client: CsvS3Client
     boto_sqs: Any
     queue_url: str
-    repo: RawAddressCsvS3Repository
+    repo: RawAddressListCsvS3Repository
 
 
 @pytest.fixture
@@ -78,7 +78,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
         queue_url = cast(str, queue["QueueUrl"])
 
         csv_client = CsvS3Client(boto_s3, BUCKET)
-        repo = RawAddressCsvS3Repository(csv_client, BUCKET)
+        repo = RawAddressListCsvS3Repository(csv_client, BUCKET)
         queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
 
         # DB: ephemeral PostgreSQL TaskOrchestrator
diff --git a/tests/repositories/raw_address/test_raw_address_csv_s3_repository.py b/tests/repositories/raw_address/test_raw_address_csv_s3_repository.py
index 09fc8fc5..99284ec5 100644
--- a/tests/repositories/raw_address/test_raw_address_csv_s3_repository.py
+++ b/tests/repositories/raw_address/test_raw_address_csv_s3_repository.py
@@ -7,7 +7,7 @@ from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
 from repositories.raw_address.raw_address_csv_s3_repository import (
-    RawAddressCsvS3Repository,
+    RawAddressListCsvS3Repository,
 )
 from tests.infrastructure import make_boto_client
 
@@ -15,22 +15,22 @@ BUCKET = "user-address-bucket"
 
 
 @pytest.fixture
-def repo() -> Iterator[RawAddressCsvS3Repository]:
+def repo() -> Iterator[RawAddressListCsvS3Repository]:
     with mock_aws():
         boto_client = make_boto_client("s3")
         boto_client.create_bucket(Bucket=BUCKET)
         csv_client = CsvS3Client(boto_client, BUCKET)
-        yield RawAddressCsvS3Repository(csv_client, BUCKET)
+        yield RawAddressListCsvS3Repository(csv_client, BUCKET)
 
 
 def _upload_csv(
-    repo: RawAddressCsvS3Repository, rows: list[dict[str, str]], key: str
+    repo: RawAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
 ) -> str:
     return repo._csv_client.save_rows(rows, key)  # pyright: ignore[reportPrivateUsage]
 
 
 def test_load_batch_parses_address_postcode_and_reference(
-    repo: RawAddressCsvS3Repository,
+    repo: RawAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -56,7 +56,7 @@ def test_load_batch_parses_address_postcode_and_reference(
 
 
 def test_load_batch_uses_only_address_1_when_others_missing(
-    repo: RawAddressCsvS3Repository,
+    repo: RawAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -81,7 +81,7 @@ def test_load_batch_uses_only_address_1_when_others_missing(
 
 
 def test_load_batch_handles_missing_internal_reference(
-    repo: RawAddressCsvS3Repository,
+    repo: RawAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -106,7 +106,7 @@ def test_load_batch_handles_missing_internal_reference(
 
 
 def test_load_batch_captures_full_source_row(
-    repo: RawAddressCsvS3Repository,
+    repo: RawAddressListCsvS3Repository,
 ) -> None:
     # A raw EPC-export-shaped row: the splitter must preserve every column,
     # not just the ones it parses into RawAddress fields.
@@ -128,7 +128,7 @@ def test_load_batch_captures_full_source_row(
 
 
 def test_load_batch_raises_when_postcode_column_absent(
-    repo: RawAddressCsvS3Repository,
+    repo: RawAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}]
@@ -140,7 +140,7 @@ def test_load_batch_raises_when_postcode_column_absent(
 
 
 def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
-    repo: RawAddressCsvS3Repository,
+    repo: RawAddressListCsvS3Repository,
 ) -> None:
     # arrange
     row = {
@@ -169,7 +169,7 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
 
 
 def test_save_batch_returns_uri_under_path_prefix(
-    repo: RawAddressCsvS3Repository,
+    repo: RawAddressListCsvS3Repository,
 ) -> None:
     # arrange
     addresses = AddressList(
@@ -194,7 +194,7 @@ def test_save_batch_returns_uri_under_path_prefix(
 
 
 def test_save_then_reload_round_trip_preserves_columns(
-    repo: RawAddressCsvS3Repository,
+    repo: RawAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -227,7 +227,7 @@ def test_save_then_reload_round_trip_preserves_columns(
 
 
 def test_save_batch_uses_unique_filename_per_call(
-    repo: RawAddressCsvS3Repository,
+    repo: RawAddressListCsvS3Repository,
 ) -> None:
     # arrange
     addresses = AddressList(

From 91bb4b6571402b96e2573e8cd194b71c7c16fd18 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 08:22:13 +0000
Subject: [PATCH 12/29] address list

---
 applications/SAL/handler.py                               | 2 +-
 applications/postcode_splitter/handler.py                 | 8 ++++----
 orchestration/postcode_splitter_orchestrator.py           | 4 +++-
 orchestration/sal_orchestrator.py                         | 4 +++-
 .../raw_address/raw_address_list_csv_s3_repository.py     | 4 +++-
 .../test_landlord_description_overrides_orchestrator.py   | 4 +++-
 .../orchestration/test_postcode_splitter_orchestrator.py  | 2 +-
 ...tory.py => test_raw_address_list_csv_s3_repository.py} | 2 +-
 8 files changed, 19 insertions(+), 11 deletions(-)
 rename tests/repositories/raw_address/{test_raw_address_csv_s3_repository.py => test_raw_address_list_csv_s3_repository.py} (98%)

diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py
index c975a039..69f4c04d 100644
--- a/applications/SAL/handler.py
+++ b/applications/SAL/handler.py
@@ -4,7 +4,7 @@ from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.raw_address.raw_address_csv_s3_repository import (
+from repositories.raw_address.raw_address_list_csv_s3_repository import (
     RawAddressListCsvS3Repository,
 )
 from domain.addresses.raw_address import AddressList
diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py
index 1f453858..071ff6f9 100644
--- a/applications/postcode_splitter/handler.py
+++ b/applications/postcode_splitter/handler.py
@@ -12,8 +12,8 @@ from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from infrastructure.csv_s3_client import CsvS3Client
 from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
 from orchestration.task_orchestrator import TaskOrchestrator
-from repositories.raw_address.raw_address_csv_s3_repository import (
-    RawAddressCsvS3Repository,
+from repositories.raw_address.raw_address_list_csv_s3_repository import (
+    RawAddressListCsvS3Repository,
 )
 from utilities.aws_lambda.subtask_handler import subtask_handler
 
@@ -36,12 +36,12 @@ def handler(
     boto_sqs: Any = boto3_client("sqs")
 
     csv_client = CsvS3Client(boto_s3, bucket)
-    raw_address_repo = RawAddressCsvS3Repository(csv_client, bucket)
+    user_address_repo = RawAddressListCsvS3Repository(csv_client, bucket)
     queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
 
     splitter = PostcodeSplitterOrchestrator(
         task_orchestrator=task_orchestrator,
-        raw_address_repo=raw_address_repo,
+        user_address_repo=user_address_repo,
         queue_client=queue_client,
     )
 
diff --git a/orchestration/postcode_splitter_orchestrator.py b/orchestration/postcode_splitter_orchestrator.py
index d1530e9f..20145524 100644
--- a/orchestration/postcode_splitter_orchestrator.py
+++ b/orchestration/postcode_splitter_orchestrator.py
@@ -5,7 +5,9 @@ from uuid import UUID
 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from orchestration.task_orchestrator import TaskOrchestrator
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from repositories.raw_address.raw_address_repository import RawAddressListRepository
+from repositories.raw_address.raw_address_list_repository import (
+    RawAddressListRepository,
+)
 
 
 class PostcodeSplitterOrchestrator:
diff --git a/orchestration/sal_orchestrator.py b/orchestration/sal_orchestrator.py
index 1154befc..f55947e7 100644
--- a/orchestration/sal_orchestrator.py
+++ b/orchestration/sal_orchestrator.py
@@ -1,4 +1,6 @@
-from repositories.raw_address.raw_address_repository import RawAddressListRepository
+from repositories.raw_address.raw_address_list_repository import (
+    RawAddressListRepository,
+)
 from domain.addresses.raw_address import AddressList
 
 
diff --git a/repositories/raw_address/raw_address_list_csv_s3_repository.py b/repositories/raw_address/raw_address_list_csv_s3_repository.py
index b0c2eec7..a636b17b 100644
--- a/repositories/raw_address/raw_address_list_csv_s3_repository.py
+++ b/repositories/raw_address/raw_address_list_csv_s3_repository.py
@@ -7,7 +7,9 @@ from typing import Optional
 from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.raw_address.raw_address_repository import RawAddressListRepository
+from repositories.raw_address.raw_address_list_repository import (
+    RawAddressListRepository,
+)
 
 _ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
 _POSTCODE_COLUMN: str = "postcode"
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index bb79df6c..133d5b39 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -5,7 +5,9 @@ from domain.postcode import Postcode
 from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
-from repositories.raw_address.raw_address_repository import RawAddressListRepository
+from repositories.raw_address.raw_address_list_repository import (
+    RawAddressListRepository,
+)
 
 
 class _StubRawAddressRepository(RawAddressListRepository):
diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py
index 0ce81781..1540112f 100644
--- a/tests/orchestration/test_postcode_splitter_orchestrator.py
+++ b/tests/orchestration/test_postcode_splitter_orchestrator.py
@@ -18,7 +18,7 @@ from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchest
 from orchestration.task_orchestrator import TaskOrchestrator
 from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
 from repositories.tasks.task_postgres_repository import TaskPostgresRepository
-from repositories.raw_address.raw_address_csv_s3_repository import (
+from repositories.raw_address.raw_address_list_csv_s3_repository import (
     RawAddressListCsvS3Repository,
 )
 
diff --git a/tests/repositories/raw_address/test_raw_address_csv_s3_repository.py b/tests/repositories/raw_address/test_raw_address_list_csv_s3_repository.py
similarity index 98%
rename from tests/repositories/raw_address/test_raw_address_csv_s3_repository.py
rename to tests/repositories/raw_address/test_raw_address_list_csv_s3_repository.py
index 99284ec5..8870b29a 100644
--- a/tests/repositories/raw_address/test_raw_address_csv_s3_repository.py
+++ b/tests/repositories/raw_address/test_raw_address_list_csv_s3_repository.py
@@ -6,7 +6,7 @@ from moto import mock_aws
 from domain.addresses.raw_address import AddressList, RawAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.raw_address.raw_address_csv_s3_repository import (
+from repositories.raw_address.raw_address_list_csv_s3_repository import (
     RawAddressListCsvS3Repository,
 )
 from tests.infrastructure import make_boto_client

From 0dee917094057da947dd0ff3ec9b28833d48cd9b Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 08:27:59 +0000
Subject: [PATCH 13/29] unsanitised address list instead of raw address list

---
 applications/SAL/handler.py                   | 20 ++++-----
 applications/postcode_splitter/handler.py     |  8 ++--
 domain/addresses/postcode_batching.py         |  6 +--
 ...{raw_address.py => unsanitised_address.py} |  4 +-
 .../postcode_splitter_orchestrator.py         | 12 ++---
 orchestration/sal_orchestrator.py             | 20 ++++-----
 .../__init__.py                               |  0
 ...nitised_address_list_csv_s3_repository.py} | 14 +++---
 .../unsanitised_address_list_repository.py}   |  4 +-
 .../addresses/test_postcode_batching.py       |  4 +-
 ...address.py => test_unsanitised_address.py} | 44 +++++++++----------
 ...lord_description_overrides_orchestrator.py | 34 +++++++-------
 .../test_postcode_splitter_orchestrator.py    | 10 ++---
 .../__init__.py                               |  0
 .../conftest.py                               |  0
 ...nitised_address_list_csv_s3_repository.py} | 36 +++++++--------
 16 files changed, 107 insertions(+), 109 deletions(-)
 rename domain/addresses/{raw_address.py => unsanitised_address.py} (84%)
 rename repositories/{raw_address => unsanitised_address}/__init__.py (100%)
 rename repositories/{raw_address/raw_address_list_csv_s3_repository.py => unsanitised_address/unsanitised_address_list_csv_s3_repository.py} (84%)
 rename repositories/{raw_address/raw_address_list_repository.py => unsanitised_address/unsanitised_address_list_repository.py} (70%)
 rename tests/domain/addresses/{test_raw_address.py => test_unsanitised_address.py} (51%)
 rename tests/repositories/{raw_address => unsanitised_address}/__init__.py (100%)
 rename tests/repositories/{raw_address => unsanitised_address}/conftest.py (100%)
 rename tests/repositories/{raw_address/test_raw_address_list_csv_s3_repository.py => unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py} (86%)

diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py
index 69f4c04d..fbed3b83 100644
--- a/applications/SAL/handler.py
+++ b/applications/SAL/handler.py
@@ -4,10 +4,10 @@ from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.raw_address.raw_address_list_csv_s3_repository import (
-    RawAddressListCsvS3Repository,
+from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
+    UnsanitisedAddressListCsvS3Repository,
 )
-from domain.addresses.raw_address import AddressList
+from domain.addresses.unsanitised_address import AddressList
 
 
 def handler(
@@ -24,18 +24,16 @@ def handler(
     boto_s3: Any = boto3_client("s3")
 
     csv_client = CsvS3Client(boto_s3, bucket)
-    raw_address_repo = RawAddressListCsvS3Repository(csv_client, bucket)
+    unsanitised_address_repo = UnsanitisedAddressListCsvS3Repository(csv_client, bucket)
 
-    orchestrator = SALOrchestrator(
-        raw_address_repo=raw_address_repo,
+    sal = SALOrchestrator(
+        unsanitised_address_repo=unsanitised_address_repo,
     )
 
-    list_of_raw_address: AddressList = orchestrator.get_raw_addresses(
-        input_s3_uri=s3_uri
-    )
+    addressList: AddressList = sal.get_unsanitised_addresses(input_s3_uri=s3_uri)
 
-    col_to_desc_map = orchestrator.get_col_to_description_mappings(
-        list_of_raw_address=list_of_raw_address
+    col_to_desc_map = sal.get_col_to_description_mappings(
+        list_of_unsanitised_address=addressList
     )
 
     # Read csv of user input
diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py
index 071ff6f9..6614ecda 100644
--- a/applications/postcode_splitter/handler.py
+++ b/applications/postcode_splitter/handler.py
@@ -12,8 +12,8 @@ from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from infrastructure.csv_s3_client import CsvS3Client
 from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
 from orchestration.task_orchestrator import TaskOrchestrator
-from repositories.raw_address.raw_address_list_csv_s3_repository import (
-    RawAddressListCsvS3Repository,
+from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
+    UnsanitisedAddressListCsvS3Repository,
 )
 from utilities.aws_lambda.subtask_handler import subtask_handler
 
@@ -36,12 +36,12 @@ def handler(
     boto_sqs: Any = boto3_client("sqs")
 
     csv_client = CsvS3Client(boto_s3, bucket)
-    user_address_repo = RawAddressListCsvS3Repository(csv_client, bucket)
+    unsanitised_address_repo = UnsanitisedAddressListCsvS3Repository(csv_client, bucket)
     queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
 
     splitter = PostcodeSplitterOrchestrator(
         task_orchestrator=task_orchestrator,
-        user_address_repo=user_address_repo,
+        unsanitised_address_repo=unsanitised_address_repo,
         queue_client=queue_client,
     )
 
diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py
index dd7203b1..18135dbd 100644
--- a/domain/addresses/postcode_batching.py
+++ b/domain/addresses/postcode_batching.py
@@ -2,12 +2,12 @@ from __future__ import annotations
 
 from collections.abc import Iterable, Iterator
 
-from domain.addresses.raw_address import AddressList, RawAddress
+from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
 from domain.postcode import Postcode
 
 
 def iter_postcode_grouped_batches(
-    addresses: Iterable[RawAddress],
+    addresses: Iterable[UnsanitisedAddress],
     *,
     max_batch_size: int = 500,
 ) -> Iterator[AddressList]:
@@ -43,7 +43,7 @@ def iter_postcode_grouped_batches(
 
 
 def _group_by_postcode_in_order(
-    addresses: Iterable[RawAddress],
+    addresses: Iterable[UnsanitisedAddress],
 ) -> dict[Postcode, AddressList]:
     groups: dict[Postcode, AddressList] = {}
     for address in addresses:
diff --git a/domain/addresses/raw_address.py b/domain/addresses/unsanitised_address.py
similarity index 84%
rename from domain/addresses/raw_address.py
rename to domain/addresses/unsanitised_address.py
index f9a2789e..a33f0d88 100644
--- a/domain/addresses/raw_address.py
+++ b/domain/addresses/unsanitised_address.py
@@ -11,7 +11,7 @@ def _empty_source_row() -> dict[str, str]:
 
 
 @dataclass(frozen=True)
-class RawAddress:
+class UnsanitisedAddress:
     address: str
     postcode: Postcode
     org_reference: Optional[str] = None
@@ -21,4 +21,4 @@ class RawAddress:
 
 
 # A batch of raw, pre-standardisation addresses as supplied by a landlord.
-AddressList = NewType("AddressList", list[RawAddress])
+AddressList = NewType("AddressList", list[UnsanitisedAddress])
diff --git a/orchestration/postcode_splitter_orchestrator.py b/orchestration/postcode_splitter_orchestrator.py
index 20145524..d8d81c65 100644
--- a/orchestration/postcode_splitter_orchestrator.py
+++ b/orchestration/postcode_splitter_orchestrator.py
@@ -5,8 +5,8 @@ from uuid import UUID
 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from orchestration.task_orchestrator import TaskOrchestrator
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from repositories.raw_address.raw_address_list_repository import (
-    RawAddressListRepository,
+from repositories.unsanitised_address.unsanitised_address_list_repository import (
+    UnsanitisedAddressListRepository,
 )
 
 
@@ -14,12 +14,12 @@ class PostcodeSplitterOrchestrator:
     def __init__(
         self,
         task_orchestrator: TaskOrchestrator,
-        raw_address_repo: RawAddressListRepository,
+        unsanitised_address_repo: UnsanitisedAddressListRepository,
         queue_client: Address2UprnQueueClient,
         max_batch_size: int = 500,
     ) -> None:
         self._task_orchestrator = task_orchestrator
-        self._raw_address_repo = raw_address_repo
+        self._unsanitised_address_repo = unsanitised_address_repo
         self._queue_client = queue_client
         self._max_batch_size = max_batch_size
 
@@ -30,7 +30,7 @@ class PostcodeSplitterOrchestrator:
         parent_subtask_id: UUID,
         input_s3_uri: str,
     ) -> list[UUID]:
-        addresses = self._raw_address_repo.load_batch(input_s3_uri)
+        addresses = self._unsanitised_address_repo.load_batch(input_s3_uri)
         path_prefix = (
             f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}"
         )
@@ -39,7 +39,7 @@ class PostcodeSplitterOrchestrator:
         for batch in iter_postcode_grouped_batches(
             addresses, max_batch_size=self._max_batch_size
         ):
-            batch_uri = self._raw_address_repo.save_batch(batch, path_prefix)
+            batch_uri = self._unsanitised_address_repo.save_batch(batch, path_prefix)
             child = self._task_orchestrator.create_child_subtask(
                 parent_task_id,
                 inputs={
diff --git a/orchestration/sal_orchestrator.py b/orchestration/sal_orchestrator.py
index f55947e7..1eb768de 100644
--- a/orchestration/sal_orchestrator.py
+++ b/orchestration/sal_orchestrator.py
@@ -1,25 +1,25 @@
-from repositories.raw_address.raw_address_list_repository import (
-    RawAddressListRepository,
+from repositories.unsanitised_address.unsanitised_address_list_repository import (
+    UnsanitisedAddressListRepository,
 )
-from domain.addresses.raw_address import AddressList
+from domain.addresses.unsanitised_address import AddressList
 
 
 class SALOrchestrator:
-    def __init__(self, raw_address_repo: RawAddressListRepository) -> None:
-        self._raw_address_repo = raw_address_repo
+    def __init__(self, unsanitised_address_repo: UnsanitisedAddressListRepository) -> None:
+        self._unsanitised_address_repo = unsanitised_address_repo
 
-    def get_raw_addresses(
+    def get_unsanitised_addresses(
         self,
         input_s3_uri: str,
     ) -> AddressList:
-        return self._raw_address_repo.load_batch(input_s3_uri)
+        return self._unsanitised_address_repo.load_batch(input_s3_uri)
 
     def get_col_to_description_mappings(
-        self, list_of_raw_address: AddressList
+        self, list_of_unsanitised_address: AddressList
     ) -> dict[str, set[str]]:
         mappings: dict[str, set[str]] = {}
-        for raw_address in list_of_raw_address:
-            for key, value in raw_address.additional_info.items():
+        for unsanitised_address in list_of_unsanitised_address:
+            for key, value in unsanitised_address.additional_info.items():
                 # Lower-case so case-only typos collapse to one variant.
                 mappings.setdefault(key, set()).add(value.lower())
         return mappings
diff --git a/repositories/raw_address/__init__.py b/repositories/unsanitised_address/__init__.py
similarity index 100%
rename from repositories/raw_address/__init__.py
rename to repositories/unsanitised_address/__init__.py
diff --git a/repositories/raw_address/raw_address_list_csv_s3_repository.py b/repositories/unsanitised_address/unsanitised_address_list_csv_s3_repository.py
similarity index 84%
rename from repositories/raw_address/raw_address_list_csv_s3_repository.py
rename to repositories/unsanitised_address/unsanitised_address_list_csv_s3_repository.py
index a636b17b..6c382df0 100644
--- a/repositories/raw_address/raw_address_list_csv_s3_repository.py
+++ b/repositories/unsanitised_address/unsanitised_address_list_csv_s3_repository.py
@@ -4,11 +4,11 @@ import uuid
 from datetime import datetime, timezone
 from typing import Optional
 
-from domain.addresses.raw_address import AddressList, RawAddress
+from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.raw_address.raw_address_list_repository import (
-    RawAddressListRepository,
+from repositories.unsanitised_address.unsanitised_address_list_repository import (
+    UnsanitisedAddressListRepository,
 )
 
 _ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
@@ -17,7 +17,7 @@ _INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"
 _POSTCODE_CLEAN_COLUMN: str = "postcode_clean"
 
 
-class RawAddressListCsvS3Repository(RawAddressListRepository):
+class UnsanitisedAddressListCsvS3Repository(UnsanitisedAddressListRepository):
     def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
         self._csv_client = csv_client
         self._bucket = bucket
@@ -36,13 +36,13 @@ class RawAddressListCsvS3Repository(RawAddressListRepository):
                 for col in _ADDRESS_COLUMNS
                 if col in row and row[col].strip()
             ]
-            raw_address = ", ".join(parts)
+            unsanitised_address = ", ".join(parts)
             postcode = row.get(_POSTCODE_COLUMN, "")
             raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
             internal_reference: Optional[str] = raw_ref or None
             addresses.append(
-                RawAddress(
-                    address=raw_address,
+                UnsanitisedAddress(
+                    address=unsanitised_address,
                     postcode=Postcode(postcode),
                     org_reference=internal_reference,
                     additional_info=row,
diff --git a/repositories/raw_address/raw_address_list_repository.py b/repositories/unsanitised_address/unsanitised_address_list_repository.py
similarity index 70%
rename from repositories/raw_address/raw_address_list_repository.py
rename to repositories/unsanitised_address/unsanitised_address_list_repository.py
index 8abb96be..2f842fcd 100644
--- a/repositories/raw_address/raw_address_list_repository.py
+++ b/repositories/unsanitised_address/unsanitised_address_list_repository.py
@@ -2,10 +2,10 @@ from __future__ import annotations
 
 from abc import ABC, abstractmethod
 
-from domain.addresses.raw_address import AddressList
+from domain.addresses.unsanitised_address import AddressList
 
 
-class RawAddressListRepository(ABC):
+class UnsanitisedAddressListRepository(ABC):
     @abstractmethod
     def load_batch(self, s3_uri: str) -> AddressList: ...
 
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py
index c7bb2d00..443e43df 100644
--- a/tests/domain/addresses/test_postcode_batching.py
+++ b/tests/domain/addresses/test_postcode_batching.py
@@ -1,14 +1,14 @@
 import pytest
 
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from domain.addresses.raw_address import AddressList, RawAddress
+from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
 from domain.postcode import Postcode
 
 
 def _addrs(postcode: str, n: int) -> AddressList:
     return AddressList(
         [
-            RawAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
+            UnsanitisedAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
             for i in range(n)
         ]
     )
diff --git a/tests/domain/addresses/test_raw_address.py b/tests/domain/addresses/test_unsanitised_address.py
similarity index 51%
rename from tests/domain/addresses/test_raw_address.py
rename to tests/domain/addresses/test_unsanitised_address.py
index 0309b45e..aa6d0071 100644
--- a/tests/domain/addresses/test_raw_address.py
+++ b/tests/domain/addresses/test_unsanitised_address.py
@@ -2,36 +2,36 @@ import dataclasses
 
 import pytest
 
-from domain.addresses.raw_address import RawAddress
+from domain.addresses.unsanitised_address import UnsanitisedAddress
 from domain.postcode import Postcode
 
 
-def test_raw_address_holds_postcode_value_object() -> None:
+def test_unsanitised_address_holds_postcode_value_object() -> None:
     # act
-    addr = RawAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
     # assert
     assert addr.postcode == Postcode("SW1A1AA")
 
 
-def test_raw_address_preserves_raw_address_verbatim() -> None:
-    # The free-text raw_address string is intentionally NOT normalised --
+def test_unsanitised_address_preserves_unsanitised_address_verbatim() -> None:
+    # The free-text unsanitised_address string is intentionally NOT normalised --
     # only the postcode is canonicalised, and that happens inside Postcode.
     # act
-    addr = RawAddress(address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
+    addr = UnsanitisedAddress(address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.address == "  1 The   Street  "
 
 
-def test_raw_address_internal_reference_defaults_to_none() -> None:
+def test_unsanitised_address_internal_reference_defaults_to_none() -> None:
     # act
-    addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.org_reference is None
 
 
-def test_raw_address_internal_reference_accepted() -> None:
+def test_unsanitised_address_internal_reference_accepted() -> None:
     # act
-    addr = RawAddress(
+    addr = UnsanitisedAddress(
         address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         org_reference="cust-42",
@@ -40,36 +40,36 @@ def test_raw_address_internal_reference_accepted() -> None:
     assert addr.org_reference == "cust-42"
 
 
-def test_raw_address_is_frozen() -> None:
+def test_unsanitised_address_is_frozen() -> None:
     # arrange
-    addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     with pytest.raises(dataclasses.FrozenInstanceError):
         addr.postcode = Postcode("OTHER")  # type: ignore[misc]
 
 
-def test_raw_address_equality_uses_canonical_postcode() -> None:
+def test_unsanitised_address_equality_uses_canonical_postcode() -> None:
     # Postcode sanitises eagerly, so addresses built from different surface
     # forms of the same postcode compare equal.
     # arrange
-    a = RawAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
-    b = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    a = UnsanitisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    b = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     assert a == b
 
 
-def test_raw_address_source_row_defaults_to_empty_dict() -> None:
+def test_unsanitised_address_source_row_defaults_to_empty_dict() -> None:
     # act
-    addr = RawAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.additional_info == {}
 
 
-def test_raw_address_carries_source_row() -> None:
+def test_unsanitised_address_carries_source_row() -> None:
     # arrange
     row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
     # act
-    addr = RawAddress(
+    addr = UnsanitisedAddress(
         address="1 The Street",
         postcode=Postcode("SW1A 1AA"),
         additional_info=row,
@@ -78,16 +78,16 @@ def test_raw_address_carries_source_row() -> None:
     assert addr.additional_info == row
 
 
-def test_raw_address_equality_ignores_source_row() -> None:
+def test_unsanitised_address_equality_ignores_source_row() -> None:
     # source_row is excluded from equality (and hashing): identity stays
     # defined by the parsed fields.
     # arrange
-    a = RawAddress(
+    a = UnsanitisedAddress(
         address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         additional_info={"x": "1"},
     )
-    b = RawAddress(
+    b = UnsanitisedAddress(
         address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         additional_info={"y": "2"},
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index 133d5b39..7e2c5167 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -1,16 +1,16 @@
 from __future__ import annotations
 
-from domain.addresses.raw_address import AddressList, RawAddress
+from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
 from domain.postcode import Postcode
 from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
-from repositories.raw_address.raw_address_list_repository import (
-    RawAddressListRepository,
+from repositories.unsanitised_address.unsanitised_address_list_repository import (
+    UnsanitisedAddressListRepository,
 )
 
 
-class _StubRawAddressRepository(RawAddressListRepository):
+class _StubUnsanitisedAddressRepository(UnsanitisedAddressListRepository):
     """``get_col_to_description_mappings`` never touches the repo."""
 
     def load_batch(self, s3_uri: str) -> AddressList:
@@ -20,8 +20,8 @@ class _StubRawAddressRepository(RawAddressListRepository):
         raise NotImplementedError()
 
 
-def _make_raw_address(landlord_additional_info: dict[str, str]) -> RawAddress:
-    return RawAddress(
+def _make_unsanitised_address(landlord_additional_info: dict[str, str]) -> UnsanitisedAddress:
+    return UnsanitisedAddress(
         address="1 High St",
         postcode=Postcode("AA1 1AA"),
         additional_info=landlord_additional_info,
@@ -29,16 +29,16 @@ def _make_raw_address(landlord_additional_info: dict[str, str]) -> RawAddress:
 
 
 def _orchestrator() -> SALOrchestrator:
-    return SALOrchestrator(raw_address_repo=_StubRawAddressRepository())
+    return SALOrchestrator(unsanitised_address_repo=_StubUnsanitisedAddressRepository())
 
 
 def test_collects_every_value_per_shared_key() -> None:
     # arrange: every address carries the same keys, all values distinct.
     addresses = AddressList(
         [
-            _make_raw_address({"description": "cosy", "condition": "new"}),
-            _make_raw_address({"description": "spacious", "condition": "worn"}),
-            _make_raw_address({"description": "bright", "condition": "fair"}),
+            _make_unsanitised_address({"description": "cosy", "condition": "new"}),
+            _make_unsanitised_address({"description": "spacious", "condition": "worn"}),
+            _make_unsanitised_address({"description": "bright", "condition": "fair"}),
         ]
     )
 
@@ -56,9 +56,9 @@ def test_repeated_values_collapse_to_one_variant() -> None:
     # arrange: two addresses share the same wall description.
     addresses = AddressList(
         [
-            _make_raw_address({"description": "cosy"}),
-            _make_raw_address({"description": "cosy"}),
-            _make_raw_address({"description": "bright"}),
+            _make_unsanitised_address({"description": "cosy"}),
+            _make_unsanitised_address({"description": "cosy"}),
+            _make_unsanitised_address({"description": "bright"}),
         ]
     )
 
@@ -73,9 +73,9 @@ def test_case_only_variants_collapse_to_one() -> None:
     # arrange: the same description typed with inconsistent casing.
     addresses = AddressList(
         [
-            _make_raw_address({"description": "Cosy"}),
-            _make_raw_address({"description": "cosy"}),
-            _make_raw_address({"description": "COSY"}),
+            _make_unsanitised_address({"description": "Cosy"}),
+            _make_unsanitised_address({"description": "cosy"}),
+            _make_unsanitised_address({"description": "COSY"}),
         ]
     )
 
@@ -96,7 +96,7 @@ def test_empty_address_list_yields_empty_mapping() -> None:
 
 def test_single_address_yields_single_value_per_key() -> None:
     # arrange
-    addresses = AddressList([_make_raw_address({"description": "cosy"})])
+    addresses = AddressList([_make_unsanitised_address({"description": "cosy"})])
 
     # act
     mappings = _orchestrator().get_col_to_description_mappings(addresses)
diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py
index 1540112f..4317156c 100644
--- a/tests/orchestration/test_postcode_splitter_orchestrator.py
+++ b/tests/orchestration/test_postcode_splitter_orchestrator.py
@@ -18,8 +18,8 @@ from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchest
 from orchestration.task_orchestrator import TaskOrchestrator
 from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
 from repositories.tasks.task_postgres_repository import TaskPostgresRepository
-from repositories.raw_address.raw_address_list_csv_s3_repository import (
-    RawAddressListCsvS3Repository,
+from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
+    UnsanitisedAddressListCsvS3Repository,
 )
 
 BUCKET = "splitter-bucket"
@@ -64,7 +64,7 @@ class Harness:
     csv_client: CsvS3Client
     boto_sqs: Any
     queue_url: str
-    repo: RawAddressListCsvS3Repository
+    repo: UnsanitisedAddressListCsvS3Repository
 
 
 @pytest.fixture
@@ -78,7 +78,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
         queue_url = cast(str, queue["QueueUrl"])
 
         csv_client = CsvS3Client(boto_s3, BUCKET)
-        repo = RawAddressListCsvS3Repository(csv_client, BUCKET)
+        repo = UnsanitisedAddressListCsvS3Repository(csv_client, BUCKET)
         queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
 
         # DB: ephemeral PostgreSQL TaskOrchestrator
@@ -91,7 +91,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
 
             splitter = PostcodeSplitterOrchestrator(
                 task_orchestrator=task_orchestrator,
-                raw_address_repo=repo,
+                unsanitised_address_repo=repo,
                 queue_client=queue_client,
                 max_batch_size=3,
             )
diff --git a/tests/repositories/raw_address/__init__.py b/tests/repositories/unsanitised_address/__init__.py
similarity index 100%
rename from tests/repositories/raw_address/__init__.py
rename to tests/repositories/unsanitised_address/__init__.py
diff --git a/tests/repositories/raw_address/conftest.py b/tests/repositories/unsanitised_address/conftest.py
similarity index 100%
rename from tests/repositories/raw_address/conftest.py
rename to tests/repositories/unsanitised_address/conftest.py
diff --git a/tests/repositories/raw_address/test_raw_address_list_csv_s3_repository.py b/tests/repositories/unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py
similarity index 86%
rename from tests/repositories/raw_address/test_raw_address_list_csv_s3_repository.py
rename to tests/repositories/unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py
index 8870b29a..ff26f08a 100644
--- a/tests/repositories/raw_address/test_raw_address_list_csv_s3_repository.py
+++ b/tests/repositories/unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py
@@ -3,11 +3,11 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws
 
-from domain.addresses.raw_address import AddressList, RawAddress
+from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.raw_address.raw_address_list_csv_s3_repository import (
-    RawAddressListCsvS3Repository,
+from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
+    UnsanitisedAddressListCsvS3Repository,
 )
 from tests.infrastructure import make_boto_client
 
@@ -15,22 +15,22 @@ BUCKET = "user-address-bucket"
 
 
 @pytest.fixture
-def repo() -> Iterator[RawAddressListCsvS3Repository]:
+def repo() -> Iterator[UnsanitisedAddressListCsvS3Repository]:
     with mock_aws():
         boto_client = make_boto_client("s3")
         boto_client.create_bucket(Bucket=BUCKET)
         csv_client = CsvS3Client(boto_client, BUCKET)
-        yield RawAddressListCsvS3Repository(csv_client, BUCKET)
+        yield UnsanitisedAddressListCsvS3Repository(csv_client, BUCKET)
 
 
 def _upload_csv(
-    repo: RawAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
+    repo: UnsanitisedAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
 ) -> str:
     return repo._csv_client.save_rows(rows, key)  # pyright: ignore[reportPrivateUsage]
 
 
 def test_load_batch_parses_address_postcode_and_reference(
-    repo: RawAddressListCsvS3Repository,
+    repo: UnsanitisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -56,7 +56,7 @@ def test_load_batch_parses_address_postcode_and_reference(
 
 
 def test_load_batch_uses_only_address_1_when_others_missing(
-    repo: RawAddressListCsvS3Repository,
+    repo: UnsanitisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -81,7 +81,7 @@ def test_load_batch_uses_only_address_1_when_others_missing(
 
 
 def test_load_batch_handles_missing_internal_reference(
-    repo: RawAddressListCsvS3Repository,
+    repo: UnsanitisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -106,10 +106,10 @@ def test_load_batch_handles_missing_internal_reference(
 
 
 def test_load_batch_captures_full_source_row(
-    repo: RawAddressListCsvS3Repository,
+    repo: UnsanitisedAddressListCsvS3Repository,
 ) -> None:
     # A raw EPC-export-shaped row: the splitter must preserve every column,
-    # not just the ones it parses into RawAddress fields.
+    # not just the ones it parses into UnsanitisedAddress fields.
     # arrange
     row = {
         "Asset Reference": "511",
@@ -128,7 +128,7 @@ def test_load_batch_captures_full_source_row(
 
 
 def test_load_batch_raises_when_postcode_column_absent(
-    repo: RawAddressListCsvS3Repository,
+    repo: UnsanitisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}]
@@ -140,7 +140,7 @@ def test_load_batch_raises_when_postcode_column_absent(
 
 
 def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
-    repo: RawAddressListCsvS3Repository,
+    repo: UnsanitisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     row = {
@@ -169,12 +169,12 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
 
 
 def test_save_batch_returns_uri_under_path_prefix(
-    repo: RawAddressListCsvS3Repository,
+    repo: UnsanitisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     addresses = AddressList(
         [
-            RawAddress(
+            UnsanitisedAddress(
                 address="1 High Street",
                 postcode=Postcode("SW1A 1AA"),
                 additional_info={
@@ -194,7 +194,7 @@ def test_save_batch_returns_uri_under_path_prefix(
 
 
 def test_save_then_reload_round_trip_preserves_columns(
-    repo: RawAddressListCsvS3Repository,
+    repo: UnsanitisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -227,12 +227,12 @@ def test_save_then_reload_round_trip_preserves_columns(
 
 
 def test_save_batch_uses_unique_filename_per_call(
-    repo: RawAddressListCsvS3Repository,
+    repo: UnsanitisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     addresses = AddressList(
         [
-            RawAddress(
+            UnsanitisedAddress(
                 address="1 High Street",
                 postcode=Postcode("SW1A 1AA"),
                 additional_info={

From 61efcad27b5ac309fcc1dd87dddee610fa9f1a1e Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 10:13:32 +0000
Subject: [PATCH 14/29] standardise Address

---
 UBIQUITOUS_LANGUAGE.md                        | 22 ++++++----
 applications/SAL/handler.py                   | 25 ++++++++---
 applications/postcode_splitter/handler.py     |  8 ++--
 domain/addresses/postcode_batching.py         |  6 +--
 domain/addresses/standardised_address_list.py | 21 +++++++++
 ...d_address.py => unstandardised_address.py} |  4 +-
 .../postcode_splitter_orchestrator.py         | 12 ++---
 orchestration/sal_orchestrator.py             | 20 ++++-----
 .../__init__.py                               |  0
 ...ardised_address_list_csv_s3_repository.py} | 14 +++---
 ...unstandardised_address_list_repository.py} |  4 +-
 .../addresses/test_postcode_batching.py       |  4 +-
 ...ress.py => test_unstandardised_address.py} | 44 +++++++++----------
 ...lord_description_overrides_orchestrator.py | 34 +++++++-------
 .../test_postcode_splitter_orchestrator.py    | 10 ++---
 .../__init__.py                               |  0
 .../conftest.py                               |  0
 ...ardised_address_list_csv_s3_repository.py} | 36 +++++++--------
 18 files changed, 151 insertions(+), 113 deletions(-)
 create mode 100644 domain/addresses/standardised_address_list.py
 rename domain/addresses/{unsanitised_address.py => unstandardised_address.py} (84%)
 rename repositories/{unsanitised_address => unstandardised_address}/__init__.py (100%)
 rename repositories/{unsanitised_address/unsanitised_address_list_csv_s3_repository.py => unstandardised_address/unstandardised_address_list_csv_s3_repository.py} (83%)
 rename repositories/{unsanitised_address/unsanitised_address_list_repository.py => unstandardised_address/unstandardised_address_list_repository.py} (69%)
 rename tests/domain/addresses/{test_unsanitised_address.py => test_unstandardised_address.py} (52%)
 rename tests/repositories/{unsanitised_address => unstandardised_address}/__init__.py (100%)
 rename tests/repositories/{unsanitised_address => unstandardised_address}/conftest.py (100%)
 rename tests/repositories/{unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py => unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py} (85%)

diff --git a/UBIQUITOUS_LANGUAGE.md b/UBIQUITOUS_LANGUAGE.md
index c3074c02..d2fde99a 100644
--- a/UBIQUITOUS_LANGUAGE.md
+++ b/UBIQUITOUS_LANGUAGE.md
@@ -23,16 +23,18 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve
 |------|------------|------------------|
 | **UPRN** | Unique Property Reference Number — the government-issued permanent identifier for a physical address in the UK. | "property ID", "address ID", "code" |
 | **Postcode** | A UK postal code used to group nearby addresses; the primary search key for finding EPC records. | "zip code", "postal code" |
-| **User Address** | A structured dataclass (`domain.addresses.user_address.UserAddress`) capturing a customer-supplied address: a free-text `user_address` line, a canonical `postcode` (sanitised on construction), and an optional `internal_reference`. The bare string sense -- the raw free-text address line as it arrives from upstream ingestion, before being wrapped -- remains valid when discussing CSV columns, API payloads, or other upstream contexts; in domain code, prefer the dataclass. | "user input", "raw address", "user_inputed_address" |
+| **Unstandardised Address** | A frozen dataclass (`domain.addresses.unstandardised_address.UnstandardisedAddress`) capturing a single address exactly as a customer supplied it, before any standardisation: a free-text `address` line (intentionally NOT normalised), a canonical `postcode` (a `Postcode` value object, sanitised on construction), an optional `org_reference` (the customer's own identifier for the property), and `additional_info` (the full source row — every column of the customer's upload, preserved verbatim). | "user address", "asset list", "raw address", "landlord address", "Hyde address" |
+| **Address List** | A nominal `NewType` over `list[UnstandardisedAddress]` (`domain.addresses.unstandardised_address.AddressList`) — a batch of unstandardised addresses, such as one customer's bulk-onboarding upload or a postcode-grouped sub-batch produced for downstream processing. Being nominal, it is constructed explicitly: `AddressList([...])`. It is the raw *input* to ingestion; the standardised *output* is a **Standardised Asset List**. | "asset list", "Hyde address list", "user addresses" |
+| **Standardised Asset List (SAL)** | A customer's property portfolio after ingestion has cleaned and standardised it — each property carrying a canonical field set (UPRN, standardised address, postcode, property type, built form, …). It is the standardised *output* of the pipeline whose raw *input* is an **Address List** of **Unstandardised Addresses**; generated by the `SALOrchestrator`. (Legacy implementation: `asset_list.AssetList` via `load_standardised_asset_list`.) | "address list" (that is the raw input), "asset register", "portfolio list" |
 | **Dwelling** | A single residential unit that can hold an EPC — a house, flat, or maisonette. | "property", "unit", "home" |
 
 ## Address Matching
 
 | Term | Definition | Aliases to avoid |
 |------|------------|------------------|
-| **Lexiscore** | A similarity score in [0, 1] between a user address and a candidate EPC address; combines token overlap and character-level similarity. | "score", "match score", "similarity" |
+| **Lexiscore** | A similarity score in [0, 1] between an unstandardised address and a candidate EPC address; combines token overlap and character-level similarity. | "score", "match score", "similarity" |
 | **Lexirank** | Dense rank of candidates sorted by lexiscore descending; rank 1 = best match. | "rank", "position" |
-| **UPRN Candidate** | An EPC search result that is a plausible match for a given user address, before scoring decides the winner. | "match candidate", "result" |
+| **UPRN Candidate** | An EPC search result that is a plausible match for a given unstandardised address, before scoring decides the winner. | "match candidate", "result" |
 | **Score Threshold** | The minimum lexiscore (currently 0.6) below which no match is returned even if a candidate exists. | "minimum score", "cutoff" |
 | **Ambiguous Match** | A matching outcome where two or more candidates share lexirank 1, making it impossible to select a unique winner. | "tie", "draw", "duplicate" |
 | **Best Match** | The single UPRN candidate with lexirank 1 that meets or exceeds the score threshold. | "winner", "top result" |
@@ -53,14 +55,16 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve
 - A **Dwelling** may have multiple **EPCs** across time; the one with the most recent **Registration Date** is the current one.
 - A **UPRN** identifies a **Dwelling** permanently; it does not change when the property changes owner.
 - An **EPC Search Result** is a summary; it points to a full **EPC** via its **Certificate Number**.
-- **Address Matching** uses a **User Address** and **Postcode** to find a **UPRN** by scoring **UPRN Candidates** from an EPC search.
+- An **Address List** is an ordered batch of **Unstandardised Addresses**; a customer's bulk-onboarding upload arrives as one.
+- Ingestion turns an **Address List** (raw input) into a **Standardised Asset List** (standardised output) — the **SAL Orchestrator** drives this.
+- **Address Matching** uses an **Unstandardised Address** and **Postcode** to find a **UPRN** by scoring **UPRN Candidates** from an EPC search.
 - A **Lexirank** of 1 with no **Ambiguous Match** and a **Lexiscore** ≥ the **Score Threshold** produces a **Best Match**.
 
 ## Example dialogue
 
-> **Dev:** "We have a user address and postcode. How do we find the UPRN?"
+> **Dev:** "We have an unstandardised address and postcode. How do we find the UPRN?"
 
-> **Domain expert:** "Search the **New EPC API** by **Postcode** — you get back a list of **EPC Search Results** for that area. Each one has an address and a **UPRN**. Score each against the **User Address** using the **Lexiscore**. If the top **UPRN Candidate** scores above the **Score Threshold** and there's no **Ambiguous Match**, that's your **Best Match**."
+> **Domain expert:** "Search the **New EPC API** by **Postcode** — you get back a list of **EPC Search Results** for that area. Each one has an address and a **UPRN**. Score each against the **Unstandardised Address** using the **Lexiscore**. If the top **UPRN Candidate** scores above the **Score Threshold** and there's no **Ambiguous Match**, that's your **Best Match**."
 
 > **Dev:** "What if two results share the same address line 1?"
 
@@ -72,7 +76,9 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve
 
 ## Flagged ambiguities
 
-- **"address"** appears as both the raw **User Address** (free-text from customer data, or the structured `UserAddress` dataclass that wraps it) and a structured field on an **EPC Search Result** (normalised address lines). Always qualify: "user address" vs "EPC address" or "address line 1". Within `domain/`, **User Address** specifically means the `UserAddress` dataclass; in upstream ingestion contexts (CSV columns, SQS payloads) it can still mean the raw string sense.
+- **"address"** appears in several senses: the **Unstandardised Address** dataclass (one customer-supplied address before standardisation), its free-text `address` field, and the normalised address lines on an **EPC Search Result**. Always qualify: "unstandardised address" vs "EPC address" or "address line 1". Within `domain/addresses/`, the dataclass is **Unstandardised Address**; in upstream ingestion contexts (CSV columns, SQS payloads) "address" may still mean the bare free-text string.
 - **"score"** is used for the `AddressMatch.score()` function output, the `lexiscore` DataFrame column, and informally in conversation. Prefer **Lexiscore** in domain discussions; reserve "score" for method-level code comments.
-- **"user_inputed_address"** in `backend/address2UPRN/main.py` is a misspelling and a synonym for **User Address** — the canonical term. New code should use `user_address`.
+- **"user_inputed_address"** (and `user_address`) in `backend/address2UPRN/` is legacy naming — a misspelled synonym for what is now the **Unstandardised Address**. That address-matching code has not been renamed; new code should use **Unstandardised Address**.
+- **"Hyde address list"** — "Hyde" is the name of one customer, not a domain concept. A domain expert may say "the Hyde address list" because Hyde is the customer in front of them, but the generalised term is **Address List** (and **Unstandardised Address** for a single item). A customer's identity is data — it belongs in `org_reference` or `additional_info`, never in a type or module name.
+- **"address list"** vs **"asset list"** — opposite ends of the ingestion pipeline; do not conflate them. An **Address List** is the raw *input* (unstandardised addresses as the customer supplied them); a **Standardised Asset List** is the standardised *output*. The historical `AssetList` dataclass (now **Unstandardised Address**) misnamed the input an "asset list" — that mistake is what the rename corrected.
 - **"EPC"** is overloaded as both the document (an Energy Performance Certificate) and the rating band letter. Use **EPC** for the document and **EPC Band** for the letter.
diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py
index fbed3b83..6076a662 100644
--- a/applications/SAL/handler.py
+++ b/applications/SAL/handler.py
@@ -4,10 +4,10 @@ from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
-    UnsanitisedAddressListCsvS3Repository,
+from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
+    UnstandardisedAddressListCsvS3Repository,
 )
-from domain.addresses.unsanitised_address import AddressList
+from domain.addresses.unstandardised_address import AddressList
 
 
 def handler(
@@ -24,16 +24,16 @@ def handler(
     boto_s3: Any = boto3_client("s3")
 
     csv_client = CsvS3Client(boto_s3, bucket)
-    unsanitised_address_repo = UnsanitisedAddressListCsvS3Repository(csv_client, bucket)
+    unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository(csv_client, bucket)
 
     sal = SALOrchestrator(
-        unsanitised_address_repo=unsanitised_address_repo,
+        unstandardised_address_repo=unstandardised_address_repo,
     )
 
-    addressList: AddressList = sal.get_unsanitised_addresses(input_s3_uri=s3_uri)
+    addressList: AddressList = sal.get_unstandardised_addresses(input_s3_uri=s3_uri)
 
     col_to_desc_map = sal.get_col_to_description_mappings(
-        list_of_unsanitised_address=addressList
+        list_of_unstandardised_address=addressList
     )
 
     # Read csv of user input
@@ -41,4 +41,15 @@ def handler(
     # { walls: "wall variation 1", "wall varition 2"}
     # Call chatgpt(input from landlord, our way of understanding the mapping) Retrun -> lanlordMapped
 
+
+    # ENUM Walls:
+    #     cavity_wall_1976: 1
+    
+    # 1) Could download site notes from pashub and get
+    # 2) Open Data Communities API ->
+    # 3) new api
+    
+    # User story:
+    # cavity: asbuilt (1976 - 1982): 
+
     return {"hello world": ["hello world"]}
diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py
index 6614ecda..ac2c4e99 100644
--- a/applications/postcode_splitter/handler.py
+++ b/applications/postcode_splitter/handler.py
@@ -12,8 +12,8 @@ from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from infrastructure.csv_s3_client import CsvS3Client
 from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
 from orchestration.task_orchestrator import TaskOrchestrator
-from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
-    UnsanitisedAddressListCsvS3Repository,
+from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
+    UnstandardisedAddressListCsvS3Repository,
 )
 from utilities.aws_lambda.subtask_handler import subtask_handler
 
@@ -36,12 +36,12 @@ def handler(
     boto_sqs: Any = boto3_client("sqs")
 
     csv_client = CsvS3Client(boto_s3, bucket)
-    unsanitised_address_repo = UnsanitisedAddressListCsvS3Repository(csv_client, bucket)
+    unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository(csv_client, bucket)
     queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
 
     splitter = PostcodeSplitterOrchestrator(
         task_orchestrator=task_orchestrator,
-        unsanitised_address_repo=unsanitised_address_repo,
+        unstandardised_address_repo=unstandardised_address_repo,
         queue_client=queue_client,
     )
 
diff --git a/domain/addresses/postcode_batching.py b/domain/addresses/postcode_batching.py
index 18135dbd..ca4cd752 100644
--- a/domain/addresses/postcode_batching.py
+++ b/domain/addresses/postcode_batching.py
@@ -2,12 +2,12 @@ from __future__ import annotations
 
 from collections.abc import Iterable, Iterator
 
-from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
 
 
 def iter_postcode_grouped_batches(
-    addresses: Iterable[UnsanitisedAddress],
+    addresses: Iterable[UnstandardisedAddress],
     *,
     max_batch_size: int = 500,
 ) -> Iterator[AddressList]:
@@ -43,7 +43,7 @@ def iter_postcode_grouped_batches(
 
 
 def _group_by_postcode_in_order(
-    addresses: Iterable[UnsanitisedAddress],
+    addresses: Iterable[UnstandardisedAddress],
 ) -> dict[Postcode, AddressList]:
     groups: dict[Postcode, AddressList] = {}
     for address in addresses:
diff --git a/domain/addresses/standardised_address_list.py b/domain/addresses/standardised_address_list.py
new file mode 100644
index 00000000..8e3f4fc7
--- /dev/null
+++ b/domain/addresses/standardised_address_list.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import NewType, Optional
+
+from domain.postcode import Postcode
+
+
+def _empty_source_row() -> dict[str, str]:
+    return {}
+
+
+@dataclass(frozen=True)
+class StandardisedAddress:
+    address: str
+    postcode: Postcode
+    org_reference: Optional[str] = None
+
+
+# Standardised Asset List -- the cleaned output counterpart to AddressList.
+SAL = NewType("SAL", list[StandardisedAddress])
diff --git a/domain/addresses/unsanitised_address.py b/domain/addresses/unstandardised_address.py
similarity index 84%
rename from domain/addresses/unsanitised_address.py
rename to domain/addresses/unstandardised_address.py
index a33f0d88..8917bdf4 100644
--- a/domain/addresses/unsanitised_address.py
+++ b/domain/addresses/unstandardised_address.py
@@ -11,7 +11,7 @@ def _empty_source_row() -> dict[str, str]:
 
 
 @dataclass(frozen=True)
-class UnsanitisedAddress:
+class UnstandardisedAddress:
     address: str
     postcode: Postcode
     org_reference: Optional[str] = None
@@ -21,4 +21,4 @@ class UnsanitisedAddress:
 
 
 # A batch of raw, pre-standardisation addresses as supplied by a landlord.
-AddressList = NewType("AddressList", list[UnsanitisedAddress])
+AddressList = NewType("AddressList", list[UnstandardisedAddress])
diff --git a/orchestration/postcode_splitter_orchestrator.py b/orchestration/postcode_splitter_orchestrator.py
index d8d81c65..1a7277d5 100644
--- a/orchestration/postcode_splitter_orchestrator.py
+++ b/orchestration/postcode_splitter_orchestrator.py
@@ -5,8 +5,8 @@ from uuid import UUID
 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from orchestration.task_orchestrator import TaskOrchestrator
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from repositories.unsanitised_address.unsanitised_address_list_repository import (
-    UnsanitisedAddressListRepository,
+from repositories.unstandardised_address.unstandardised_address_list_repository import (
+    UnstandardisedAddressListRepository,
 )
 
 
@@ -14,12 +14,12 @@ class PostcodeSplitterOrchestrator:
     def __init__(
         self,
         task_orchestrator: TaskOrchestrator,
-        unsanitised_address_repo: UnsanitisedAddressListRepository,
+        unstandardised_address_repo: UnstandardisedAddressListRepository,
         queue_client: Address2UprnQueueClient,
         max_batch_size: int = 500,
     ) -> None:
         self._task_orchestrator = task_orchestrator
-        self._unsanitised_address_repo = unsanitised_address_repo
+        self._unstandardised_address_repo = unstandardised_address_repo
         self._queue_client = queue_client
         self._max_batch_size = max_batch_size
 
@@ -30,7 +30,7 @@ class PostcodeSplitterOrchestrator:
         parent_subtask_id: UUID,
         input_s3_uri: str,
     ) -> list[UUID]:
-        addresses = self._unsanitised_address_repo.load_batch(input_s3_uri)
+        addresses = self._unstandardised_address_repo.load_batch(input_s3_uri)
         path_prefix = (
             f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}"
         )
@@ -39,7 +39,7 @@ class PostcodeSplitterOrchestrator:
         for batch in iter_postcode_grouped_batches(
             addresses, max_batch_size=self._max_batch_size
         ):
-            batch_uri = self._unsanitised_address_repo.save_batch(batch, path_prefix)
+            batch_uri = self._unstandardised_address_repo.save_batch(batch, path_prefix)
             child = self._task_orchestrator.create_child_subtask(
                 parent_task_id,
                 inputs={
diff --git a/orchestration/sal_orchestrator.py b/orchestration/sal_orchestrator.py
index 1eb768de..8ad21388 100644
--- a/orchestration/sal_orchestrator.py
+++ b/orchestration/sal_orchestrator.py
@@ -1,25 +1,25 @@
-from repositories.unsanitised_address.unsanitised_address_list_repository import (
-    UnsanitisedAddressListRepository,
+from repositories.unstandardised_address.unstandardised_address_list_repository import (
+    UnstandardisedAddressListRepository,
 )
-from domain.addresses.unsanitised_address import AddressList
+from domain.addresses.unstandardised_address import AddressList
 
 
 class SALOrchestrator:
-    def __init__(self, unsanitised_address_repo: UnsanitisedAddressListRepository) -> None:
-        self._unsanitised_address_repo = unsanitised_address_repo
+    def __init__(self, unstandardised_address_repo: UnstandardisedAddressListRepository) -> None:
+        self._unstandardised_address_repo = unstandardised_address_repo
 
-    def get_unsanitised_addresses(
+    def get_unstandardised_addresses(
         self,
         input_s3_uri: str,
     ) -> AddressList:
-        return self._unsanitised_address_repo.load_batch(input_s3_uri)
+        return self._unstandardised_address_repo.load_batch(input_s3_uri)
 
     def get_col_to_description_mappings(
-        self, list_of_unsanitised_address: AddressList
+        self, list_of_unstandardised_address: AddressList
     ) -> dict[str, set[str]]:
         mappings: dict[str, set[str]] = {}
-        for unsanitised_address in list_of_unsanitised_address:
-            for key, value in unsanitised_address.additional_info.items():
+        for unstandardised_address in list_of_unstandardised_address:
+            for key, value in unstandardised_address.additional_info.items():
                 # Lower-case so case-only typos collapse to one variant.
                 mappings.setdefault(key, set()).add(value.lower())
         return mappings
diff --git a/repositories/unsanitised_address/__init__.py b/repositories/unstandardised_address/__init__.py
similarity index 100%
rename from repositories/unsanitised_address/__init__.py
rename to repositories/unstandardised_address/__init__.py
diff --git a/repositories/unsanitised_address/unsanitised_address_list_csv_s3_repository.py b/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py
similarity index 83%
rename from repositories/unsanitised_address/unsanitised_address_list_csv_s3_repository.py
rename to repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py
index 6c382df0..260fce1d 100644
--- a/repositories/unsanitised_address/unsanitised_address_list_csv_s3_repository.py
+++ b/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py
@@ -4,11 +4,11 @@ import uuid
 from datetime import datetime, timezone
 from typing import Optional
 
-from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.unsanitised_address.unsanitised_address_list_repository import (
-    UnsanitisedAddressListRepository,
+from repositories.unstandardised_address.unstandardised_address_list_repository import (
+    UnstandardisedAddressListRepository,
 )
 
 _ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
@@ -17,7 +17,7 @@ _INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"
 _POSTCODE_CLEAN_COLUMN: str = "postcode_clean"
 
 
-class UnsanitisedAddressListCsvS3Repository(UnsanitisedAddressListRepository):
+class UnstandardisedAddressListCsvS3Repository(UnstandardisedAddressListRepository):
     def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
         self._csv_client = csv_client
         self._bucket = bucket
@@ -36,13 +36,13 @@ class UnsanitisedAddressListCsvS3Repository(UnsanitisedAddressListRepository):
                 for col in _ADDRESS_COLUMNS
                 if col in row and row[col].strip()
             ]
-            unsanitised_address = ", ".join(parts)
+            unstandardised_address = ", ".join(parts)
             postcode = row.get(_POSTCODE_COLUMN, "")
             raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
             internal_reference: Optional[str] = raw_ref or None
             addresses.append(
-                UnsanitisedAddress(
-                    address=unsanitised_address,
+                UnstandardisedAddress(
+                    address=unstandardised_address,
                     postcode=Postcode(postcode),
                     org_reference=internal_reference,
                     additional_info=row,
diff --git a/repositories/unsanitised_address/unsanitised_address_list_repository.py b/repositories/unstandardised_address/unstandardised_address_list_repository.py
similarity index 69%
rename from repositories/unsanitised_address/unsanitised_address_list_repository.py
rename to repositories/unstandardised_address/unstandardised_address_list_repository.py
index 2f842fcd..4d446304 100644
--- a/repositories/unsanitised_address/unsanitised_address_list_repository.py
+++ b/repositories/unstandardised_address/unstandardised_address_list_repository.py
@@ -2,10 +2,10 @@ from __future__ import annotations
 
 from abc import ABC, abstractmethod
 
-from domain.addresses.unsanitised_address import AddressList
+from domain.addresses.unstandardised_address import AddressList
 
 
-class UnsanitisedAddressListRepository(ABC):
+class UnstandardisedAddressListRepository(ABC):
     @abstractmethod
     def load_batch(self, s3_uri: str) -> AddressList: ...
 
diff --git a/tests/domain/addresses/test_postcode_batching.py b/tests/domain/addresses/test_postcode_batching.py
index 443e43df..e5b3e186 100644
--- a/tests/domain/addresses/test_postcode_batching.py
+++ b/tests/domain/addresses/test_postcode_batching.py
@@ -1,14 +1,14 @@
 import pytest
 
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
 
 
 def _addrs(postcode: str, n: int) -> AddressList:
     return AddressList(
         [
-            UnsanitisedAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
+            UnstandardisedAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
             for i in range(n)
         ]
     )
diff --git a/tests/domain/addresses/test_unsanitised_address.py b/tests/domain/addresses/test_unstandardised_address.py
similarity index 52%
rename from tests/domain/addresses/test_unsanitised_address.py
rename to tests/domain/addresses/test_unstandardised_address.py
index aa6d0071..dd4eabdb 100644
--- a/tests/domain/addresses/test_unsanitised_address.py
+++ b/tests/domain/addresses/test_unstandardised_address.py
@@ -2,36 +2,36 @@ import dataclasses
 
 import pytest
 
-from domain.addresses.unsanitised_address import UnsanitisedAddress
+from domain.addresses.unstandardised_address import UnstandardisedAddress
 from domain.postcode import Postcode
 
 
-def test_unsanitised_address_holds_postcode_value_object() -> None:
+def test_unstandardised_address_holds_postcode_value_object() -> None:
     # act
-    addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    addr = UnstandardisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
     # assert
     assert addr.postcode == Postcode("SW1A1AA")
 
 
-def test_unsanitised_address_preserves_unsanitised_address_verbatim() -> None:
-    # The free-text unsanitised_address string is intentionally NOT normalised --
+def test_unstandardised_address_preserves_unstandardised_address_verbatim() -> None:
+    # The free-text unstandardised_address string is intentionally NOT normalised --
     # only the postcode is canonicalised, and that happens inside Postcode.
     # act
-    addr = UnsanitisedAddress(address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
+    addr = UnstandardisedAddress(address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.address == "  1 The   Street  "
 
 
-def test_unsanitised_address_internal_reference_defaults_to_none() -> None:
+def test_unstandardised_address_internal_reference_defaults_to_none() -> None:
     # act
-    addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = UnstandardisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.org_reference is None
 
 
-def test_unsanitised_address_internal_reference_accepted() -> None:
+def test_unstandardised_address_internal_reference_accepted() -> None:
     # act
-    addr = UnsanitisedAddress(
+    addr = UnstandardisedAddress(
         address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         org_reference="cust-42",
@@ -40,36 +40,36 @@ def test_unsanitised_address_internal_reference_accepted() -> None:
     assert addr.org_reference == "cust-42"
 
 
-def test_unsanitised_address_is_frozen() -> None:
+def test_unstandardised_address_is_frozen() -> None:
     # arrange
-    addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = UnstandardisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     with pytest.raises(dataclasses.FrozenInstanceError):
         addr.postcode = Postcode("OTHER")  # type: ignore[misc]
 
 
-def test_unsanitised_address_equality_uses_canonical_postcode() -> None:
+def test_unstandardised_address_equality_uses_canonical_postcode() -> None:
     # Postcode sanitises eagerly, so addresses built from different surface
     # forms of the same postcode compare equal.
     # arrange
-    a = UnsanitisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
-    b = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    a = UnstandardisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    b = UnstandardisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # act / assert
     assert a == b
 
 
-def test_unsanitised_address_source_row_defaults_to_empty_dict() -> None:
+def test_unstandardised_address_source_row_defaults_to_empty_dict() -> None:
     # act
-    addr = UnsanitisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    addr = UnstandardisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
     # assert
     assert addr.additional_info == {}
 
 
-def test_unsanitised_address_carries_source_row() -> None:
+def test_unstandardised_address_carries_source_row() -> None:
     # arrange
     row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
     # act
-    addr = UnsanitisedAddress(
+    addr = UnstandardisedAddress(
         address="1 The Street",
         postcode=Postcode("SW1A 1AA"),
         additional_info=row,
@@ -78,16 +78,16 @@ def test_unsanitised_address_carries_source_row() -> None:
     assert addr.additional_info == row
 
 
-def test_unsanitised_address_equality_ignores_source_row() -> None:
+def test_unstandardised_address_equality_ignores_source_row() -> None:
     # source_row is excluded from equality (and hashing): identity stays
     # defined by the parsed fields.
     # arrange
-    a = UnsanitisedAddress(
+    a = UnstandardisedAddress(
         address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         additional_info={"x": "1"},
     )
-    b = UnsanitisedAddress(
+    b = UnstandardisedAddress(
         address="1 The Street",
         postcode=Postcode("SW1A1AA"),
         additional_info={"y": "2"},
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index 7e2c5167..b3658014 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -1,16 +1,16 @@
 from __future__ import annotations
 
-from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
 from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
-from repositories.unsanitised_address.unsanitised_address_list_repository import (
-    UnsanitisedAddressListRepository,
+from repositories.unstandardised_address.unstandardised_address_list_repository import (
+    UnstandardisedAddressListRepository,
 )
 
 
-class _StubUnsanitisedAddressRepository(UnsanitisedAddressListRepository):
+class _StubUnstandardisedAddressRepository(UnstandardisedAddressListRepository):
     """``get_col_to_description_mappings`` never touches the repo."""
 
     def load_batch(self, s3_uri: str) -> AddressList:
@@ -20,8 +20,8 @@ class _StubUnsanitisedAddressRepository(UnsanitisedAddressListRepository):
         raise NotImplementedError()
 
 
-def _make_unsanitised_address(landlord_additional_info: dict[str, str]) -> UnsanitisedAddress:
-    return UnsanitisedAddress(
+def _make_unstandardised_address(landlord_additional_info: dict[str, str]) -> UnstandardisedAddress:
+    return UnstandardisedAddress(
         address="1 High St",
         postcode=Postcode("AA1 1AA"),
         additional_info=landlord_additional_info,
@@ -29,16 +29,16 @@ def _make_unsanitised_address(landlord_additional_info: dict[str, str]) -> Unsan
 
 
 def _orchestrator() -> SALOrchestrator:
-    return SALOrchestrator(unsanitised_address_repo=_StubUnsanitisedAddressRepository())
+    return SALOrchestrator(unstandardised_address_repo=_StubUnstandardisedAddressRepository())
 
 
 def test_collects_every_value_per_shared_key() -> None:
     # arrange: every address carries the same keys, all values distinct.
     addresses = AddressList(
         [
-            _make_unsanitised_address({"description": "cosy", "condition": "new"}),
-            _make_unsanitised_address({"description": "spacious", "condition": "worn"}),
-            _make_unsanitised_address({"description": "bright", "condition": "fair"}),
+            _make_unstandardised_address({"description": "cosy", "condition": "new"}),
+            _make_unstandardised_address({"description": "spacious", "condition": "worn"}),
+            _make_unstandardised_address({"description": "bright", "condition": "fair"}),
         ]
     )
 
@@ -56,9 +56,9 @@ def test_repeated_values_collapse_to_one_variant() -> None:
     # arrange: two addresses share the same wall description.
     addresses = AddressList(
         [
-            _make_unsanitised_address({"description": "cosy"}),
-            _make_unsanitised_address({"description": "cosy"}),
-            _make_unsanitised_address({"description": "bright"}),
+            _make_unstandardised_address({"description": "cosy"}),
+            _make_unstandardised_address({"description": "cosy"}),
+            _make_unstandardised_address({"description": "bright"}),
         ]
     )
 
@@ -73,9 +73,9 @@ def test_case_only_variants_collapse_to_one() -> None:
     # arrange: the same description typed with inconsistent casing.
     addresses = AddressList(
         [
-            _make_unsanitised_address({"description": "Cosy"}),
-            _make_unsanitised_address({"description": "cosy"}),
-            _make_unsanitised_address({"description": "COSY"}),
+            _make_unstandardised_address({"description": "Cosy"}),
+            _make_unstandardised_address({"description": "cosy"}),
+            _make_unstandardised_address({"description": "COSY"}),
         ]
     )
 
@@ -96,7 +96,7 @@ def test_empty_address_list_yields_empty_mapping() -> None:
 
 def test_single_address_yields_single_value_per_key() -> None:
     # arrange
-    addresses = AddressList([_make_unsanitised_address({"description": "cosy"})])
+    addresses = AddressList([_make_unstandardised_address({"description": "cosy"})])
 
     # act
     mappings = _orchestrator().get_col_to_description_mappings(addresses)
diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py
index 4317156c..d21bcfba 100644
--- a/tests/orchestration/test_postcode_splitter_orchestrator.py
+++ b/tests/orchestration/test_postcode_splitter_orchestrator.py
@@ -18,8 +18,8 @@ from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchest
 from orchestration.task_orchestrator import TaskOrchestrator
 from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
 from repositories.tasks.task_postgres_repository import TaskPostgresRepository
-from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
-    UnsanitisedAddressListCsvS3Repository,
+from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
+    UnstandardisedAddressListCsvS3Repository,
 )
 
 BUCKET = "splitter-bucket"
@@ -64,7 +64,7 @@ class Harness:
     csv_client: CsvS3Client
     boto_sqs: Any
     queue_url: str
-    repo: UnsanitisedAddressListCsvS3Repository
+    repo: UnstandardisedAddressListCsvS3Repository
 
 
 @pytest.fixture
@@ -78,7 +78,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
         queue_url = cast(str, queue["QueueUrl"])
 
         csv_client = CsvS3Client(boto_s3, BUCKET)
-        repo = UnsanitisedAddressListCsvS3Repository(csv_client, BUCKET)
+        repo = UnstandardisedAddressListCsvS3Repository(csv_client, BUCKET)
         queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
 
         # DB: ephemeral PostgreSQL TaskOrchestrator
@@ -91,7 +91,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
 
             splitter = PostcodeSplitterOrchestrator(
                 task_orchestrator=task_orchestrator,
-                unsanitised_address_repo=repo,
+                unstandardised_address_repo=repo,
                 queue_client=queue_client,
                 max_batch_size=3,
             )
diff --git a/tests/repositories/unsanitised_address/__init__.py b/tests/repositories/unstandardised_address/__init__.py
similarity index 100%
rename from tests/repositories/unsanitised_address/__init__.py
rename to tests/repositories/unstandardised_address/__init__.py
diff --git a/tests/repositories/unsanitised_address/conftest.py b/tests/repositories/unstandardised_address/conftest.py
similarity index 100%
rename from tests/repositories/unsanitised_address/conftest.py
rename to tests/repositories/unstandardised_address/conftest.py
diff --git a/tests/repositories/unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py b/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py
similarity index 85%
rename from tests/repositories/unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py
rename to tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py
index ff26f08a..866d6f2d 100644
--- a/tests/repositories/unsanitised_address/test_unsanitised_address_list_csv_s3_repository.py
+++ b/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py
@@ -3,11 +3,11 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws
 
-from domain.addresses.unsanitised_address import AddressList, UnsanitisedAddress
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
 from infrastructure.csv_s3_client import CsvS3Client
-from repositories.unsanitised_address.unsanitised_address_list_csv_s3_repository import (
-    UnsanitisedAddressListCsvS3Repository,
+from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
+    UnstandardisedAddressListCsvS3Repository,
 )
 from tests.infrastructure import make_boto_client
 
@@ -15,22 +15,22 @@ BUCKET = "user-address-bucket"
 
 
 @pytest.fixture
-def repo() -> Iterator[UnsanitisedAddressListCsvS3Repository]:
+def repo() -> Iterator[UnstandardisedAddressListCsvS3Repository]:
     with mock_aws():
         boto_client = make_boto_client("s3")
         boto_client.create_bucket(Bucket=BUCKET)
         csv_client = CsvS3Client(boto_client, BUCKET)
-        yield UnsanitisedAddressListCsvS3Repository(csv_client, BUCKET)
+        yield UnstandardisedAddressListCsvS3Repository(csv_client, BUCKET)
 
 
 def _upload_csv(
-    repo: UnsanitisedAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
+    repo: UnstandardisedAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
 ) -> str:
     return repo._csv_client.save_rows(rows, key)  # pyright: ignore[reportPrivateUsage]
 
 
 def test_load_batch_parses_address_postcode_and_reference(
-    repo: UnsanitisedAddressListCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -56,7 +56,7 @@ def test_load_batch_parses_address_postcode_and_reference(
 
 
 def test_load_batch_uses_only_address_1_when_others_missing(
-    repo: UnsanitisedAddressListCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -81,7 +81,7 @@ def test_load_batch_uses_only_address_1_when_others_missing(
 
 
 def test_load_batch_handles_missing_internal_reference(
-    repo: UnsanitisedAddressListCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -106,10 +106,10 @@ def test_load_batch_handles_missing_internal_reference(
 
 
 def test_load_batch_captures_full_source_row(
-    repo: UnsanitisedAddressListCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
     # A raw EPC-export-shaped row: the splitter must preserve every column,
-    # not just the ones it parses into UnsanitisedAddress fields.
+    # not just the ones it parses into UnstandardisedAddress fields.
     # arrange
     row = {
         "Asset Reference": "511",
@@ -128,7 +128,7 @@ def test_load_batch_captures_full_source_row(
 
 
 def test_load_batch_raises_when_postcode_column_absent(
-    repo: UnsanitisedAddressListCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}]
@@ -140,7 +140,7 @@ def test_load_batch_raises_when_postcode_column_absent(
 
 
 def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
-    repo: UnsanitisedAddressListCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     row = {
@@ -169,12 +169,12 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
 
 
 def test_save_batch_returns_uri_under_path_prefix(
-    repo: UnsanitisedAddressListCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     addresses = AddressList(
         [
-            UnsanitisedAddress(
+            UnstandardisedAddress(
                 address="1 High Street",
                 postcode=Postcode("SW1A 1AA"),
                 additional_info={
@@ -194,7 +194,7 @@ def test_save_batch_returns_uri_under_path_prefix(
 
 
 def test_save_then_reload_round_trip_preserves_columns(
-    repo: UnsanitisedAddressListCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     rows = [
@@ -227,12 +227,12 @@ def test_save_then_reload_round_trip_preserves_columns(
 
 
 def test_save_batch_uses_unique_filename_per_call(
-    repo: UnsanitisedAddressListCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
     # arrange
     addresses = AddressList(
         [
-            UnsanitisedAddress(
+            UnstandardisedAddress(
                 address="1 High Street",
                 postcode=Postcode("SW1A 1AA"),
                 additional_info={

From 675aa089c937c51aa6c6b59df52aa19814e9a3de Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 14:00:33 +0000
Subject: [PATCH 15/29] updated rdsap option; separated s3 location in
 infrastructure; added OpenAI API

---
 applications/SAL/handler.py                   |  37 +--
 applications/postcode_splitter/handler.py     |   2 +-
 datatypes/epc/domain/epc_property_data.py     |  22 +-
 datatypes/epc/schema/rdsap_schema_17_0.py     |   2 +-
 datatypes/epc/schema/rdsap_schema_17_1.py     |   2 +-
 datatypes/epc/schema/rdsap_schema_18_0.py     |   3 +-
 datatypes/epc/schema/rdsap_schema_19_0.py     |   2 +-
 datatypes/epc/schema/rdsap_schema_20_0_0.py   |   3 +-
 datatypes/epc/schema/rdsap_schema_21_0_0.py   |   4 +-
 datatypes/epc/schema/rdsap_schema_21_0_1.py   |   4 +-
 domain/epc/__init__.py                        |   4 +
 domain/epc/epc_record.py                      |  21 ++
 domain/epc/property_type.py                   |   9 +
 infrastructure/epc/__init__.py                |  13 ++
 infrastructure/epc/epc_client.py              |  41 ++++
 infrastructure/epc/exceptions.py              |  17 ++
 infrastructure/epc/gov_uk/__init__.py         |   6 +
 infrastructure/epc/gov_uk/_retry.py           |  34 +++
 .../epc/gov_uk/gov_uk_epc_client.py           | 132 +++++++++++
 .../epc/gov_uk/gov_uk_property_type.py        |  25 +++
 .../__init__.py                               |   5 +
 ...orical_open_data_communities_epc_client.py |  24 ++
 infrastructure/openai/__init__.py             |   0
 infrastructure/openai/exceptions.py           |   2 +
 infrastructure/openai/openai_client.py        |  60 +++++
 infrastructure/s3/__init__.py                 |   0
 infrastructure/{ => s3}/csv_s3_client.py      |   4 +-
 infrastructure/{ => s3}/s3_client.py          |   0
 infrastructure/{ => s3}/s3_uri.py             |   0
 ...dardised_address_list_csv_s3_repository.py |   2 +-
 tests/infrastructure/epc/__init__.py          |   0
 tests/infrastructure/epc/gov_uk/__init__.py   |   0
 tests/infrastructure/epc/gov_uk/conftest.py   |  49 ++++
 .../epc/gov_uk/test_gov_uk_epc_client.py      | 211 ++++++++++++++++++
 tests/infrastructure/test_csv_s3_client.py    |   2 +-
 tests/infrastructure/test_s3_client.py        |   2 +-
 tests/infrastructure/test_s3_uri.py           |   2 +-
 .../test_postcode_splitter_orchestrator.py    |   2 +-
 ...dardised_address_list_csv_s3_repository.py |   2 +-
 39 files changed, 709 insertions(+), 41 deletions(-)
 create mode 100644 domain/epc/__init__.py
 create mode 100644 domain/epc/epc_record.py
 create mode 100644 domain/epc/property_type.py
 create mode 100644 infrastructure/epc/__init__.py
 create mode 100644 infrastructure/epc/epc_client.py
 create mode 100644 infrastructure/epc/exceptions.py
 create mode 100644 infrastructure/epc/gov_uk/__init__.py
 create mode 100644 infrastructure/epc/gov_uk/_retry.py
 create mode 100644 infrastructure/epc/gov_uk/gov_uk_epc_client.py
 create mode 100644 infrastructure/epc/gov_uk/gov_uk_property_type.py
 create mode 100644 infrastructure/epc/historical_open_data_communities/__init__.py
 create mode 100644 infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py
 create mode 100644 infrastructure/openai/__init__.py
 create mode 100644 infrastructure/openai/exceptions.py
 create mode 100644 infrastructure/openai/openai_client.py
 create mode 100644 infrastructure/s3/__init__.py
 rename infrastructure/{ => s3}/csv_s3_client.py (95%)
 rename infrastructure/{ => s3}/s3_client.py (100%)
 rename infrastructure/{ => s3}/s3_uri.py (100%)
 create mode 100644 tests/infrastructure/epc/__init__.py
 create mode 100644 tests/infrastructure/epc/gov_uk/__init__.py
 create mode 100644 tests/infrastructure/epc/gov_uk/conftest.py
 create mode 100644 tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py

diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py
index 6076a662..f354171c 100644
--- a/applications/SAL/handler.py
+++ b/applications/SAL/handler.py
@@ -3,12 +3,14 @@ import boto3
 from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
-from infrastructure.csv_s3_client import CsvS3Client
+from infrastructure.s3.csv_s3_client import CsvS3Client
 from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
     UnstandardisedAddressListCsvS3Repository,
 )
 from domain.addresses.unstandardised_address import AddressList
 
+from infrastructure.epc.gov_uk import GovUkEpcClient
+
 
 def handler(
     body: dict[str, Any],
@@ -24,7 +26,9 @@ def handler(
     boto_s3: Any = boto3_client("s3")
 
     csv_client = CsvS3Client(boto_s3, bucket)
-    unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository(csv_client, bucket)
+    unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository(
+        csv_client, bucket
+    )
 
     sal = SALOrchestrator(
         unstandardised_address_repo=unstandardised_address_repo,
@@ -36,20 +40,17 @@ def handler(
         list_of_unstandardised_address=addressList
     )
 
-    # Read csv of user input
-    # get the column and unique variations of each description
-    # { walls: "wall variation 1", "wall varition 2"}
-    # Call chatgpt(input from landlord, our way of understanding the mapping) Retrun -> lanlordMapped
+    """
+    ----
+    # TODO Property Type:
+    # 1) Make a small enum with all property types (5 enum)
+    # 2) Make an interface with ChatGPT to get the property type field description and map it to the enum
+    # 3) Store in landlord overrides
+    # TODO Wall Type:
+    # 1) Make a small enum with all wall types (5 enum)
+    # 2) Make an interface with ChatGPT to get the wall field description and map it to the enum
+    # 3) Store in landlord overrides
+    ---
+    """
 
-
-    ENUM Walls:
-        cavity_wall_1976: 1
-    
-    # 1) COuld download site notes from pashub and get
-    # 2) Open Data communites API -> 
-    # 3) new api
-    
-    # User story:
-    # cavity: asbuilt (1976 - 1982): 
-
-    return {"hello world": ["hello world"]}
+    return {"hello": ["200"]}
diff --git a/applications/postcode_splitter/handler.py b/applications/postcode_splitter/handler.py
index ac2c4e99..e34a6af3 100644
--- a/applications/postcode_splitter/handler.py
+++ b/applications/postcode_splitter/handler.py
@@ -9,7 +9,7 @@ from applications.postcode_splitter.postcode_splitter_trigger_body import (
     PostcodeSplitterTriggerBody,
 )
 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
-from infrastructure.csv_s3_client import CsvS3Client
+from infrastructure.s3.csv_s3_client import CsvS3Client
 from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
 from orchestration.task_orchestrator import TaskOrchestrator
 from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py
index 8795b389..68a25205 100644
--- a/datatypes/epc/domain/epc_property_data.py
+++ b/datatypes/epc/domain/epc_property_data.py
@@ -29,7 +29,9 @@ class MainHeatingDetail:
     boiler_flue_type: Optional[int] = None  # TODO: make enum?
     boiler_ignition_type: Optional[int] = None  # TODO: make enum?
     central_heating_pump_age: Optional[int] = None
-    central_heating_pump_age_str: Optional[str] = None  # str from site notes e.g. "Unknown", "Pre 2013"
+    central_heating_pump_age_str: Optional[str] = (
+        None  # str from site notes e.g. "Unknown", "Pre 2013"
+    )
     main_heating_index_number: Optional[int] = None
     sap_main_heating_code: Optional[int] = None  # TODO: make enum?
     main_heating_number: Optional[int] = None
@@ -54,7 +56,7 @@ class ShowerOutlets:
 
 @dataclass
 class SapHeating:
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
     main_heating_details: List[MainHeatingDetail]
     has_fixed_air_conditioning: bool
     cylinder_size: Optional[Union[int, str]] = (
@@ -67,7 +69,9 @@ class SapHeating:
     cylinder_insulation_type: Optional[Union[int, str]] = None
     cylinder_thermostat: Optional[str] = None
     secondary_fuel_type: Optional[int] = None
-    secondary_heating_type: Optional[Union[int, str]] = None  # int from API; str from site notes
+    secondary_heating_type: Optional[Union[int, str]] = (
+        None  # int from API; str from site notes
+    )
     cylinder_insulation_thickness_mm: Optional[int] = None
 
 
@@ -75,7 +79,9 @@ class SapHeating:
 class SapVentilation:
     ventilation_type: Optional[str] = None
     draught_lobby: Optional[bool] = None
-    pressure_test: Optional[str] = None  # str from site notes e.g. "No test"; int in API via mechanical_ventilation
+    pressure_test: Optional[str] = (
+        None  # str from site notes e.g. "No test"; int in API via mechanical_ventilation
+    )
     open_flues_count: Optional[int] = None
     closed_flues_count: Optional[int] = None
     boiler_flues_count: Optional[int] = None
@@ -219,8 +225,12 @@ class SapBuildingPart:
         None  # TODO: make enum/mapping?
     )
     floor_type: Optional[str] = None  # str from site notes e.g. "Ground Floor"
-    floor_construction_type: Optional[str] = None  # str from site notes; distinct from floor_construction: int in SapFloorDimension
-    floor_insulation_type_str: Optional[str] = None  # str from site notes e.g. "As Built"
+    floor_construction_type: Optional[str] = (
+        None  # str from site notes; distinct from floor_construction: int in SapFloorDimension
+    )
+    floor_insulation_type_str: Optional[str] = (
+        None  # str from site notes e.g. "As Built"
+    )
     floor_u_value_known: Optional[bool] = None
 
     roof_construction: Optional[int] = None
diff --git a/datatypes/epc/schema/rdsap_schema_17_0.py b/datatypes/epc/schema/rdsap_schema_17_0.py
index 22aaded4..9cbedf97 100644
--- a/datatypes/epc/schema/rdsap_schema_17_0.py
+++ b/datatypes/epc/schema/rdsap_schema_17_0.py
@@ -37,7 +37,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     cylinder_insulation_type: int
diff --git a/datatypes/epc/schema/rdsap_schema_17_1.py b/datatypes/epc/schema/rdsap_schema_17_1.py
index a4c007ed..b0af07e6 100644
--- a/datatypes/epc/schema/rdsap_schema_17_1.py
+++ b/datatypes/epc/schema/rdsap_schema_17_1.py
@@ -41,7 +41,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     cylinder_insulation_type: int
diff --git a/datatypes/epc/schema/rdsap_schema_18_0.py b/datatypes/epc/schema/rdsap_schema_18_0.py
index a038dc9b..4ce2f887 100644
--- a/datatypes/epc/schema/rdsap_schema_18_0.py
+++ b/datatypes/epc/schema/rdsap_schema_18_0.py
@@ -41,7 +41,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
@@ -86,6 +86,7 @@ class SapFloorDimension:
 @dataclass
 class SapRoomInRoof:
     """Room-in-roof details. floor_area is a Measurement object in schema 18.0."""
+
     floor_area: Measurement
     insulation: str
     roof_room_connected: str
diff --git a/datatypes/epc/schema/rdsap_schema_19_0.py b/datatypes/epc/schema/rdsap_schema_19_0.py
index b94d9bb3..b3c77ec4 100644
--- a/datatypes/epc/schema/rdsap_schema_19_0.py
+++ b/datatypes/epc/schema/rdsap_schema_19_0.py
@@ -41,7 +41,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
diff --git a/datatypes/epc/schema/rdsap_schema_20_0_0.py b/datatypes/epc/schema/rdsap_schema_20_0_0.py
index 8f3986a2..9deb235e 100644
--- a/datatypes/epc/schema/rdsap_schema_20_0_0.py
+++ b/datatypes/epc/schema/rdsap_schema_20_0_0.py
@@ -49,7 +49,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
@@ -103,6 +103,7 @@ class SapFloorDimension:
 @dataclass
 class SapRoomInRoof:
     """Room-in-roof details. floor_area is a plain number in schema 20.0.0 (not a Measurement object)."""
+
     floor_area: Union[int, float]
     insulation: str
     roof_room_connected: str
diff --git a/datatypes/epc/schema/rdsap_schema_21_0_0.py b/datatypes/epc/schema/rdsap_schema_21_0_0.py
index eee00cb8..8d19e5f9 100644
--- a/datatypes/epc/schema/rdsap_schema_21_0_0.py
+++ b/datatypes/epc/schema/rdsap_schema_21_0_0.py
@@ -33,6 +33,7 @@ class ShowerOutlets:
 @dataclass
 class InstantaneousWwhrs:
     """Changed in 21.0.0: references WWHRS product index numbers instead of room counts."""
+
     wwhrs_index_number1: Optional[int] = None
     wwhrs_index_number2: Optional[int] = None
 
@@ -61,7 +62,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
@@ -154,6 +155,7 @@ class SapFloorDimension:
 @dataclass
 class SapRoomInRoof:
     """Room-in-roof details. insulation and roof_room_connected removed in schema 21.0.0."""
+
     floor_area: Union[int, float]
     construction_age_band: str
 
diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py
index 9b3dbd1d..f6be7cc3 100644
--- a/datatypes/epc/schema/rdsap_schema_21_0_1.py
+++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py
@@ -50,7 +50,7 @@ class MainHeatingDetail:
     main_heating_fraction: int
     main_heating_data_source: int
     boiler_flue_type: Optional[int] = None
-    fan_flue_present: Optional[str] = None # TODO: make bool
+    fan_flue_present: Optional[str] = None  # TODO: make bool
     boiler_ignition_type: Optional[int] = None
     central_heating_pump_age: Optional[int] = None
     main_heating_index_number: Optional[int] = None
@@ -62,7 +62,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
diff --git a/domain/epc/__init__.py b/domain/epc/__init__.py
new file mode 100644
index 00000000..e49fea42
--- /dev/null
+++ b/domain/epc/__init__.py
@@ -0,0 +1,4 @@
+from domain.epc.epc_record import EpcRecord
+from domain.epc.property_type import PropertyType
+
+__all__ = ["EpcRecord", "PropertyType"]
diff --git a/domain/epc/epc_record.py b/domain/epc/epc_record.py
new file mode 100644
index 00000000..7194d1d6
--- /dev/null
+++ b/domain/epc/epc_record.py
@@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional
+
+from domain.epc.property_type import PropertyType
+
+
+@dataclass(frozen=True)
+class EpcRecord:
+    """Immutable, trimmed-down view of one property's EPC data.
+
+    Holds a focused subset of the full ``EpcPropertyData``: the fields that
+    identify the property plus its ``PropertyType``. Instances are frozen, so
+    fields cannot be reassigned after construction; add fields as needed.
+    """
+
+    address_line_1: str
+    postcode: str
+    uprn: Optional[int]  # may be None; presumably when no UPRN is known
+    property_type: PropertyType  # domain enum rather than a raw source code
diff --git a/domain/epc/property_type.py b/domain/epc/property_type.py
new file mode 100644
index 00000000..707988aa
--- /dev/null
+++ b/domain/epc/property_type.py
@@ -0,0 +1,9 @@
+from enum import Enum
+
+
+class PropertyType(Enum):  # domain-level property categories (string-valued)
+    HOUSE = "House"
+    BUNGALOW = "Bungalow"
+    FLAT = "Flat"
+    MAISONETTE = "Maisonette"
+    PARK_HOME = "Park home"
diff --git a/infrastructure/epc/__init__.py b/infrastructure/epc/__init__.py
new file mode 100644
index 00000000..f99a7cb3
--- /dev/null
+++ b/infrastructure/epc/__init__.py
@@ -0,0 +1,13 @@
+from infrastructure.epc.epc_client import EpcClient
+from infrastructure.epc.exceptions import (
+    EpcApiError,
+    EpcNotFoundError,
+    EpcRateLimitError,
+)
+
+__all__ = [
+    "EpcApiError",
+    "EpcClient",
+    "EpcNotFoundError",
+    "EpcRateLimitError",
+]
diff --git a/infrastructure/epc/epc_client.py b/infrastructure/epc/epc_client.py
new file mode 100644
index 00000000..d1f8639c
--- /dev/null
+++ b/infrastructure/epc/epc_client.py
@@ -0,0 +1,41 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Optional
+
+from datatypes.epc.domain.epc_property_data import EpcPropertyData
+from datatypes.epc.search import EpcSearchResult
+
+
+class EpcClient(ABC):
+    """Port for looking up EPC (Energy Performance Certificate) data.
+
+    Concrete subclasses talk to a specific data source and hand back domain
+    objects, so calling code can depend on this abstraction alone rather than
+    on any particular transport.
+    """
+
+    @abstractmethod
+    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
+        """List the EPC certificates registered at ``postcode``.
+
+        Returns:
+            The matching search results; an empty list when the postcode
+            has no certificates.
+        """
+
+    @abstractmethod
+    def get_by_certificate_number(self, certificate_number: str) -> EpcPropertyData:
+        """Fetch the full EPC record identified by ``certificate_number``.
+
+        Raises:
+            EpcNotFoundError: If no such certificate exists.
+        """
+
+    @abstractmethod
+    def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]:
+        """Fetch the most recent EPC record for ``uprn``.
+
+        Returns:
+            The latest record, or ``None`` when the UPRN has no certificates.
+        """
diff --git a/infrastructure/epc/exceptions.py b/infrastructure/epc/exceptions.py
new file mode 100644
index 00000000..8e2e5165
--- /dev/null
+++ b/infrastructure/epc/exceptions.py
@@ -0,0 +1,17 @@
+from typing import Optional
+
+
+class EpcApiError(Exception):
+    """Base class for EPC client errors; catch it to handle any of them."""
+
+
+class EpcNotFoundError(EpcApiError):
+    """Raised when the API answers 404 for a resource that was expected to exist."""
+
+
+class EpcRateLimitError(EpcApiError):
+    """Raised on an HTTP 429; reaches callers once any retrying has given up."""
+
+    def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
+        super().__init__(message)
+        self.retry_after = retry_after  # suggested wait in seconds, or None
diff --git a/infrastructure/epc/gov_uk/__init__.py b/infrastructure/epc/gov_uk/__init__.py
new file mode 100644
index 00000000..d491a1ef
--- /dev/null
+++ b/infrastructure/epc/gov_uk/__init__.py
@@ -0,0 +1,6 @@
+from infrastructure.epc.gov_uk.gov_uk_epc_client import GovUkEpcClient
+from infrastructure.epc.gov_uk.gov_uk_property_type import (
+    property_type_from_gov_uk_code,
+)
+
+__all__ = ["GovUkEpcClient", "property_type_from_gov_uk_code"]
diff --git a/infrastructure/epc/gov_uk/_retry.py b/infrastructure/epc/gov_uk/_retry.py
new file mode 100644
index 00000000..db92b131
--- /dev/null
+++ b/infrastructure/epc/gov_uk/_retry.py
@@ -0,0 +1,34 @@
+import time
+from typing import Callable, Optional, TypeVar
+
+from infrastructure.epc.exceptions import EpcRateLimitError
+
+T = TypeVar("T")
+
+
+def call_with_retry(
+    fn: Callable[[], T],
+    max_retries: int = 5,
+    backoff_base: float = 1.0,
+    backoff_multiplier: float = 2.0,
+    max_backoff: float = 60.0,
+) -> T:
+    """Call ``fn``, retrying on EpcRateLimitError with exponential backoff.
+
+    ``fn`` is always called at least once and at most ``max_retries + 1``
+    times. The wait is the error's ``retry_after`` when set, otherwise
+    ``backoff_base * backoff_multiplier ** attempt``, clamped to
+    ``[0, max_backoff]``; the final EpcRateLimitError is re-raised as-is.
+    """
+    attempt = 0
+    while True:
+        try:
+            return fn()
+        except EpcRateLimitError as exc:
+            if attempt >= max_retries:
+                raise  # retries exhausted; no ``assert`` that -O would strip
+            delay = exc.retry_after
+        if delay is None:
+            delay = backoff_base * (backoff_multiplier**attempt)
+        time.sleep(max(0.0, min(delay, max_backoff)))  # never sleep a negative
+        attempt += 1
diff --git a/infrastructure/epc/gov_uk/gov_uk_epc_client.py b/infrastructure/epc/gov_uk/gov_uk_epc_client.py
new file mode 100644
index 00000000..ac0db09f
--- /dev/null
+++ b/infrastructure/epc/gov_uk/gov_uk_epc_client.py
@@ -0,0 +1,132 @@
+# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml
+from __future__ import annotations
+
+from typing import Any, Optional
+
+import httpx
+
+from datatypes.epc.domain.epc_property_data import EpcPropertyData
+from datatypes.epc.domain.mapper import EpcPropertyDataMapper
+from datatypes.epc.search import EpcSearchResult
+from infrastructure.epc.epc_client import EpcClient
+from infrastructure.epc.exceptions import (
+    EpcApiError,
+    EpcNotFoundError,
+    EpcRateLimitError,
+)
+from infrastructure.epc.gov_uk._retry import call_with_retry
+
+
+class GovUkEpcClient(EpcClient):
+    """EpcClient backed by the live gov.uk EPC API.
+
+    Endpoint: https://api.get-energy-performance-data.communities.gov.uk
+    """
+
+    BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk"
+    REQUEST_TIMEOUT = 10.0
+
+    def __init__(self, auth_token: str) -> None:
+        self._headers = {
+            "Authorization": f"Bearer {auth_token}",
+            "Accept": "application/json",
+        }
+
+    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
+        """Return search results for ``postcode`` (spaces/case are normalised)."""
+        normalised = self._normalise_postcode(postcode)
+        return call_with_retry(lambda: self._search(postcode=normalised))
+
+    def get_by_certificate_number(self, certificate_number: str) -> EpcPropertyData:
+        """Fetch one certificate and map it; raises EpcNotFoundError on a 404."""
+        raw = call_with_retry(lambda: self._fetch_certificate(certificate_number))
+        return EpcPropertyDataMapper.from_api_response(raw)
+
+    def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]:
+        """Return the certificate with the greatest ``registration_date``, or None."""
+        results = call_with_retry(lambda: self._search(uprn=uprn))
+        if not results:
+            return None
+        latest = max(results, key=lambda r: r.registration_date)
+        return self.get_by_certificate_number(latest.certificate_number)
+
+    # ------------------------------------------------------------------
+    # Private helpers
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _normalise_postcode(postcode: str) -> str:
+        """Return the postcode with all spaces removed and uppercased."""
+        return postcode.replace(" ", "").upper()
+
+    @staticmethod
+    def _parse_retry_after(resp: httpx.Response) -> Optional[float]:
+        """Return ``Retry-After`` as a non-negative number of seconds, else None."""
+        header = resp.headers.get("Retry-After")
+        if header is None:
+            return None
+        try:
+            seconds = float(header)
+        except (TypeError, ValueError):
+            return None
+        # NaN or negative values would make time.sleep() raise in the retry loop.
+        return seconds if seconds >= 0 else None
+
+    def _get(self, path: str, params: dict[str, Any]) -> httpx.Response:
+        """GET ``path``; raise on 429 and on failures other than 404."""
+        resp = httpx.get(
+            f"{self.BASE_URL}{path}",
+            params=params,
+            headers=self._headers,
+            timeout=self.REQUEST_TIMEOUT,
+        )
+        if resp.status_code == 429:
+            raise EpcRateLimitError(
+                "Rate limited by EPC API",
+                retry_after=self._parse_retry_after(resp),
+            )
+        if resp.status_code != 404 and not resp.is_success:
+            raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")
+        return resp  # includes 404s: each caller interprets "not found" itself
+
+    def _fetch_certificate(self, certificate_number: str) -> dict[str, Any]:
+        """Return the raw ``data`` payload for one certificate."""
+        params = {"certificate_number": certificate_number}
+        resp = self._get("/api/certificate", params)
+        if resp.status_code == 404:
+            raise EpcNotFoundError(certificate_number)
+        return resp.json()["data"]
+
+    def _search(
+        self,
+        postcode: Optional[str] = None,
+        uprn: Optional[int] = None,
+    ) -> list[EpcSearchResult]:
+        """Run a domestic search filtered by postcode and/or UPRN."""
+        params: dict[str, str | int] = {}
+        if postcode:
+            params["postcode"] = postcode
+        if uprn is not None:
+            params["uprn"] = uprn
+
+        resp = self._get("/api/domestic/search", params)
+        if resp.status_code == 404:
+            return []  # a 404 from search is treated as "no matches"
+        rows = resp.json().get("data", [])
+        return [self._parse_search_result(row) for row in rows]
+
+    @staticmethod
+    def _parse_search_result(row: dict[str, Any]) -> EpcSearchResult:
+        """Build an ``EpcSearchResult`` from one camelCase API row."""
+        return EpcSearchResult(
+            certificate_number=row["certificateNumber"],
+            address_line_1=row["addressLine1"],
+            address_line_2=row.get("addressLine2"),
+            address_line_3=row.get("addressLine3"),
+            address_line_4=row.get("addressLine4"),
+            postcode=row["postcode"],
+            post_town=row["postTown"],
+            uprn=row.get("uprn"),
+            current_energy_efficiency_band=row["currentEnergyEfficiencyBand"],
+            registration_date=row["registrationDate"],
+        )
diff --git a/infrastructure/epc/gov_uk/gov_uk_property_type.py b/infrastructure/epc/gov_uk/gov_uk_property_type.py
new file mode 100644
index 00000000..a0f4a7a3
--- /dev/null
+++ b/infrastructure/epc/gov_uk/gov_uk_property_type.py
@@ -0,0 +1,25 @@
+from domain.epc.property_type import PropertyType
+
+# GOV.UK EPC API ``property_type`` integer codes mapped to the domain type.
+# This translation is GOV.UK-specific and lives in the infrastructure layer so
+# the domain ``PropertyType`` stays free of any source encoding.
+_PROPERTY_TYPE_BY_GOV_UK_CODE: dict[int, PropertyType] = {
+    0: PropertyType.HOUSE,
+    1: PropertyType.BUNGALOW,
+    2: PropertyType.FLAT,
+    3: PropertyType.MAISONETTE,
+    4: PropertyType.PARK_HOME,
+}
+
+
+def property_type_from_gov_uk_code(code: int) -> PropertyType:
+    """Map a GOV.UK EPC ``property_type`` integer code onto ``PropertyType``.
+
+    Raises:
+        ValueError: If ``code`` has no entry in the GOV.UK code table, i.e.
+            it has not been mapped here yet.
+    """
+    mapped = _PROPERTY_TYPE_BY_GOV_UK_CODE.get(code)
+    if mapped is None:
+        raise ValueError(f"Unknown GOV.UK EPC property type code: {code}") from None
+    return mapped
diff --git a/infrastructure/epc/historical_open_data_communities/__init__.py b/infrastructure/epc/historical_open_data_communities/__init__.py
new file mode 100644
index 00000000..88a69081
--- /dev/null
+++ b/infrastructure/epc/historical_open_data_communities/__init__.py
@@ -0,0 +1,5 @@
+from infrastructure.epc.historical_open_data_communities.historical_open_data_communities_epc_client import (
+    HistoricalOpenDataCommunitiesEpcClient,
+)
+
+__all__ = ["HistoricalOpenDataCommunitiesEpcClient"]
diff --git a/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py b/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py
new file mode 100644
index 00000000..d8c7f9ac
--- /dev/null
+++ b/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from domain.epc.epc_record import EpcRecord
+
+
+class HistoricalOpenDataCommunitiesEpcClient:
+    """EPC client backed by Open Data Communities' historical EPC data.
+
+    Placeholder: every method currently raises NotImplementedError. It does
+    not subclass ``EpcClient`` yet: unlike GovUkEpcClient, its methods are
+    typed to return the domain ``EpcRecord`` directly. Once the ``EpcClient``
+    port is migrated to return ``EpcRecord``, this adapter should implement it.
+    """
+
+    def search_by_postcode(self, postcode: str) -> list[EpcRecord]:
+        raise NotImplementedError
+
+    def get_by_certificate_number(self, certificate_number: str) -> EpcRecord:
+        raise NotImplementedError
+
+    def get_by_uprn(self, uprn: int) -> Optional[EpcRecord]:
+        raise NotImplementedError
diff --git a/infrastructure/openai/__init__.py b/infrastructure/openai/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/infrastructure/openai/exceptions.py b/infrastructure/openai/exceptions.py
new file mode 100644
index 00000000..14cf95a2
--- /dev/null
+++ b/infrastructure/openai/exceptions.py
@@ -0,0 +1,2 @@
+class OpenAiClientError(Exception):
+    """Base for all OpenAI client errors; also raised directly by the chat client."""
diff --git a/infrastructure/openai/openai_client.py b/infrastructure/openai/openai_client.py
new file mode 100644
index 00000000..34af4290
--- /dev/null
+++ b/infrastructure/openai/openai_client.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import os
+from typing import Optional
+
+from openai import OpenAI
+from openai.types.chat import ChatCompletionMessageParam
+
+from infrastructure.openai.exceptions import OpenAiClientError
+
+
+class OpenAiChatClient:
+    """Thin wrapper over the OpenAI Chat Completions API.
+
+    Sends a single prompt and returns the assistant's reply as plain text.
+    """
+
+    DEFAULT_MODEL = "gpt-4o-mini"
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+    ) -> None:
+        key = api_key or os.environ.get("OPENAI_API_KEY")
+        if not key:
+            raise OpenAiClientError(
+                "No OpenAI API key provided. "
+                "Pass api_key or set the OPENAI_API_KEY environment variable."
+            )
+        self._client = OpenAI(api_key=key)
+        self._model = model or self.DEFAULT_MODEL
+
+    def generate(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+    ) -> str:
+        """Send a prompt to the model and return its reply text.
+
+        Args:
+            prompt: The user message to send.
+            system_prompt: Optional instruction that sets the model's behaviour.
+
+        Raises:
+            OpenAiClientError: If the reply has no choices or no text content.
+        """
+        messages: list[ChatCompletionMessageParam] = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
+
+        response = self._client.chat.completions.create(
+            model=self._model, messages=messages
+        )
+        # An empty ``choices`` list would otherwise surface as a bare IndexError.
+        content = response.choices[0].message.content if response.choices else None
+        if content is None:
+            raise OpenAiClientError("OpenAI returned an empty response.")
+        return content
diff --git a/infrastructure/s3/__init__.py b/infrastructure/s3/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/infrastructure/csv_s3_client.py b/infrastructure/s3/csv_s3_client.py
similarity index 95%
rename from infrastructure/csv_s3_client.py
rename to infrastructure/s3/csv_s3_client.py
index d058ba53..67c9a8d4 100644
--- a/infrastructure/csv_s3_client.py
+++ b/infrastructure/s3/csv_s3_client.py
@@ -1,8 +1,8 @@
 import csv
 from io import StringIO
 
-from infrastructure.s3_client import S3Client
-from infrastructure.s3_uri import parse_s3_uri
+from infrastructure.s3.s3_client import S3Client
+from infrastructure.s3.s3_uri import parse_s3_uri
 
 
 def _dedupe_fieldnames(fieldnames: list[str]) -> list[str]:
diff --git a/infrastructure/s3_client.py b/infrastructure/s3/s3_client.py
similarity index 100%
rename from infrastructure/s3_client.py
rename to infrastructure/s3/s3_client.py
diff --git a/infrastructure/s3_uri.py b/infrastructure/s3/s3_uri.py
similarity index 100%
rename from infrastructure/s3_uri.py
rename to infrastructure/s3/s3_uri.py
diff --git a/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py b/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py
index 260fce1d..20bae20c 100644
--- a/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py
+++ b/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py
@@ -6,7 +6,7 @@ from typing import Optional
 
 from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
-from infrastructure.csv_s3_client import CsvS3Client
+from infrastructure.s3.csv_s3_client import CsvS3Client
 from repositories.unstandardised_address.unstandardised_address_list_repository import (
     UnstandardisedAddressListRepository,
 )
diff --git a/tests/infrastructure/epc/__init__.py b/tests/infrastructure/epc/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/infrastructure/epc/gov_uk/__init__.py b/tests/infrastructure/epc/gov_uk/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/infrastructure/epc/gov_uk/conftest.py b/tests/infrastructure/epc/gov_uk/conftest.py
new file mode 100644
index 00000000..8fbd3094
--- /dev/null
+++ b/tests/infrastructure/epc/gov_uk/conftest.py
@@ -0,0 +1,49 @@
+import json
+import pathlib
+
+import pytest
+
+from infrastructure.epc.gov_uk.gov_uk_epc_client import GovUkEpcClient
+
+SAMPLES_DIR = pathlib.Path("backend/epc_api/json_samples")
+
+
+@pytest.fixture
+def rdsap_21_0_0_cert():
+    return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.0/epc.json").read_text())
+
+
+@pytest.fixture
+def rdsap_21_0_1_cert():
+    return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.1/epc.json").read_text())
+
+
+@pytest.fixture
+def epc_client():
+    return GovUkEpcClient(auth_token="test-token")
+
+
+def make_search_row(
+    cert_num="CERT-001",
+    address_line_1="1 Test Street",
+    postcode="SW1A 1AA",
+    post_town="London",
+    uprn=100023336956,
+    band="D",
+    registration_date="2024-01-01",
+    address_line_2=None,
+    address_line_3=None,
+    address_line_4=None,
+):
+    return {
+        "certificateNumber": cert_num,
+        "addressLine1": address_line_1,
+        "addressLine2": address_line_2,
+        "addressLine3": address_line_3,
+        "addressLine4": address_line_4,
+        "postcode": postcode,
+        "postTown": post_town,
+        "uprn": uprn,
+        "currentEnergyEfficiencyBand": band,
+        "registrationDate": registration_date,
+    }
diff --git a/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py b/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py
new file mode 100644
index 00000000..46164a0e
--- /dev/null
+++ b/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py
@@ -0,0 +1,216 @@
+from unittest.mock import MagicMock, call, patch
+
+import pytest
+
+from datatypes.epc.domain.epc_property_data import EpcPropertyData
+from datatypes.epc.search import EpcSearchResult
+from infrastructure.epc.exceptions import EpcNotFoundError
+from tests.infrastructure.epc.gov_uk.conftest import make_search_row
+
+_SLEEP = "infrastructure.epc.gov_uk._retry.time.sleep"
+
+
+def _mock_response(status_code=200, json_data=None, headers=None):
+    """Build a MagicMock standing in for the response returned by ``httpx.get``.
+
+    ``json_data`` is what ``resp.json()`` returns; only ``None`` is replaced by
+    an empty dict, so falsy payloads such as ``[]`` are passed through intact.
+    """
+    resp = MagicMock()
+    resp.status_code = status_code
+    resp.is_success = 200 <= status_code < 300
+    resp.json.return_value = {} if json_data is None else json_data
+    resp.text = str(json_data)
+    resp.headers = headers or {}
+    return resp
+
+
+# ---------------------------------------------------------------------------
+# Test 1: get_by_certificate_number happy path
+# ---------------------------------------------------------------------------
+
+
+def test_get_by_certificate_number_returns_epc_property_data(
+    epc_client, rdsap_21_0_1_cert
+):
+    cert_response = {"data": rdsap_21_0_1_cert}
+    with patch("httpx.get", return_value=_mock_response(200, cert_response)):
+        result = epc_client.get_by_certificate_number("CERT-001")
+
+    assert isinstance(result, EpcPropertyData)
+
+
+# ---------------------------------------------------------------------------
+# Test 2: get_by_certificate_number 404 -> EpcNotFoundError
+# ---------------------------------------------------------------------------
+
+
+def test_get_by_certificate_number_404_raises_not_found(epc_client):
+    with patch("httpx.get", return_value=_mock_response(404)):
+        with pytest.raises(EpcNotFoundError):
+            epc_client.get_by_certificate_number("BAD-CERT")
+
+
+# ---------------------------------------------------------------------------
+# Test 3: 429 retried, succeeds on 3rd attempt
+# ---------------------------------------------------------------------------
+
+
+def test_get_by_certificate_number_retries_on_429_and_succeeds(
+    epc_client, rdsap_21_0_1_cert
+):
+    cert_response = {"data": rdsap_21_0_1_cert}
+    responses = [
+        _mock_response(429),
+        _mock_response(429),
+        _mock_response(200, cert_response),
+    ]
+    with patch("httpx.get", side_effect=responses), patch(_SLEEP):
+        result = epc_client.get_by_certificate_number("CERT-001")
+
+    assert isinstance(result, EpcPropertyData)
+
+
+# ---------------------------------------------------------------------------
+# Test 3b: 429 with Retry-After header -> sleeps for that value
+# ---------------------------------------------------------------------------
+
+
+def test_429_retry_after_header_drives_sleep_duration(
+    epc_client, rdsap_21_0_1_cert
+):
+    cert_response = {"data": rdsap_21_0_1_cert}
+    responses = [
+        _mock_response(429, headers={"Retry-After": "7"}),
+        _mock_response(200, cert_response),
+    ]
+    with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep:
+        epc_client.get_by_certificate_number("CERT-001")
+
+    mock_sleep.assert_called_once_with(7.0)
+
+
+# ---------------------------------------------------------------------------
+# Test 3c: 429 without Retry-After -> falls back to exponential backoff
+# ---------------------------------------------------------------------------
+
+
+def test_429_without_retry_after_uses_exponential_backoff(
+    epc_client, rdsap_21_0_1_cert
+):
+    cert_response = {"data": rdsap_21_0_1_cert}
+    responses = [
+        _mock_response(429),
+        _mock_response(429),
+        _mock_response(200, cert_response),
+    ]
+    with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep:
+        epc_client.get_by_certificate_number("CERT-001")
+
+    assert mock_sleep.call_args_list == [call(1.0), call(2.0)]
+
+
+# ---------------------------------------------------------------------------
+# Test 3d: HTTP-date Retry-After (not delay-seconds) -> exponential backoff
+# ---------------------------------------------------------------------------
+
+
+def test_429_malformed_retry_after_falls_back_to_backoff(
+    epc_client, rdsap_21_0_1_cert
+):
+    cert_response = {"data": rdsap_21_0_1_cert}
+    responses = [
+        _mock_response(429, headers={"Retry-After": "Wed, 21 Oct 2026 07:28:00 GMT"}),
+        _mock_response(200, cert_response),
+    ]
+    with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep:
+        epc_client.get_by_certificate_number("CERT-001")
+
+    mock_sleep.assert_called_once_with(1.0)
+
+
+# ---------------------------------------------------------------------------
+# Test 3e: Retry-After capped by max_backoff to avoid hostile/buggy values
+# ---------------------------------------------------------------------------
+
+
+def test_429_retry_after_capped_by_max_backoff(epc_client, rdsap_21_0_1_cert):
+    cert_response = {"data": rdsap_21_0_1_cert}
+    responses = [
+        _mock_response(429, headers={"Retry-After": "9999"}),
+        _mock_response(200, cert_response),
+    ]
+    with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep:
+        epc_client.get_by_certificate_number("CERT-001")
+
+    mock_sleep.assert_called_once_with(60.0)
+
+
+# ---------------------------------------------------------------------------
+# Test 4: get_by_uprn empty search -> None
+# ---------------------------------------------------------------------------
+
+
+def test_get_by_uprn_returns_none_when_no_results(epc_client):
+    with patch("httpx.get", return_value=_mock_response(200, {"data": []})):
+        result = epc_client.get_by_uprn(100023336956)
+
+    assert result is None
+
+
+# ---------------------------------------------------------------------------
+# Test 5: get_by_uprn multiple results -> fetches latest by registration_date
+# ---------------------------------------------------------------------------
+
+
+def test_get_by_uprn_picks_most_recent_certificate(epc_client, rdsap_21_0_1_cert):
+    search_rows = [
+        make_search_row(cert_num="CERT-OLD", registration_date="2022-01-01"),
+        make_search_row(cert_num="CERT-NEW", registration_date="2024-06-01"),
+        make_search_row(cert_num="CERT-MID", registration_date="2023-03-15"),
+    ]
+    cert_response = {"data": rdsap_21_0_1_cert}
+
+    def fake_get(url, params=None, **kwargs):
+        if "search" in url:
+            return _mock_response(200, {"data": search_rows})
+        return _mock_response(200, cert_response)
+
+    with patch("httpx.get", side_effect=fake_get) as mock_get:
+        result = epc_client.get_by_uprn(100023336956)
+
+    assert isinstance(result, EpcPropertyData)
+    # Second call must be for the most recent cert
+    cert_call = mock_get.call_args_list[1]
+    assert cert_call.kwargs["params"]["certificate_number"] == "CERT-NEW"
+
+
+# ---------------------------------------------------------------------------
+# Test 6: search_by_postcode returns list[EpcSearchResult]
+# ---------------------------------------------------------------------------
+
+
+def test_search_by_postcode_returns_results(epc_client):
+    rows = [
+        make_search_row(cert_num="CERT-A", address_line_1="1 High Street"),
+        make_search_row(cert_num="CERT-B", address_line_1="2 High Street"),
+    ]
+    with patch("httpx.get", return_value=_mock_response(200, {"data": rows})):
+        results = epc_client.search_by_postcode("SW1A 1AA")
+
+    assert len(results) == 2
+    assert all(isinstance(r, EpcSearchResult) for r in results)
+    assert results[0].certificate_number == "CERT-A"
+    assert results[1].address_line_1 == "2 High Street"
+
+
+# ---------------------------------------------------------------------------
+# Test 7: search_by_postcode 404 -> empty list
+# ---------------------------------------------------------------------------
+
+
+def test_search_by_postcode_404_returns_empty_list(epc_client):
+    with patch("httpx.get", return_value=_mock_response(404)):
+        results = epc_client.search_by_postcode("ZZ9 9ZZ")
+
+    assert results == []
diff --git a/tests/infrastructure/test_csv_s3_client.py b/tests/infrastructure/test_csv_s3_client.py
index e7ec7eab..048a1cbe 100644
--- a/tests/infrastructure/test_csv_s3_client.py
+++ b/tests/infrastructure/test_csv_s3_client.py
@@ -3,7 +3,7 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws
 
-from infrastructure.csv_s3_client import CsvS3Client
+from infrastructure.s3.csv_s3_client import CsvS3Client
 from tests.infrastructure import make_boto_client
 
 BUCKET = "csv-bucket"
diff --git a/tests/infrastructure/test_s3_client.py b/tests/infrastructure/test_s3_client.py
index 67db4f58..bdac6be1 100644
--- a/tests/infrastructure/test_s3_client.py
+++ b/tests/infrastructure/test_s3_client.py
@@ -3,7 +3,7 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws
 
-from infrastructure.s3_client import S3Client
+from infrastructure.s3.s3_client import S3Client
 from tests.infrastructure import make_boto_client
 
 BUCKET = "test-bucket"
diff --git a/tests/infrastructure/test_s3_uri.py b/tests/infrastructure/test_s3_uri.py
index 32fd710f..f0865865 100644
--- a/tests/infrastructure/test_s3_uri.py
+++ b/tests/infrastructure/test_s3_uri.py
@@ -1,6 +1,6 @@
 import pytest
 
-from infrastructure.s3_uri import parse_s3_uri
+from infrastructure.s3.s3_uri import parse_s3_uri
 
 
 def test_parses_simple_s3_uri() -> None:
diff --git a/tests/orchestration/test_postcode_splitter_orchestrator.py b/tests/orchestration/test_postcode_splitter_orchestrator.py
index d21bcfba..9ad56094 100644
--- a/tests/orchestration/test_postcode_splitter_orchestrator.py
+++ b/tests/orchestration/test_postcode_splitter_orchestrator.py
@@ -13,7 +13,7 @@ from sqlalchemy import Engine
 from sqlmodel import Session
 
 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
-from infrastructure.csv_s3_client import CsvS3Client
+from infrastructure.s3.csv_s3_client import CsvS3Client
 from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
 from orchestration.task_orchestrator import TaskOrchestrator
 from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
diff --git a/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py b/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py
index 866d6f2d..f86878c3 100644
--- a/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py
+++ b/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py
@@ -5,7 +5,7 @@ from moto import mock_aws
 
 from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
-from infrastructure.csv_s3_client import CsvS3Client
+from infrastructure.s3.csv_s3_client import CsvS3Client
 from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
     UnstandardisedAddressListCsvS3Repository,
 )

From c887153292e2581c217f9374648d5181ae84b260 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 14:07:10 +0000
Subject: [PATCH 16/29] renamed to chatgpt

---
 infrastructure/{openai => chatgpt}/__init__.py            | 0
 .../{openai/openai_client.py => chatgpt/chatgpt.py}       | 8 ++++----
 infrastructure/{openai => chatgpt}/exceptions.py          | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)
 rename infrastructure/{openai => chatgpt}/__init__.py (100%)
 rename infrastructure/{openai/openai_client.py => chatgpt/chatgpt.py} (89%)
 rename infrastructure/{openai => chatgpt}/exceptions.py (54%)

diff --git a/infrastructure/openai/__init__.py b/infrastructure/chatgpt/__init__.py
similarity index 100%
rename from infrastructure/openai/__init__.py
rename to infrastructure/chatgpt/__init__.py
diff --git a/infrastructure/openai/openai_client.py b/infrastructure/chatgpt/chatgpt.py
similarity index 89%
rename from infrastructure/openai/openai_client.py
rename to infrastructure/chatgpt/chatgpt.py
index 34af4290..ee2a5b39 100644
--- a/infrastructure/openai/openai_client.py
+++ b/infrastructure/chatgpt/chatgpt.py
@@ -6,10 +6,10 @@ from typing import Optional
 from openai import OpenAI
 from openai.types.chat import ChatCompletionMessageParam
 
-from infrastructure.openai.exceptions import OpenAiClientError
+from infrastructure.chatgpt.exceptions import ChatGPTClientError
 
 
-class OpenAiChatClient:
+class ChatGPT:
     """Thin wrapper over the OpenAI Chat Completions API.
 
     Sends a single prompt and returns the assistant's reply as plain text.
@@ -24,7 +24,7 @@ class OpenAiChatClient:
     ) -> None:
         key = api_key or os.environ.get("OPENAI_API_KEY")
         if not key:
-            raise OpenAiClientError(
+            raise ChatGPTClientError(
                 "No OpenAI API key provided. "
                 "Pass api_key or set the OPENAI_API_KEY environment variable."
             )
@@ -56,5 +56,5 @@ class OpenAiChatClient:
         )
         content = response.choices[0].message.content
         if content is None:
-            raise OpenAiClientError("OpenAI returned an empty response.")
+            raise ChatGPTClientError("ChatGPT returned an empty response.")
         return content
diff --git a/infrastructure/openai/exceptions.py b/infrastructure/chatgpt/exceptions.py
similarity index 54%
rename from infrastructure/openai/exceptions.py
rename to infrastructure/chatgpt/exceptions.py
index 14cf95a2..31663f3d 100644
--- a/infrastructure/openai/exceptions.py
+++ b/infrastructure/chatgpt/exceptions.py
@@ -1,2 +1,2 @@
-class OpenAiClientError(Exception):
+class ChatGPTClientError(Exception):
     """Base for all OpenAI client errors."""

From e23bcd7e138c08205471b49675faf2f5fa433068 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 14:51:28 +0000
Subject: [PATCH 17/29] chatgpt interface scaffold

---
 UBIQUITOUS_LANGUAGE.md      | 6 ++++++
 applications/SAL/handler.py | 7 +++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/UBIQUITOUS_LANGUAGE.md b/UBIQUITOUS_LANGUAGE.md
index d2fde99a..34dc3115 100644
--- a/UBIQUITOUS_LANGUAGE.md
+++ b/UBIQUITOUS_LANGUAGE.md
@@ -49,6 +49,12 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve
 | **New EPC API** | The replacement government API (`api.get-energy-performance-data.communities.gov.uk`) using Bearer token auth. | "new API", "current API" |
 | **Bearer Token** | The auth credential required by the new EPC API; stored in the `EPC_AUTH_TOKEN` environment variable. | "API key", "auth token", "secret" |
 
+## Methodology
+
+| Term | Definition | Aliases to avoid |
+|------|------------|------------------|
+| **DDD** | Domain-Driven Design — the design approach this glossary supports, modelling software around a shared domain language. | "domain design", "driven design" |
+
 ## Relationships
 
 - An **EPC** belongs to exactly one **Dwelling** and has one **Certificate Number**.
diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py
index f354171c..af3aa90f 100644
--- a/applications/SAL/handler.py
+++ b/applications/SAL/handler.py
@@ -9,8 +9,6 @@ from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repo
 )
 from domain.addresses.unstandardised_address import AddressList
 
-from infrastructure.epc.gov_uk import GovUkEpcClient
-
 
 def handler(
     body: dict[str, Any],
@@ -36,6 +34,11 @@ def handler(
 
     addressList: AddressList = sal.get_unstandardised_addresses(input_s3_uri=s3_uri)
 
+    column_mapping = {
+        # "Wall Description": "Walls",
+        "Property Type": "Property Type",
+    }
+
     col_to_desc_map = sal.get_col_to_description_mappings(
         list_of_unstandardised_address=addressList
     )

From d0e5aa9e3f7ccb8c63b1799671b7fb54f2af6862 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 14:53:31 +0000
Subject: [PATCH 18/29] =?UTF-8?q?Classify=20a=20landlord=20description=20i?=
 =?UTF-8?q?nto=20a=20SAL=20property=20type=20=F0=9F=9F=A9?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 domain/sal/__init__.py                        |  0
 domain/sal/property_type.py                   | 25 ++++++++++++
 domain/sal/property_type_classifier.py        | 27 +++++++++++++
 .../chatgpt_property_type_classifier.py       | 38 +++++++++++++++++++
 tests/infrastructure/chatgpt/__init__.py      |  0
 .../test_chatgpt_property_type_classifier.py  | 33 ++++++++++++++++
 6 files changed, 123 insertions(+)
 create mode 100644 domain/sal/__init__.py
 create mode 100644 domain/sal/property_type.py
 create mode 100644 domain/sal/property_type_classifier.py
 create mode 100644 infrastructure/chatgpt/chatgpt_property_type_classifier.py
 create mode 100644 tests/infrastructure/chatgpt/__init__.py
 create mode 100644 tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py

diff --git a/domain/sal/__init__.py b/domain/sal/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/domain/sal/property_type.py b/domain/sal/property_type.py
new file mode 100644
index 00000000..9659639a
--- /dev/null
+++ b/domain/sal/property_type.py
@@ -0,0 +1,25 @@
+from enum import Enum
+
+
+class PropertyType(Enum):
+    """A landlord-supplied property type, as resolved by the SAL context.
+
+    Distinct from the EPC context's ``PropertyType``: a landlord CSV value
+    may be unresolvable, so this enum carries an explicit ``UNKNOWN`` member.
+    """
+
+    HOUSE = "House"
+    BUNGALOW = "Bungalow"
+    FLAT = "Flat"
+    MAISONETTE = "Maisonette"
+    PARK_HOME = "Park home"
+    UNKNOWN = "Unknown"
+
+
+class PropertyTypeClassificationError(Exception):
+    """Raised when property-type classification fails wholesale.
+
+    A whole-batch failure (the AI backend is unreachable, or returns a reply
+    that cannot be parsed) raises this. A single description that merely
+    cannot be resolved is not an error -- it maps to ``PropertyType.UNKNOWN``.
+    """
diff --git a/domain/sal/property_type_classifier.py b/domain/sal/property_type_classifier.py
new file mode 100644
index 00000000..af941e83
--- /dev/null
+++ b/domain/sal/property_type_classifier.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+from domain.sal.property_type import PropertyType
+
+
+class PropertyTypeClassifier(ABC):
+    """Port: resolves free-text descriptions into SAL ``PropertyType`` values.
+
+    Implementations decide *how* (an LLM, a lookup table, a rules engine);
+    ``SALOrchestrator`` depends only on this interface.
+    """
+
+    @abstractmethod
+    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
+        """Classify each description into a ``PropertyType``.
+
+        Every input description appears as a key in the result. A description
+        that cannot be resolved maps to ``PropertyType.UNKNOWN``.
+
+        Raises:
+            PropertyTypeClassificationError: If the classification call fails
+                wholesale (e.g. the backend is unreachable or returns an
+                unparseable response).
+        """
+        ...
diff --git a/infrastructure/chatgpt/chatgpt_property_type_classifier.py b/infrastructure/chatgpt/chatgpt_property_type_classifier.py
new file mode 100644
index 00000000..d4f0c060
--- /dev/null
+++ b/infrastructure/chatgpt/chatgpt_property_type_classifier.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+import json
+from typing import Any
+
+from domain.sal.property_type import PropertyType
+from domain.sal.property_type_classifier import PropertyTypeClassifier
+from infrastructure.chatgpt.chatgpt import ChatGPT
+
+
+class ChatGptPropertyTypeClassifier(PropertyTypeClassifier):
+    """PropertyTypeClassifier backed by the ChatGPT client."""
+
+    _CATEGORIES = ", ".join(
+        member.value
+        for member in PropertyType
+        if member is not PropertyType.UNKNOWN
+    )
+    _SYSTEM_PROMPT = (
+        "Classify each UK property description into exactly one category. "
+        f"Categories: {_CATEGORIES}. "
+        "Reply with only a JSON object mapping each original description "
+        "to its category, and nothing else."
+    )
+
+    def __init__(self, chat_gpt: ChatGPT) -> None:
+        self._chat_gpt = chat_gpt
+
+    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
+        reply = self._chat_gpt.generate(
+            prompt=json.dumps(sorted(descriptions)),
+            system_prompt=self._SYSTEM_PROMPT,
+        )
+        raw: dict[str, Any] = json.loads(reply)
+        return {
+            description: PropertyType(raw[description])
+            for description in descriptions
+        }
diff --git a/tests/infrastructure/chatgpt/__init__.py b/tests/infrastructure/chatgpt/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py b/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py
new file mode 100644
index 00000000..8c697eb2
--- /dev/null
+++ b/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+from typing import Optional
+
+from domain.sal.property_type import PropertyType
+from infrastructure.chatgpt.chatgpt import ChatGPT
+from infrastructure.chatgpt.chatgpt_property_type_classifier import (
+    ChatGptPropertyTypeClassifier,
+)
+
+
+class _FakeChatGPT(ChatGPT):
+    """Hand-written ChatGPT stand-in: returns a canned reply, records prompts."""
+
+    def __init__(self, reply: str = "{}") -> None:
+        self.prompts: list[str] = []
+        self._reply = reply
+
+    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
+        self.prompts.append(prompt)
+        return self._reply
+
+
+def test_classifies_description_into_property_type() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply='{"semi-detached": "House"}')
+    classifier = ChatGptPropertyTypeClassifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"semi-detached"})
+
+    # Assert
+    assert result == {"semi-detached": PropertyType.HOUSE}

From 11a498ba4e76a56a6797b2f99081f2bf84d8fb0f Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 14:55:01 +0000
Subject: [PATCH 19/29] =?UTF-8?q?Map=20an=20unrecognised=20classification?=
 =?UTF-8?q?=20reply=20to=20UNKNOWN=20=F0=9F=9F=A5?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../chatgpt/chatgpt_property_type_classifier.py      | 10 +++++++++-
 .../chatgpt/test_chatgpt_property_type_classifier.py | 12 ++++++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/infrastructure/chatgpt/chatgpt_property_type_classifier.py b/infrastructure/chatgpt/chatgpt_property_type_classifier.py
index d4f0c060..75ec1556 100644
--- a/infrastructure/chatgpt/chatgpt_property_type_classifier.py
+++ b/infrastructure/chatgpt/chatgpt_property_type_classifier.py
@@ -33,6 +33,14 @@ class ChatGptPropertyTypeClassifier(PropertyTypeClassifier):
         )
         raw: dict[str, Any] = json.loads(reply)
         return {
-            description: PropertyType(raw[description])
+            description: self._to_property_type(raw[description])
             for description in descriptions
         }
+
+    @staticmethod
+    def _to_property_type(value: Any) -> PropertyType:
+        """Map a reply value to a PropertyType, defaulting to UNKNOWN."""
+        try:
+            return PropertyType(value)
+        except ValueError:
+            return PropertyType.UNKNOWN
diff --git a/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py b/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py
index 8c697eb2..d4801154 100644
--- a/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py
+++ b/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py
@@ -31,3 +31,15 @@ def test_classifies_description_into_property_type() -> None:
 
     # Assert
     assert result == {"semi-detached": PropertyType.HOUSE}
+
+
+def test_unrecognised_category_maps_to_unknown() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply='{"garden shed": "Shed"}')
+    classifier = ChatGptPropertyTypeClassifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"garden shed"})
+
+    # Assert
+    assert result == {"garden shed": PropertyType.UNKNOWN}

From a747534f377ecaab6e518b6e8eb186fde4c6bfde Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 15:28:26 +0000
Subject: [PATCH 20/29] refactored to allow multiple column types

---
 applications/SAL/handler.py                   |  48 ++++---
 domain/sal/column_classifier.py               |  39 +++++
 domain/sal/property_type.py                   |   9 --
 domain/sal/property_type_classifier.py        |  27 ----
 domain/sal/wall_type.py                       |  15 ++
 .../chatgpt/chatgpt_column_classifier.py      |  85 +++++++++++
 .../chatgpt_property_type_classifier.py       |  46 ------
 orchestration/sal_orchestrator.py             |  39 ++++-
 .../chatgpt/test_chatgpt_column_classifier.py | 135 ++++++++++++++++++
 .../test_chatgpt_property_type_classifier.py  |  45 ------
 ...lord_description_overrides_orchestrator.py |  85 ++++++++++-
 11 files changed, 420 insertions(+), 153 deletions(-)
 create mode 100644 domain/sal/column_classifier.py
 delete mode 100644 domain/sal/property_type_classifier.py
 create mode 100644 domain/sal/wall_type.py
 create mode 100644 infrastructure/chatgpt/chatgpt_column_classifier.py
 delete mode 100644 infrastructure/chatgpt/chatgpt_property_type_classifier.py
 create mode 100644 tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
 delete mode 100644 tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py

diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py
index af3aa90f..c1d73827 100644
--- a/applications/SAL/handler.py
+++ b/applications/SAL/handler.py
@@ -1,4 +1,6 @@
+import logging
 from typing import Any
+
 import boto3
 from orchestration.sal_orchestrator import (
     SALOrchestrator,
@@ -8,6 +10,15 @@ from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repo
     UnstandardisedAddressListCsvS3Repository,
 )
 from domain.addresses.unstandardised_address import AddressList
+from domain.sal.column_classifier import ColumnClassifier
+from domain.sal.property_type import PropertyType
+from domain.sal.wall_type import WallType
+from infrastructure.chatgpt.chatgpt import ChatGPT
+from infrastructure.chatgpt.chatgpt_column_classifier import (
+    ChatGptColumnClassifier,
+)
+
+logger = logging.getLogger(__name__)
 
 
 def handler(
@@ -28,32 +39,31 @@ def handler(
         csv_client, bucket
     )
 
+    # One ChatGPT-backed classifier per landlord-CSV column, keyed by column name.
+    chat_gpt = ChatGPT()
+    classifiers: dict[str, ColumnClassifier[Any]] = {
+        "Property Type": ChatGptColumnClassifier(
+            chat_gpt, PropertyType, PropertyType.UNKNOWN
+        ),
+        "Walls": ChatGptColumnClassifier(chat_gpt, WallType, WallType.UNKNOWN),
+    }
+
     sal = SALOrchestrator(
         unstandardised_address_repo=unstandardised_address_repo,
+        classifiers=classifiers,
     )
 
     addressList: AddressList = sal.get_unstandardised_addresses(input_s3_uri=s3_uri)
 
-    column_mapping = {
-        # "Wall Description": "Walls",
-        "Property Type": "Property Type",
-    }
+    # Cap the batch to the first 20 while the ChatGPT path is under test.
+    addressList = AddressList(addressList[:20])
 
-    col_to_desc_map = sal.get_col_to_description_mappings(
-        list_of_unstandardised_address=addressList
-    )
+    classified = sal.classify_columns(addressList)
+    for column, mapping in classified.items():
+        logger.info(
+            "Classified %d descriptions for column %r.", len(mapping), column
+        )
 
-    """
-    ----
-    # TODO Property Type:
-    # 1) Make a small enum with all property types (5 enum)
-    # 2) Make an interface with ChatGPTAi to get wall field description and map it to enum
-    # 3) Stroe in landlord overrides
-    # TODO Wall Type:
-    # 1) Make a small enum with all property types (5 enum)
-    # 2) Make an interface with ChatGPTAi to get wall field description and map it to enum
-    # 3) Stroe in landlord overrides
-    ---
-    """
+    # TODO: persist `classified` to landlord overrides.
 
     return {"hello": ["200"]}
diff --git a/domain/sal/column_classifier.py b/domain/sal/column_classifier.py
new file mode 100644
index 00000000..3324d79f
--- /dev/null
+++ b/domain/sal/column_classifier.py
@@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Generic, TypeVar
+
+E = TypeVar("E", bound=Enum)
+
+
+class ClassificationError(Exception):
+    """Raised when classifying a column's descriptions fails wholesale.
+
+    A whole-batch failure (the AI backend is unreachable, or returns a reply
+    that cannot be parsed) raises this. A single description that merely
+    cannot be resolved is not an error -- it maps to the enum's UNKNOWN member.
+    """
+
+
+class ColumnClassifier(ABC, Generic[E]):
+    """Port: resolves free-text descriptions into a category enum ``E``.
+
+    One classifier handles one landlord-CSV column. Implementations decide
+    *how* the mapping is performed (an LLM, a lookup table, a rules engine);
+    ``SALOrchestrator`` depends only on this interface.
+    """
+
+    @abstractmethod
+    def classify(self, descriptions: set[str]) -> dict[str, E]:
+        """Classify each description into a category enum member.
+
+        Every input description appears as a key in the result. A description
+        that cannot be resolved maps to the enum's UNKNOWN member.
+
+        Raises:
+            ClassificationError: If the classification call fails wholesale
+                (e.g. the backend is unreachable or returns an unparseable
+                response).
+        """
+        ...
diff --git a/domain/sal/property_type.py b/domain/sal/property_type.py
index 9659639a..3980c2f0 100644
--- a/domain/sal/property_type.py
+++ b/domain/sal/property_type.py
@@ -14,12 +14,3 @@ class PropertyType(Enum):
     MAISONETTE = "Maisonette"
     PARK_HOME = "Park home"
     UNKNOWN = "Unknown"
-
-
-class PropertyTypeClassificationError(Exception):
-    """Raised when property-type classification fails wholesale.
-
-    A whole-batch failure (the AI backend is unreachable, or returns a reply
-    that cannot be parsed) raises this. A single description that merely
-    cannot be resolved is not an error -- it maps to ``PropertyType.UNKNOWN``.
-    """
diff --git a/domain/sal/property_type_classifier.py b/domain/sal/property_type_classifier.py
deleted file mode 100644
index af941e83..00000000
--- a/domain/sal/property_type_classifier.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-
-from domain.sal.property_type import PropertyType
-
-
-class PropertyTypeClassifier(ABC):
-    """Port: resolves free-text descriptions into SAL ``PropertyType`` values.
-
-    Implementations decide *how* (an LLM, a lookup table, a rules engine);
-    ``SALOrchestrator`` depends only on this interface.
-    """
-
-    @abstractmethod
-    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
-        """Classify each description into a ``PropertyType``.
-
-        Every input description appears as a key in the result. A description
-        that cannot be resolved maps to ``PropertyType.UNKNOWN``.
-
-        Raises:
-            PropertyTypeClassificationError: If the classification call fails
-                wholesale (e.g. the backend is unreachable or returns an
-                unparseable response).
-        """
-        ...
diff --git a/domain/sal/wall_type.py b/domain/sal/wall_type.py
new file mode 100644
index 00000000..05dc2ba9
--- /dev/null
+++ b/domain/sal/wall_type.py
@@ -0,0 +1,15 @@
+from enum import Enum
+
+
+class WallType(Enum):
+    """A landlord-supplied wall construction type, as resolved by the SAL context.
+
+    Mirrors the main RdSAP wall constructions. Like the SAL ``PropertyType``,
+    it carries an explicit ``UNKNOWN`` member for unresolvable CSV values.
+    """
+
+    CAVITY = "Cavity"
+    SOLID_BRICK = "Solid Brick"
+    TIMBER_FRAME = "Timber frame"
+    SANDSTONE = "Sandstone"
+    UNKNOWN = "Unknown"
diff --git a/infrastructure/chatgpt/chatgpt_column_classifier.py b/infrastructure/chatgpt/chatgpt_column_classifier.py
new file mode 100644
index 00000000..8f564e6c
--- /dev/null
+++ b/infrastructure/chatgpt/chatgpt_column_classifier.py
@@ -0,0 +1,115 @@
+from __future__ import annotations
+
+import json
+from enum import Enum
+from typing import Any, TypeVar
+
+from domain.sal.column_classifier import ClassificationError, ColumnClassifier
+from infrastructure.chatgpt.chatgpt import ChatGPT
+from infrastructure.chatgpt.exceptions import ChatGPTClientError
+
+E = TypeVar("E", bound=Enum)
+
+
+class ChatGptColumnClassifier(ColumnClassifier[E]):
+    """ColumnClassifier backed by ChatGPT, parametrised by a category enum.
+
+    The same classification path -- prompt, JSON parsing, UNKNOWN fallback --
+    serves any category enum; only ``category_enum`` and its ``unknown``
+    member differ between columns.
+    """
+
+    def __init__(
+        self,
+        chat_gpt: ChatGPT,
+        category_enum: type[E],
+        unknown: E,
+    ) -> None:
+        """Store the client and the enum this classifier resolves into.
+
+        Args:
+            chat_gpt: Client used to send the classification prompt.
+            category_enum: Enum whose member values are the categories.
+            unknown: Member returned for anything that cannot be resolved;
+                it is left out of the categories offered in the prompt.
+        """
+        self._chat_gpt = chat_gpt
+        self._category_enum = category_enum
+        self._unknown = unknown
+
+    def classify(self, descriptions: set[str]) -> dict[str, E]:
+        """Classify each description into a member of the category enum.
+
+        Every input description is a key of the result. A description that
+        the reply omits, or maps to a value that is not a category, resolves
+        to the ``unknown`` member. An empty input returns ``{}`` without
+        calling ChatGPT.
+
+        Raises:
+            ClassificationError: If the ChatGPT call fails, or the reply is
+                not valid JSON, or the reply is valid JSON but not an object.
+        """
+        if not descriptions:
+            return {}
+        try:
+            reply = self._chat_gpt.generate(
+                # Sorted so the same set always produces the same prompt.
+                prompt=json.dumps(sorted(descriptions)),
+                system_prompt=self._system_prompt(),
+            )
+        except ChatGPTClientError as error:
+            raise ClassificationError(
+                f"ChatGPT classification failed for "
+                f"{self._category_enum.__name__}."
+            ) from error
+        try:
+            raw: Any = json.loads(self._strip_code_fence(reply))
+        except json.JSONDecodeError as error:
+            raise ClassificationError(
+                f"ChatGPT returned a reply that is not valid JSON: {reply!r}"
+            ) from error
+        # Valid JSON need not be an object: a bare list, string or number
+        # parses fine but has no ``.get``. Report it as a failed
+        # classification rather than leaking an AttributeError.
+        if not isinstance(raw, dict):
+            raise ClassificationError(
+                f"ChatGPT returned JSON that is not an object: {reply!r}"
+            )
+        return {
+            description: self._to_category(raw.get(description))
+            for description in descriptions
+        }
+
+    def _system_prompt(self) -> str:
+        """Build the system prompt listing every category except UNKNOWN."""
+        categories = ", ".join(
+            member.value
+            for member in self._category_enum
+            if member is not self._unknown
+        )
+        return (
+            "Classify each free-text description into exactly one category. "
+            f"Categories: {categories}. "
+            "Reply with only a JSON object mapping each original description "
+            "to its category, and nothing else."
+        )
+
+    def _to_category(self, value: Any) -> E:
+        """Map a reply value to a category member, defaulting to UNKNOWN."""
+        try:
+            return self._category_enum(value)
+        except ValueError:
+            return self._unknown
+
+    @staticmethod
+    def _strip_code_fence(reply: str) -> str:
+        """Remove a surrounding markdown code fence, if ChatGPT added one."""
+        text = reply.strip()
+        if not text.startswith("```"):
+            return text
+        # Drop the opening fence line (which may carry a language tag such
+        # as ``json``) and, if present, the closing fence line.
+        lines = text.splitlines()[1:]
+        if lines and lines[-1].strip() == "```":
+            lines = lines[:-1]
+        return "\n".join(lines)
diff --git a/infrastructure/chatgpt/chatgpt_property_type_classifier.py b/infrastructure/chatgpt/chatgpt_property_type_classifier.py
deleted file mode 100644
index 75ec1556..00000000
--- a/infrastructure/chatgpt/chatgpt_property_type_classifier.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from __future__ import annotations
-
-import json
-from typing import Any
-
-from domain.sal.property_type import PropertyType
-from domain.sal.property_type_classifier import PropertyTypeClassifier
-from infrastructure.chatgpt.chatgpt import ChatGPT
-
-
-class ChatGptPropertyTypeClassifier(PropertyTypeClassifier):
-    """PropertyTypeClassifier backed by the ChatGPT client."""
-
-    _CATEGORIES = ", ".join(
-        member.value
-        for member in PropertyType
-        if member is not PropertyType.UNKNOWN
-    )
-    _SYSTEM_PROMPT = (
-        "Classify each UK property description into exactly one category. "
-        f"Categories: {_CATEGORIES}. "
-        "Reply with only a JSON object mapping each original description "
-        "to its category, and nothing else."
-    )
-
-    def __init__(self, chat_gpt: ChatGPT) -> None:
-        self._chat_gpt = chat_gpt
-
-    def classify(self, descriptions: set[str]) -> dict[str, PropertyType]:
-        reply = self._chat_gpt.generate(
-            prompt=json.dumps(sorted(descriptions)),
-            system_prompt=self._SYSTEM_PROMPT,
-        )
-        raw: dict[str, Any] = json.loads(reply)
-        return {
-            description: self._to_property_type(raw[description])
-            for description in descriptions
-        }
-
-    @staticmethod
-    def _to_property_type(value: Any) -> PropertyType:
-        """Map a reply value to a PropertyType, defaulting to UNKNOWN."""
-        try:
-            return PropertyType(value)
-        except ValueError:
-            return PropertyType.UNKNOWN
diff --git a/orchestration/sal_orchestrator.py b/orchestration/sal_orchestrator.py
index 8ad21388..6b451746 100644
--- a/orchestration/sal_orchestrator.py
+++ b/orchestration/sal_orchestrator.py
@@ -1,12 +1,22 @@
+from enum import Enum
+from typing import Any
+
+from domain.addresses.unstandardised_address import AddressList
+from domain.sal.column_classifier import ColumnClassifier
 from repositories.unstandardised_address.unstandardised_address_list_repository import (
     UnstandardisedAddressListRepository,
 )
-from domain.addresses.unstandardised_address import AddressList
 
 
 class SALOrchestrator:
-    def __init__(self, unstandardised_address_repo: UnstandardisedAddressListRepository) -> None:
+    def __init__(
+        self,
+        unstandardised_address_repo: UnstandardisedAddressListRepository,
+        classifiers: dict[str, ColumnClassifier[Any]],
+    ) -> None:
         self._unstandardised_address_repo = unstandardised_address_repo
+        # Keyed by landlord-CSV column name.
+        self._classifiers = classifiers
 
     def get_unstandardised_addresses(
         self,
@@ -20,6 +30,27 @@ class SALOrchestrator:
         mappings: dict[str, set[str]] = {}
         for unstandardised_address in list_of_unstandardised_address:
             for key, value in unstandardised_address.additional_info.items():
-                # Lower-case so case-only typos collapse to one variant.
-                mappings.setdefault(key, set()).add(value.lower())
+                bucket = mappings.setdefault(key, set())
+                # A comma-separated value is several descriptions in one cell;
+                # split it so each is its own entry. Lower-case so case-only
+                # typos collapse to one variant.
+                for variant in value.split(","):
+                    variant = variant.strip().lower()
+                    if variant:
+                        bucket.add(variant)
         return mappings
+
+    def classify_columns(
+        self, addresses: AddressList
+    ) -> dict[str, dict[str, Enum]]:
+        """Run each registered classifier over its column's descriptions.
+
+        The result maps column name to ``{description: category}``; a
+        registered column the addresses lack is classified with an empty set.
+        """
+        by_column = self.get_col_to_description_mappings(addresses)
+        classified: dict[str, dict[str, Enum]] = {}
+        for column, classifier in self._classifiers.items():
+            descriptions = by_column.get(column, set())
+            classified[column] = classifier.classify(descriptions)
+        return classified
diff --git a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
new file mode 100644
index 00000000..5ec854f1
--- /dev/null
+++ b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
@@ -0,0 +1,135 @@
+from __future__ import annotations
+
+from typing import Optional
+
+import pytest
+
+from domain.sal.column_classifier import ClassificationError
+from domain.sal.property_type import PropertyType
+from domain.sal.wall_type import WallType
+from infrastructure.chatgpt.chatgpt import ChatGPT
+from infrastructure.chatgpt.chatgpt_column_classifier import (
+    ChatGptColumnClassifier,
+)
+from infrastructure.chatgpt.exceptions import ChatGPTClientError
+
+
+class _FakeChatGPT(ChatGPT):
+    """Hand-written ChatGPT stand-in: returns a canned reply, records prompts."""
+
+    def __init__(
+        self,
+        reply: str = "{}",
+        error: Optional[Exception] = None,
+    ) -> None:
+        self.prompts: list[str] = []
+        self._reply = reply
+        self._error = error
+
+    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
+        self.prompts.append(prompt)
+        if self._error is not None:
+            raise self._error
+        return self._reply
+
+
+def _property_type_classifier(
+    chat_gpt: ChatGPT,
+) -> ChatGptColumnClassifier[PropertyType]:
+    return ChatGptColumnClassifier(chat_gpt, PropertyType, PropertyType.UNKNOWN)
+
+
+def test_classifies_description_into_its_category() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply='{"semi-detached": "House"}')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"semi-detached"})
+
+    # Assert
+    assert result == {"semi-detached": PropertyType.HOUSE}
+
+
+def test_classifies_when_reply_is_wrapped_in_a_markdown_fence() -> None:
+    # Arrange: ChatGPT wraps the JSON in a ```json ... ``` code fence.
+    chat_gpt = _FakeChatGPT(reply='```json\n{"semi-detached": "House"}\n```')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"semi-detached"})
+
+    # Assert
+    assert result == {"semi-detached": PropertyType.HOUSE}
+
+
+def test_unrecognised_category_maps_to_unknown() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply='{"garden shed": "Shed"}')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"garden shed"})
+
+    # Assert
+    assert result == {"garden shed": PropertyType.UNKNOWN}
+
+
+def test_description_omitted_from_reply_maps_to_unknown() -> None:
+    # Arrange: the reply classifies one description but not the other.
+    chat_gpt = _FakeChatGPT(reply='{"semi-detached": "House"}')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"semi-detached", "TBC"})
+
+    # Assert
+    assert result == {
+        "semi-detached": PropertyType.HOUSE,
+        "TBC": PropertyType.UNKNOWN,
+    }
+
+
+def test_chatgpt_failure_raises_classification_error() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(error=ChatGPTClientError("backend unreachable"))
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act / Assert
+    with pytest.raises(ClassificationError):
+        classifier.classify({"semi-detached"})
+
+
+def test_non_json_reply_raises_classification_error_with_the_raw_reply() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply="sorry, I can't do that")
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act / Assert: the error surfaces the offending reply for diagnosis.
+    with pytest.raises(ClassificationError, match="sorry, I can't do that"):
+        classifier.classify({"semi-detached"})
+
+
+def test_empty_description_set_returns_empty_without_calling_chatgpt() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply='{"unused": "House"}')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify(set())
+
+    # Assert
+    assert result == {}
+    assert chat_gpt.prompts == []
+
+
+def test_classifies_with_a_different_category_enum() -> None:
+    # Arrange: the same adapter classifies a WallType column.
+    chat_gpt = _FakeChatGPT(reply='{"solid brick wall": "Solid Brick"}')
+    classifier = ChatGptColumnClassifier(chat_gpt, WallType, WallType.UNKNOWN)
+
+    # Act
+    result = classifier.classify({"solid brick wall"})
+
+    # Assert
+    assert result == {"solid brick wall": WallType.SOLID_BRICK}
diff --git a/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py b/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py
deleted file mode 100644
index d4801154..00000000
--- a/tests/infrastructure/chatgpt/test_chatgpt_property_type_classifier.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from __future__ import annotations
-
-from typing import Optional
-
-from domain.sal.property_type import PropertyType
-from infrastructure.chatgpt.chatgpt import ChatGPT
-from infrastructure.chatgpt.chatgpt_property_type_classifier import (
-    ChatGptPropertyTypeClassifier,
-)
-
-
-class _FakeChatGPT(ChatGPT):
-    """Hand-written ChatGPT stand-in: returns a canned reply, records prompts."""
-
-    def __init__(self, reply: str = "{}") -> None:
-        self.prompts: list[str] = []
-        self._reply = reply
-
-    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
-        self.prompts.append(prompt)
-        return self._reply
-
-
-def test_classifies_description_into_property_type() -> None:
-    # Arrange
-    chat_gpt = _FakeChatGPT(reply='{"semi-detached": "House"}')
-    classifier = ChatGptPropertyTypeClassifier(chat_gpt)
-
-    # Act
-    result = classifier.classify({"semi-detached"})
-
-    # Assert
-    assert result == {"semi-detached": PropertyType.HOUSE}
-
-
-def test_unrecognised_category_maps_to_unknown() -> None:
-    # Arrange
-    chat_gpt = _FakeChatGPT(reply='{"garden shed": "Shed"}')
-    classifier = ChatGptPropertyTypeClassifier(chat_gpt)
-
-    # Act
-    result = classifier.classify({"garden shed"})
-
-    # Assert
-    assert result == {"garden shed": PropertyType.UNKNOWN}
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index b3658014..62f1a329 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -1,7 +1,13 @@
 from __future__ import annotations
 
+from enum import Enum
+from typing import Any, Optional
+
 from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
+from domain.sal.column_classifier import ColumnClassifier
+from domain.sal.property_type import PropertyType
+from domain.sal.wall_type import WallType
 from orchestration.sal_orchestrator import (
     SALOrchestrator,
 )
@@ -20,7 +26,21 @@ class _StubUnstandardisedAddressRepository(UnstandardisedAddressListRepository):
         raise NotImplementedError()
 
 
-def _make_unstandardised_address(landlord_additional_info: dict[str, str]) -> UnstandardisedAddress:
+class _StubColumnClassifier(ColumnClassifier[Enum]):
+    """Records the descriptions it received; returns a canned mapping."""
+
+    def __init__(self, result: dict[str, Enum]) -> None:
+        self.received: Optional[set[str]] = None
+        self._result = result
+
+    def classify(self, descriptions: set[str]) -> dict[str, Enum]:
+        self.received = descriptions
+        return self._result
+
+
+def _make_unstandardised_address(
+    landlord_additional_info: dict[str, str],
+) -> UnstandardisedAddress:
     return UnstandardisedAddress(
         address="1 High St",
         postcode=Postcode("AA1 1AA"),
@@ -28,8 +48,13 @@ def _make_unstandardised_address(landlord_additional_info: dict[str, str]) -> Un
     )
 
 
-def _orchestrator() -> SALOrchestrator:
-    return SALOrchestrator(unstandardised_address_repo=_StubUnstandardisedAddressRepository())
+def _orchestrator(
+    classifiers: Optional[dict[str, ColumnClassifier[Any]]] = None,
+) -> SALOrchestrator:
+    return SALOrchestrator(
+        unstandardised_address_repo=_StubUnstandardisedAddressRepository(),
+        classifiers=classifiers or {},
+    )
 
 
 def test_collects_every_value_per_shared_key() -> None:
@@ -86,6 +111,19 @@ def test_case_only_variants_collapse_to_one() -> None:
     assert mappings == {"description": {"cosy"}}
 
 
+def test_comma_separated_value_splits_into_individual_entries() -> None:
+    # arrange: a single cell packs several descriptions, comma-separated.
+    addresses = AddressList(
+        [_make_unstandardised_address({"description": "cosy, bright, COSY"})]
+    )
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert: each comma-separated part is its own trimmed, lower-cased entry.
+    assert mappings == {"description": {"cosy", "bright"}}
+
+
 def test_empty_address_list_yields_empty_mapping() -> None:
     # arrange / act
     mappings = _orchestrator().get_col_to_description_mappings(AddressList([]))
@@ -103,3 +141,44 @@ def test_single_address_yields_single_value_per_key() -> None:
 
     # assert
     assert mappings == {"description": {"cosy"}}
+
+
+def test_classify_columns_classifies_each_registered_column() -> None:
+    # arrange: addresses carry two classifiable columns.
+    addresses = AddressList(
+        [
+            _make_unstandardised_address(
+                {"Property Type": "semi-detached", "Walls": "solid brick"}
+            ),
+        ]
+    )
+    property_types = _StubColumnClassifier(
+        result={"semi-detached": PropertyType.HOUSE}
+    )
+    wall_types = _StubColumnClassifier(result={"solid brick": WallType.SOLID_BRICK})
+
+    # act
+    result = _orchestrator(
+        {"Property Type": property_types, "Walls": wall_types}
+    ).classify_columns(addresses)
+
+    # assert: each registered column was classified independently.
+    assert result == {
+        "Property Type": {"semi-detached": PropertyType.HOUSE},
+        "Walls": {"solid brick": WallType.SOLID_BRICK},
+    }
+
+
+def test_classify_columns_yields_empty_mapping_for_an_absent_column() -> None:
+    # arrange: a classifier is registered for a column the addresses lack.
+    addresses = AddressList([_make_unstandardised_address({"Walls": "cavity"})])
+    property_types = _StubColumnClassifier(result={})
+
+    # act
+    result = _orchestrator(
+        {"Property Type": property_types}
+    ).classify_columns(addresses)
+
+    # assert: the absent column classified an empty description set.
+    assert result == {"Property Type": {}}
+    assert property_types.received == set()

From 96aeed4f2ee6550555ae34ddd0d3b6bba3ea6c13 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 22 May 2026 15:36:46 +0000
Subject: [PATCH 21/29] Remove EPC and asset_list changes unrelated to SAL
 handler

This branch's objective is the SAL ingestion handler
(applications/SAL/handler.py) and its dependency tree. Drop work
that crept in but is unreferenced by it:

- EPC feature: domain/epc, infrastructure/epc (gov_uk + historical
  clients), tests/infrastructure/epc
- datatypes/epc edits (instantaneous_wwhrs Optional) reverted to main
- asset_list/app.py local data-file/column tweak reverted to main

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 asset_list/app.py                             |  13 +-
 datatypes/epc/domain/epc_property_data.py     |  22 +-
 datatypes/epc/schema/rdsap_schema_17_0.py     |   2 +-
 datatypes/epc/schema/rdsap_schema_17_1.py     |   2 +-
 datatypes/epc/schema/rdsap_schema_18_0.py     |   3 +-
 datatypes/epc/schema/rdsap_schema_19_0.py     |   2 +-
 datatypes/epc/schema/rdsap_schema_20_0_0.py   |   3 +-
 datatypes/epc/schema/rdsap_schema_21_0_0.py   |   4 +-
 datatypes/epc/schema/rdsap_schema_21_0_1.py   |   4 +-
 domain/epc/__init__.py                        |   4 -
 domain/epc/epc_record.py                      |  21 --
 domain/epc/property_type.py                   |   9 -
 infrastructure/epc/__init__.py                |  13 --
 infrastructure/epc/epc_client.py              |  41 ----
 infrastructure/epc/exceptions.py              |  17 --
 infrastructure/epc/gov_uk/__init__.py         |   6 -
 infrastructure/epc/gov_uk/_retry.py           |  34 ---
 .../epc/gov_uk/gov_uk_epc_client.py           | 132 -----------
 .../epc/gov_uk/gov_uk_property_type.py        |  25 ---
 .../__init__.py                               |   5 -
 ...orical_open_data_communities_epc_client.py |  24 --
 tests/infrastructure/epc/__init__.py          |   0
 tests/infrastructure/epc/gov_uk/__init__.py   |   0
 tests/infrastructure/epc/gov_uk/conftest.py   |  49 ----
 .../epc/gov_uk/test_gov_uk_epc_client.py      | 211 ------------------
 25 files changed, 21 insertions(+), 625 deletions(-)
 delete mode 100644 domain/epc/__init__.py
 delete mode 100644 domain/epc/epc_record.py
 delete mode 100644 domain/epc/property_type.py
 delete mode 100644 infrastructure/epc/__init__.py
 delete mode 100644 infrastructure/epc/epc_client.py
 delete mode 100644 infrastructure/epc/exceptions.py
 delete mode 100644 infrastructure/epc/gov_uk/__init__.py
 delete mode 100644 infrastructure/epc/gov_uk/_retry.py
 delete mode 100644 infrastructure/epc/gov_uk/gov_uk_epc_client.py
 delete mode 100644 infrastructure/epc/gov_uk/gov_uk_property_type.py
 delete mode 100644 infrastructure/epc/historical_open_data_communities/__init__.py
 delete mode 100644 infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py
 delete mode 100644 tests/infrastructure/epc/__init__.py
 delete mode 100644 tests/infrastructure/epc/gov_uk/__init__.py
 delete mode 100644 tests/infrastructure/epc/gov_uk/conftest.py
 delete mode 100644 tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py

diff --git a/asset_list/app.py b/asset_list/app.py
index aef410e6..424f4df6 100644
--- a/asset_list/app.py
+++ b/asset_list/app.py
@@ -79,17 +79,17 @@ def app():
     """
 
     data_folder = "/workspaces/model/asset_list"
-    data_filename = "asset_list (8).xlsx"
-    sheet_name = "Standardised Asset List"
-    postcode_column = "postcode"
-    address1_column = "domna_address_1"
+    data_filename = "hyde.xlsx"
+    sheet_name = "AddressProfilingResults"
+    postcode_column = "Postcode"
+    address1_column = "Address"
     address1_method = None
-    fulladdress_column = "domna_address_1"
+    fulladdress_column = "Postcode"
     address_cols_to_concat = []
     missing_postcodes_method = None
     landlord_year_built = None
     landlord_os_uprn = None
-    landlord_property_type = "landlord_property_id"  # Good to include if landlord gave
+    landlord_property_type = "Property Type"  # Good to include if landlord gave
     landlord_built_form = None  # Good to include if landlord gave
     landlord_wall_construction = None
     landlord_roof_construction = None
@@ -468,3 +468,4 @@ def app():
                 asset_list.duplicated_addresses.to_excel(
                     writer, sheet_name="Duplicate Properties", index=False
                 )
+
diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py
index 68a25205..8795b389 100644
--- a/datatypes/epc/domain/epc_property_data.py
+++ b/datatypes/epc/domain/epc_property_data.py
@@ -29,9 +29,7 @@ class MainHeatingDetail:
     boiler_flue_type: Optional[int] = None  # TODO: make enum?
     boiler_ignition_type: Optional[int] = None  # TODO: make enum?
     central_heating_pump_age: Optional[int] = None
-    central_heating_pump_age_str: Optional[str] = (
-        None  # str from site notes e.g. "Unknown", "Pre 2013"
-    )
+    central_heating_pump_age_str: Optional[str] = None  # str from site notes e.g. "Unknown", "Pre 2013"
     main_heating_index_number: Optional[int] = None
     sap_main_heating_code: Optional[int] = None  # TODO: make enum?
     main_heating_number: Optional[int] = None
@@ -56,7 +54,7 @@ class ShowerOutlets:
 
 @dataclass
 class SapHeating:
-    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
+    instantaneous_wwhrs: InstantaneousWwhrs
     main_heating_details: List[MainHeatingDetail]
     has_fixed_air_conditioning: bool
     cylinder_size: Optional[Union[int, str]] = (
@@ -69,9 +67,7 @@ class SapHeating:
     cylinder_insulation_type: Optional[Union[int, str]] = None
     cylinder_thermostat: Optional[str] = None
     secondary_fuel_type: Optional[int] = None
-    secondary_heating_type: Optional[Union[int, str]] = (
-        None  # int from API; str from site notes
-    )
+    secondary_heating_type: Optional[Union[int, str]] = None  # int from API; str from site notes
     cylinder_insulation_thickness_mm: Optional[int] = None
 
 
@@ -79,9 +75,7 @@ class SapHeating:
 class SapVentilation:
     ventilation_type: Optional[str] = None
     draught_lobby: Optional[bool] = None
-    pressure_test: Optional[str] = (
-        None  # str from site notes e.g. "No test"; int in API via mechanical_ventilation
-    )
+    pressure_test: Optional[str] = None  # str from site notes e.g. "No test"; int in API via mechanical_ventilation
     open_flues_count: Optional[int] = None
     closed_flues_count: Optional[int] = None
     boiler_flues_count: Optional[int] = None
@@ -225,12 +219,8 @@ class SapBuildingPart:
         None  # TODO: make enum/mapping?
     )
     floor_type: Optional[str] = None  # str from site notes e.g. "Ground Floor"
-    floor_construction_type: Optional[str] = (
-        None  # str from site notes; distinct from floor_construction: int in SapFloorDimension
-    )
-    floor_insulation_type_str: Optional[str] = (
-        None  # str from site notes e.g. "As Built"
-    )
+    floor_construction_type: Optional[str] = None  # str from site notes; distinct from floor_construction: int in SapFloorDimension
+    floor_insulation_type_str: Optional[str] = None  # str from site notes e.g. "As Built"
     floor_u_value_known: Optional[bool] = None
 
     roof_construction: Optional[int] = None
diff --git a/datatypes/epc/schema/rdsap_schema_17_0.py b/datatypes/epc/schema/rdsap_schema_17_0.py
index 9cbedf97..22aaded4 100644
--- a/datatypes/epc/schema/rdsap_schema_17_0.py
+++ b/datatypes/epc/schema/rdsap_schema_17_0.py
@@ -37,7 +37,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
+    instantaneous_wwhrs: InstantaneousWwhrs
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     cylinder_insulation_type: int
diff --git a/datatypes/epc/schema/rdsap_schema_17_1.py b/datatypes/epc/schema/rdsap_schema_17_1.py
index b0af07e6..a4c007ed 100644
--- a/datatypes/epc/schema/rdsap_schema_17_1.py
+++ b/datatypes/epc/schema/rdsap_schema_17_1.py
@@ -41,7 +41,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
+    instantaneous_wwhrs: InstantaneousWwhrs
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     cylinder_insulation_type: int
diff --git a/datatypes/epc/schema/rdsap_schema_18_0.py b/datatypes/epc/schema/rdsap_schema_18_0.py
index 4ce2f887..a038dc9b 100644
--- a/datatypes/epc/schema/rdsap_schema_18_0.py
+++ b/datatypes/epc/schema/rdsap_schema_18_0.py
@@ -41,7 +41,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
+    instantaneous_wwhrs: InstantaneousWwhrs
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
@@ -86,7 +86,6 @@ class SapFloorDimension:
 @dataclass
 class SapRoomInRoof:
     """Room-in-roof details. floor_area is a Measurement object in schema 18.0."""
-
     floor_area: Measurement
     insulation: str
     roof_room_connected: str
diff --git a/datatypes/epc/schema/rdsap_schema_19_0.py b/datatypes/epc/schema/rdsap_schema_19_0.py
index b3c77ec4..b94d9bb3 100644
--- a/datatypes/epc/schema/rdsap_schema_19_0.py
+++ b/datatypes/epc/schema/rdsap_schema_19_0.py
@@ -41,7 +41,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
+    instantaneous_wwhrs: InstantaneousWwhrs
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
diff --git a/datatypes/epc/schema/rdsap_schema_20_0_0.py b/datatypes/epc/schema/rdsap_schema_20_0_0.py
index 9deb235e..8f3986a2 100644
--- a/datatypes/epc/schema/rdsap_schema_20_0_0.py
+++ b/datatypes/epc/schema/rdsap_schema_20_0_0.py
@@ -49,7 +49,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
+    instantaneous_wwhrs: InstantaneousWwhrs
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
@@ -103,7 +103,6 @@ class SapFloorDimension:
 @dataclass
 class SapRoomInRoof:
     """Room-in-roof details. floor_area is a plain number in schema 20.0.0 (not a Measurement object)."""
-
     floor_area: Union[int, float]
     insulation: str
     roof_room_connected: str
diff --git a/datatypes/epc/schema/rdsap_schema_21_0_0.py b/datatypes/epc/schema/rdsap_schema_21_0_0.py
index 8d19e5f9..eee00cb8 100644
--- a/datatypes/epc/schema/rdsap_schema_21_0_0.py
+++ b/datatypes/epc/schema/rdsap_schema_21_0_0.py
@@ -33,7 +33,6 @@ class ShowerOutlets:
 @dataclass
 class InstantaneousWwhrs:
     """Changed in 21.0.0: references WWHRS product index numbers instead of room counts."""
-
     wwhrs_index_number1: Optional[int] = None
     wwhrs_index_number2: Optional[int] = None
 
@@ -62,7 +61,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
+    instantaneous_wwhrs: InstantaneousWwhrs
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
@@ -155,7 +154,6 @@ class SapFloorDimension:
 @dataclass
 class SapRoomInRoof:
     """Room-in-roof details. insulation and roof_room_connected removed in schema 21.0.0."""
-
     floor_area: Union[int, float]
     construction_age_band: str
 
diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py
index f6be7cc3..9b3dbd1d 100644
--- a/datatypes/epc/schema/rdsap_schema_21_0_1.py
+++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py
@@ -50,7 +50,7 @@ class MainHeatingDetail:
     main_heating_fraction: int
     main_heating_data_source: int
     boiler_flue_type: Optional[int] = None
-    fan_flue_present: Optional[str] = None  # TODO: make bool
+    fan_flue_present: Optional[str] = None # TODO: make bool
     boiler_ignition_type: Optional[int] = None
     central_heating_pump_age: Optional[int] = None
     main_heating_index_number: Optional[int] = None
@@ -62,7 +62,7 @@ class SapHeating:
     cylinder_size: int
     water_heating_code: int
     water_heating_fuel: int
-    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
+    instantaneous_wwhrs: InstantaneousWwhrs
     main_heating_details: List[MainHeatingDetail]
     immersion_heating_type: Union[int, str]
     has_fixed_air_conditioning: str
diff --git a/domain/epc/__init__.py b/domain/epc/__init__.py
deleted file mode 100644
index e49fea42..00000000
--- a/domain/epc/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from domain.epc.epc_record import EpcRecord
-from domain.epc.property_type import PropertyType
-
-__all__ = ["EpcRecord", "PropertyType"]
diff --git a/domain/epc/epc_record.py b/domain/epc/epc_record.py
deleted file mode 100644
index 7194d1d6..00000000
--- a/domain/epc/epc_record.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Optional
-
-from domain.epc.property_type import PropertyType
-
-
-@dataclass(frozen=True)
-class EpcRecord:
-    """A streamlined record of EPC property data.
-
-    A focused subset of the full ``EpcPropertyData``: a property's identity
-    plus its typed property type. Grow this with further fields as the
-    domain needs them.
-    """
-
-    address_line_1: str
-    postcode: str
-    uprn: Optional[int]
-    property_type: PropertyType
diff --git a/domain/epc/property_type.py b/domain/epc/property_type.py
deleted file mode 100644
index 707988aa..00000000
--- a/domain/epc/property_type.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from enum import Enum
-
-
-class PropertyType(Enum):
-    HOUSE = "House"
-    BUNGALOW = "Bungalow"
-    FLAT = "Flat"
-    MAISONETTE = "Maisonette"
-    PARK_HOME = "Park home"
diff --git a/infrastructure/epc/__init__.py b/infrastructure/epc/__init__.py
deleted file mode 100644
index f99a7cb3..00000000
--- a/infrastructure/epc/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from infrastructure.epc.epc_client import EpcClient
-from infrastructure.epc.exceptions import (
-    EpcApiError,
-    EpcNotFoundError,
-    EpcRateLimitError,
-)
-
-__all__ = [
-    "EpcApiError",
-    "EpcClient",
-    "EpcNotFoundError",
-    "EpcRateLimitError",
-]
diff --git a/infrastructure/epc/epc_client.py b/infrastructure/epc/epc_client.py
deleted file mode 100644
index d1f8639c..00000000
--- a/infrastructure/epc/epc_client.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from typing import Optional
-
-from datatypes.epc.domain.epc_property_data import EpcPropertyData
-from datatypes.epc.search import EpcSearchResult
-
-
-class EpcClient(ABC):
-    """Interface for retrieving EPC (Energy Performance Certificate) data.
-
-    Implementations fetch from a data source and return domain objects;
-    callers depend only on this interface, not on a concrete transport.
-    """
-
-    @abstractmethod
-    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
-        """Return the EPC certificates registered at ``postcode``.
-
-        Returns an empty list when the postcode has no certificates.
-        """
-        ...
-
-    @abstractmethod
-    def get_by_certificate_number(
-        self, certificate_number: str
-    ) -> EpcPropertyData:
-        """Return the full EPC record for a certificate number.
-
-        Raises EpcNotFoundError when no such certificate exists.
-        """
-        ...
-
-    @abstractmethod
-    def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]:
-        """Return the most recent EPC record for ``uprn``.
-
-        Returns None when the UPRN has no certificates.
-        """
-        ...
diff --git a/infrastructure/epc/exceptions.py b/infrastructure/epc/exceptions.py
deleted file mode 100644
index 8e2e5165..00000000
--- a/infrastructure/epc/exceptions.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from typing import Optional
-
-
-class EpcApiError(Exception):
-    """Base for all EPC client errors."""
-
-
-class EpcNotFoundError(EpcApiError):
-    """Raised when the API returns 404 for a resource that must exist."""
-
-
-class EpcRateLimitError(EpcApiError):
-    """Raised when the API returns 429 and all retries are exhausted."""
-
-    def __init__(self, message: str, retry_after: Optional[float] = None) -> None:
-        super().__init__(message)
-        self.retry_after = retry_after
diff --git a/infrastructure/epc/gov_uk/__init__.py b/infrastructure/epc/gov_uk/__init__.py
deleted file mode 100644
index d491a1ef..00000000
--- a/infrastructure/epc/gov_uk/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from infrastructure.epc.gov_uk.gov_uk_epc_client import GovUkEpcClient
-from infrastructure.epc.gov_uk.gov_uk_property_type import (
-    property_type_from_gov_uk_code,
-)
-
-__all__ = ["GovUkEpcClient", "property_type_from_gov_uk_code"]
diff --git a/infrastructure/epc/gov_uk/_retry.py b/infrastructure/epc/gov_uk/_retry.py
deleted file mode 100644
index db92b131..00000000
--- a/infrastructure/epc/gov_uk/_retry.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import time
-from typing import Callable, Optional, TypeVar
-
-from infrastructure.epc.exceptions import EpcRateLimitError
-
-T = TypeVar("T")
-
-
-def call_with_retry(
-    fn: Callable[[], T],
-    max_retries: int = 5,
-    backoff_base: float = 1.0,
-    backoff_multiplier: float = 2.0,
-    max_backoff: float = 60.0,
-) -> T:
-    """Call ``fn``, retrying on EpcRateLimitError with exponential backoff.
-
-    Honours the API's ``Retry-After`` header when present, otherwise backs off
-    ``backoff_base * backoff_multiplier ** attempt`` (capped at ``max_backoff``).
-    """
-    last_exc: Optional[EpcRateLimitError] = None
-    for attempt in range(max_retries + 1):
-        try:
-            return fn()
-        except EpcRateLimitError as exc:
-            last_exc = exc
-            if attempt < max_retries:
-                if exc.retry_after is not None:
-                    delay = exc.retry_after
-                else:
-                    delay = backoff_base * (backoff_multiplier**attempt)
-                time.sleep(min(delay, max_backoff))
-    assert last_exc is not None
-    raise last_exc
diff --git a/infrastructure/epc/gov_uk/gov_uk_epc_client.py b/infrastructure/epc/gov_uk/gov_uk_epc_client.py
deleted file mode 100644
index ac0db09f..00000000
--- a/infrastructure/epc/gov_uk/gov_uk_epc_client.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# Spec: https://raw.githubusercontent.com/communitiesuk/epb-data-warehouse/main/api/api.yml
-from __future__ import annotations
-
-from typing import Any, Optional
-
-import httpx
-
-from datatypes.epc.domain.epc_property_data import EpcPropertyData
-from datatypes.epc.domain.mapper import EpcPropertyDataMapper
-from datatypes.epc.search import EpcSearchResult
-from infrastructure.epc.epc_client import EpcClient
-from infrastructure.epc.exceptions import (
-    EpcApiError,
-    EpcNotFoundError,
-    EpcRateLimitError,
-)
-from infrastructure.epc.gov_uk._retry import call_with_retry
-
-
-class GovUkEpcClient(EpcClient):
-    """EpcClient backed by the live gov.uk EPC API.
-
-    Endpoint: https://api.get-energy-performance-data.communities.gov.uk
-    """
-
-    BASE_URL = "https://api.get-energy-performance-data.communities.gov.uk"
-    REQUEST_TIMEOUT = 10.0
-
-    def __init__(self, auth_token: str) -> None:
-        self._headers = {
-            "Authorization": f"Bearer {auth_token}",
-            "Accept": "application/json",
-        }
-
-    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
-        normalised = self._normalise_postcode(postcode)
-        return call_with_retry(lambda: self._search(postcode=normalised))
-
-    def get_by_certificate_number(
-        self, certificate_number: str
-    ) -> EpcPropertyData:
-        raw = call_with_retry(lambda: self._fetch_certificate(certificate_number))
-        return EpcPropertyDataMapper.from_api_response(raw)
-
-    def get_by_uprn(self, uprn: int) -> Optional[EpcPropertyData]:
-        results = call_with_retry(lambda: self._search(uprn=uprn))
-        if not results:
-            return None
-        latest = max(results, key=lambda r: r.registration_date)
-        return self.get_by_certificate_number(latest.certificate_number)
-
-    # ------------------------------------------------------------------
-    # Private helpers
-    # ------------------------------------------------------------------
-
-    @staticmethod
-    def _normalise_postcode(postcode: str) -> str:
-        """Return the postcode with all spaces removed and uppercased."""
-        return postcode.replace(" ", "").upper()
-
-    @staticmethod
-    def _parse_retry_after(resp: httpx.Response) -> Optional[float]:
-        header = resp.headers.get("Retry-After")
-        if header is None:
-            return None
-        try:
-            return float(header)
-        except (TypeError, ValueError):
-            return None
-
-    def _fetch_certificate(self, certificate_number: str) -> dict[str, Any]:
-        resp = httpx.get(
-            f"{self.BASE_URL}/api/certificate",
-            params={"certificate_number": certificate_number},
-            headers=self._headers,
-            timeout=self.REQUEST_TIMEOUT,
-        )
-        if resp.status_code == 404:
-            raise EpcNotFoundError(certificate_number)
-        if resp.status_code == 429:
-            raise EpcRateLimitError(
-                "Rate limited by EPC API",
-                retry_after=self._parse_retry_after(resp),
-            )
-        if not resp.is_success:
-            raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")
-        return resp.json()["data"]
-
-    def _search(
-        self,
-        postcode: Optional[str] = None,
-        uprn: Optional[int] = None,
-    ) -> list[EpcSearchResult]:
-        params: dict[str, str | int] = {}
-        if postcode:
-            params["postcode"] = postcode
-        if uprn is not None:
-            params["uprn"] = uprn
-
-        resp = httpx.get(
-            f"{self.BASE_URL}/api/domestic/search",
-            params=params,
-            headers=self._headers,
-            timeout=self.REQUEST_TIMEOUT,
-        )
-        if resp.status_code == 404:
-            return []
-        if resp.status_code == 429:
-            raise EpcRateLimitError(
-                "Rate limited by EPC API",
-                retry_after=self._parse_retry_after(resp),
-            )
-        if not resp.is_success:
-            raise EpcApiError(f"EPC API error {resp.status_code}: {resp.text}")
-
-        rows = resp.json().get("data", [])
-        return [self._parse_search_result(row) for row in rows]
-
-    @staticmethod
-    def _parse_search_result(row: dict[str, Any]) -> EpcSearchResult:
-        return EpcSearchResult(
-            certificate_number=row["certificateNumber"],
-            address_line_1=row["addressLine1"],
-            address_line_2=row.get("addressLine2"),
-            address_line_3=row.get("addressLine3"),
-            address_line_4=row.get("addressLine4"),
-            postcode=row["postcode"],
-            post_town=row["postTown"],
-            uprn=row.get("uprn"),
-            current_energy_efficiency_band=row["currentEnergyEfficiencyBand"],
-            registration_date=row["registrationDate"],
-        )
diff --git a/infrastructure/epc/gov_uk/gov_uk_property_type.py b/infrastructure/epc/gov_uk/gov_uk_property_type.py
deleted file mode 100644
index a0f4a7a3..00000000
--- a/infrastructure/epc/gov_uk/gov_uk_property_type.py
+++ /dev/null
@@ -1,25 +0,0 @@
-from domain.epc.property_type import PropertyType
-
-# GOV.UK EPC API ``property_type`` integer codes mapped to the domain type.
-# This translation is GOV.UK-specific and lives in the infrastructure layer so
-# the domain ``PropertyType`` stays free of any source encoding.
-_PROPERTY_TYPE_BY_GOV_UK_CODE: dict[int, PropertyType] = {
-    0: PropertyType.HOUSE,
-    1: PropertyType.BUNGALOW,
-    2: PropertyType.FLAT,
-    3: PropertyType.MAISONETTE,
-    4: PropertyType.PARK_HOME,
-}
-
-
-def property_type_from_gov_uk_code(code: int) -> PropertyType:
-    """Translate a GOV.UK EPC ``property_type`` code to the domain PropertyType.
-
-    Raises ValueError for a code GOV.UK has not been mapped here yet.
-    """
-    try:
-        return _PROPERTY_TYPE_BY_GOV_UK_CODE[code]
-    except KeyError:
-        raise ValueError(
-            f"Unknown GOV.UK EPC property type code: {code}"
-        ) from None
diff --git a/infrastructure/epc/historical_open_data_communities/__init__.py b/infrastructure/epc/historical_open_data_communities/__init__.py
deleted file mode 100644
index 88a69081..00000000
--- a/infrastructure/epc/historical_open_data_communities/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from infrastructure.epc.historical_open_data_communities.historical_open_data_communities_epc_client import (
-    HistoricalOpenDataCommunitiesEpcClient,
-)
-
-__all__ = ["HistoricalOpenDataCommunitiesEpcClient"]
diff --git a/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py b/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py
deleted file mode 100644
index d8c7f9ac..00000000
--- a/infrastructure/epc/historical_open_data_communities/historical_open_data_communities_epc_client.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from __future__ import annotations
-
-from typing import Optional
-
-from domain.epc.epc_record import EpcRecord
-
-
-class HistoricalOpenDataCommunitiesEpcClient:
-    """EPC client backed by Open Data Communities' historical EPC data.
-
-    Stub — not yet implemented. Every method raises NotImplementedError for
-    now. Unlike GovUkEpcClient it returns the domain ``EpcRecord`` directly;
-    once the ``EpcClient`` port is migrated to return ``EpcRecord``, this
-    adapter should implement it.
-    """
-
-    def search_by_postcode(self, postcode: str) -> list[EpcRecord]:
-        raise NotImplementedError
-
-    def get_by_certificate_number(self, certificate_number: str) -> EpcRecord:
-        raise NotImplementedError
-
-    def get_by_uprn(self, uprn: int) -> Optional[EpcRecord]:
-        raise NotImplementedError
diff --git a/tests/infrastructure/epc/__init__.py b/tests/infrastructure/epc/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/infrastructure/epc/gov_uk/__init__.py b/tests/infrastructure/epc/gov_uk/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/infrastructure/epc/gov_uk/conftest.py b/tests/infrastructure/epc/gov_uk/conftest.py
deleted file mode 100644
index 8fbd3094..00000000
--- a/tests/infrastructure/epc/gov_uk/conftest.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import json
-import pathlib
-
-import pytest
-
-from infrastructure.epc.gov_uk.gov_uk_epc_client import GovUkEpcClient
-
-SAMPLES_DIR = pathlib.Path("backend/epc_api/json_samples")
-
-
-@pytest.fixture
-def rdsap_21_0_0_cert():
-    return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.0/epc.json").read_text())
-
-
-@pytest.fixture
-def rdsap_21_0_1_cert():
-    return json.loads((SAMPLES_DIR / "RdSAP-Schema-21.0.1/epc.json").read_text())
-
-
-@pytest.fixture
-def epc_client():
-    return GovUkEpcClient(auth_token="test-token")
-
-
-def make_search_row(
-    cert_num="CERT-001",
-    address_line_1="1 Test Street",
-    postcode="SW1A 1AA",
-    post_town="London",
-    uprn=100023336956,
-    band="D",
-    registration_date="2024-01-01",
-    address_line_2=None,
-    address_line_3=None,
-    address_line_4=None,
-):
-    return {
-        "certificateNumber": cert_num,
-        "addressLine1": address_line_1,
-        "addressLine2": address_line_2,
-        "addressLine3": address_line_3,
-        "addressLine4": address_line_4,
-        "postcode": postcode,
-        "postTown": post_town,
-        "uprn": uprn,
-        "currentEnergyEfficiencyBand": band,
-        "registrationDate": registration_date,
-    }
diff --git a/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py b/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py
deleted file mode 100644
index 46164a0e..00000000
--- a/tests/infrastructure/epc/gov_uk/test_gov_uk_epc_client.py
+++ /dev/null
@@ -1,211 +0,0 @@
-from unittest.mock import MagicMock, call, patch
-
-import pytest
-
-from datatypes.epc.domain.epc_property_data import EpcPropertyData
-from datatypes.epc.search import EpcSearchResult
-from infrastructure.epc.exceptions import EpcNotFoundError
-from tests.infrastructure.epc.gov_uk.conftest import make_search_row
-
-_SLEEP = "infrastructure.epc.gov_uk._retry.time.sleep"
-
-
-def _mock_response(status_code=200, json_data=None, headers=None):
-    resp = MagicMock()
-    resp.status_code = status_code
-    resp.is_success = 200 <= status_code < 300
-    resp.json.return_value = json_data or {}
-    resp.text = str(json_data)
-    resp.headers = headers or {}
-    return resp
-
-
-# ---------------------------------------------------------------------------
-# Test 1: get_by_certificate_number happy path
-# ---------------------------------------------------------------------------
-
-
-def test_get_by_certificate_number_returns_epc_property_data(
-    epc_client, rdsap_21_0_1_cert
-):
-    cert_response = {"data": rdsap_21_0_1_cert}
-    with patch("httpx.get", return_value=_mock_response(200, cert_response)):
-        result = epc_client.get_by_certificate_number("CERT-001")
-
-    assert isinstance(result, EpcPropertyData)
-
-
-# ---------------------------------------------------------------------------
-# Test 2: get_by_certificate_number 404 -> EpcNotFoundError
-# ---------------------------------------------------------------------------
-
-
-def test_get_by_certificate_number_404_raises_not_found(epc_client):
-    with patch("httpx.get", return_value=_mock_response(404)):
-        with pytest.raises(EpcNotFoundError):
-            epc_client.get_by_certificate_number("BAD-CERT")
-
-
-# ---------------------------------------------------------------------------
-# Test 3: 429 retried, succeeds on 3rd attempt
-# ---------------------------------------------------------------------------
-
-
-def test_get_by_certificate_number_retries_on_429_and_succeeds(
-    epc_client, rdsap_21_0_1_cert
-):
-    cert_response = {"data": rdsap_21_0_1_cert}
-    responses = [
-        _mock_response(429),
-        _mock_response(429),
-        _mock_response(200, cert_response),
-    ]
-    with patch("httpx.get", side_effect=responses), patch(_SLEEP):
-        result = epc_client.get_by_certificate_number("CERT-001")
-
-    assert isinstance(result, EpcPropertyData)
-
-
-# ---------------------------------------------------------------------------
-# Test 3b: 429 with Retry-After header -> sleeps for that value
-# ---------------------------------------------------------------------------
-
-
-def test_429_retry_after_header_drives_sleep_duration(
-    epc_client, rdsap_21_0_1_cert
-):
-    cert_response = {"data": rdsap_21_0_1_cert}
-    responses = [
-        _mock_response(429, headers={"Retry-After": "7"}),
-        _mock_response(200, cert_response),
-    ]
-    with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep:
-        epc_client.get_by_certificate_number("CERT-001")
-
-    mock_sleep.assert_called_once_with(7.0)
-
-
-# ---------------------------------------------------------------------------
-# Test 3c: 429 without Retry-After -> falls back to exponential backoff
-# ---------------------------------------------------------------------------
-
-
-def test_429_without_retry_after_uses_exponential_backoff(
-    epc_client, rdsap_21_0_1_cert
-):
-    cert_response = {"data": rdsap_21_0_1_cert}
-    responses = [
-        _mock_response(429),
-        _mock_response(429),
-        _mock_response(200, cert_response),
-    ]
-    with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep:
-        epc_client.get_by_certificate_number("CERT-001")
-
-    assert mock_sleep.call_args_list == [call(1.0), call(2.0)]
-
-
-# ---------------------------------------------------------------------------
-# Test 3d: malformed Retry-After header -> falls back to exponential backoff
-# ---------------------------------------------------------------------------
-
-
-def test_429_malformed_retry_after_falls_back_to_backoff(
-    epc_client, rdsap_21_0_1_cert
-):
-    cert_response = {"data": rdsap_21_0_1_cert}
-    responses = [
-        _mock_response(429, headers={"Retry-After": "Wed, 21 Oct 2026 07:28:00 GMT"}),
-        _mock_response(200, cert_response),
-    ]
-    with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep:
-        epc_client.get_by_certificate_number("CERT-001")
-
-    mock_sleep.assert_called_once_with(1.0)
-
-
-# ---------------------------------------------------------------------------
-# Test 3e: Retry-After capped by max_backoff to avoid hostile/buggy values
-# ---------------------------------------------------------------------------
-
-
-def test_429_retry_after_capped_by_max_backoff(epc_client, rdsap_21_0_1_cert):
-    cert_response = {"data": rdsap_21_0_1_cert}
-    responses = [
-        _mock_response(429, headers={"Retry-After": "9999"}),
-        _mock_response(200, cert_response),
-    ]
-    with patch("httpx.get", side_effect=responses), patch(_SLEEP) as mock_sleep:
-        epc_client.get_by_certificate_number("CERT-001")
-
-    mock_sleep.assert_called_once_with(60.0)
-
-
-# ---------------------------------------------------------------------------
-# Test 4: get_by_uprn empty search -> None
-# ---------------------------------------------------------------------------
-
-
-def test_get_by_uprn_returns_none_when_no_results(epc_client):
-    with patch("httpx.get", return_value=_mock_response(200, {"data": []})):
-        result = epc_client.get_by_uprn(100023336956)
-
-    assert result is None
-
-
-# ---------------------------------------------------------------------------
-# Test 5: get_by_uprn multiple results -> fetches latest by registration_date
-# ---------------------------------------------------------------------------
-
-
-def test_get_by_uprn_picks_most_recent_certificate(epc_client, rdsap_21_0_1_cert):
-    search_rows = [
-        make_search_row(cert_num="CERT-OLD", registration_date="2022-01-01"),
-        make_search_row(cert_num="CERT-NEW", registration_date="2024-06-01"),
-        make_search_row(cert_num="CERT-MID", registration_date="2023-03-15"),
-    ]
-    cert_response = {"data": rdsap_21_0_1_cert}
-
-    def fake_get(url, params=None, **kwargs):
-        if "search" in url:
-            return _mock_response(200, {"data": search_rows})
-        return _mock_response(200, cert_response)
-
-    with patch("httpx.get", side_effect=fake_get) as mock_get:
-        result = epc_client.get_by_uprn(100023336956)
-
-    assert isinstance(result, EpcPropertyData)
-    # Second call must be for the most recent cert
-    cert_call = mock_get.call_args_list[1]
-    assert cert_call.kwargs["params"]["certificate_number"] == "CERT-NEW"
-
-
-# ---------------------------------------------------------------------------
-# Test 6: search_by_postcode returns list[EpcSearchResult]
-# ---------------------------------------------------------------------------
-
-
-def test_search_by_postcode_returns_results(epc_client):
-    rows = [
-        make_search_row(cert_num="CERT-A", address_line_1="1 High Street"),
-        make_search_row(cert_num="CERT-B", address_line_1="2 High Street"),
-    ]
-    with patch("httpx.get", return_value=_mock_response(200, {"data": rows})):
-        results = epc_client.search_by_postcode("SW1A 1AA")
-
-    assert len(results) == 2
-    assert all(isinstance(r, EpcSearchResult) for r in results)
-    assert results[0].certificate_number == "CERT-A"
-    assert results[1].address_line_1 == "2 High Street"
-
-
-# ---------------------------------------------------------------------------
-# Test 7: search_by_postcode 404 -> empty list
-# ---------------------------------------------------------------------------
-
-
-def test_search_by_postcode_404_returns_empty_list(epc_client):
-    with patch("httpx.get", return_value=_mock_response(404)):
-        results = epc_client.search_by_postcode("ZZ9 9ZZ")
-
-    assert results == []

From 8422041215ae713923ae81a19b8af40d7632337e Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Tue, 26 May 2026 15:27:45 +0000
Subject: [PATCH 22/29] landlord override orchestration

---
 UBIQUITOUS_LANGUAGE.md                        |   2 +-
 applications/SAL/handler.py                   |  69 -------
 .../Dockerfile                                |   0
 .../landlord_description_overrides/handler.py | 128 +++++++++++++
 ...lord_description_overrides_trigger_body.py |  15 ++
 .../local_handler/.env.local.example          |   0
 .../local_handler/docker-compose.yml          |   0
 .../local_handler/invoke_local_lambda.py      |   0
 .../local_handler/run_local.sh                |   0
 .../requirements.txt                          |   0
 ...thon-writes-landlord-overrides-directly.md |  77 ++++++++
 .../__init__.py                               |   0
 .../built_form_type.py                        |  20 ++
 .../column_classifier.py                      |   2 +-
 .../property_type.py                          |   2 +-
 .../roof_type.py                              |  70 +++++++
 .../wall_type.py                              |  70 +++++++
 domain/sal/wall_type.py                       |  15 --
 .../chatgpt/chatgpt_column_classifier.py      |   5 +-
 infrastructure/postgres/engine.py             |  24 +++
 ..._form_type_override_postgres_repository.py |  82 ++++++++
 ...landlord_built_form_type_override_table.py |  69 +++++++
 .../postgres/landlord_override_enums.py       |  35 ++++
 ...perty_type_override_postgres_repository.py |  82 ++++++++
 .../landlord_property_type_override_table.py  |  67 +++++++
 ..._wall_type_override_postgres_repository.py |  80 ++++++++
 .../landlord_wall_type_override_table.py      |  69 +++++++
 orchestration/classifiable_column.py          |  37 ++++
 ...lord_description_overrides_orchestrator.py |  83 +++++++++
 orchestration/sal_orchestrator.py             |  56 ------
 playground.py                                 |  57 ++++++
 repositories/landlord_overrides/__init__.py   |   0
 .../landlord_override_repository.py           |  38 ++++
 .../chatgpt/test_chatgpt_column_classifier.py |   6 +-
 ...lord_description_overrides_orchestrator.py | 175 ++++++++++++++++--
 .../landlord_overrides/__init__.py            |   0
 .../landlord_overrides/postgres/__init__.py   |   0
 ...perty_type_override_postgres_repository.py | 147 +++++++++++++++
 ..._wall_type_override_postgres_repository.py | 158 ++++++++++++++++
 39 files changed, 1576 insertions(+), 164 deletions(-)
 delete mode 100644 applications/SAL/handler.py
 rename applications/{SAL => landlord_description_overrides}/Dockerfile (100%)
 create mode 100644 applications/landlord_description_overrides/handler.py
 create mode 100644 applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py
 rename applications/{SAL => landlord_description_overrides}/local_handler/.env.local.example (100%)
 rename applications/{SAL => landlord_description_overrides}/local_handler/docker-compose.yml (100%)
 rename applications/{SAL => landlord_description_overrides}/local_handler/invoke_local_lambda.py (100%)
 rename applications/{SAL => landlord_description_overrides}/local_handler/run_local.sh (100%)
 rename applications/{SAL => landlord_description_overrides}/requirements.txt (100%)
 create mode 100644 docs/adr/0003-python-writes-landlord-overrides-directly.md
 rename domain/{sal => landlord_description_overrides}/__init__.py (100%)
 create mode 100644 domain/landlord_description_overrides/built_form_type.py
 rename domain/{sal => landlord_description_overrides}/column_classifier.py (94%)
 rename domain/{sal => landlord_description_overrides}/property_type.py (78%)
 create mode 100644 domain/landlord_description_overrides/roof_type.py
 create mode 100644 domain/landlord_description_overrides/wall_type.py
 delete mode 100644 domain/sal/wall_type.py
 create mode 100644 infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py
 create mode 100644 infrastructure/postgres/landlord_built_form_type_override_table.py
 create mode 100644 infrastructure/postgres/landlord_override_enums.py
 create mode 100644 infrastructure/postgres/landlord_property_type_override_postgres_repository.py
 create mode 100644 infrastructure/postgres/landlord_property_type_override_table.py
 create mode 100644 infrastructure/postgres/landlord_wall_type_override_postgres_repository.py
 create mode 100644 infrastructure/postgres/landlord_wall_type_override_table.py
 create mode 100644 orchestration/classifiable_column.py
 create mode 100644 orchestration/landlord_description_overrides_orchestrator.py
 delete mode 100644 orchestration/sal_orchestrator.py
 create mode 100644 playground.py
 create mode 100644 repositories/landlord_overrides/__init__.py
 create mode 100644 repositories/landlord_overrides/landlord_override_repository.py
 create mode 100644 tests/repositories/landlord_overrides/__init__.py
 create mode 100644 tests/repositories/landlord_overrides/postgres/__init__.py
 create mode 100644 tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py
 create mode 100644 tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py

diff --git a/UBIQUITOUS_LANGUAGE.md b/UBIQUITOUS_LANGUAGE.md
index 34dc3115..6426e1c1 100644
--- a/UBIQUITOUS_LANGUAGE.md
+++ b/UBIQUITOUS_LANGUAGE.md
@@ -25,7 +25,7 @@ Invoke `/ubiquitous-language` in any session to extract new terms from the conve
 | **Postcode** | A UK postal code used to group nearby addresses; the primary search key for finding EPC records. | "zip code", "postal code" |
 | **Unstandardised Address** | A frozen dataclass (`domain.addresses.unstandardised_address.UnstandardisedAddress`) capturing a single address exactly as a customer supplied it, before any standardisation: a free-text `address` line (intentionally NOT normalised), a canonical `postcode` (a `Postcode` value object, sanitised on construction), an optional `org_reference` (the customer's own identifier for the property), and `additional_info` (the full source row — every column of the customer's upload, preserved verbatim). | "user address", "asset list", "raw address", "landlord address", "Hyde address" |
 | **Address List** | A nominal `NewType` over `list[UnstandardisedAddress]` (`domain.addresses.unstandardised_address.AddressList`) — a batch of unstandardised addresses, such as one customer's bulk-onboarding upload or a postcode-grouped sub-batch produced for downstream processing. Being nominal, it is constructed explicitly: `AddressList([...])`. It is the raw *input* to ingestion; the standardised *output* is a **Standardised Asset List**. | "asset list", "Hyde address list", "user addresses" |
-| **Standardised Asset List (SAL)** | A customer's property portfolio after ingestion has cleaned and standardised it — each property carrying a canonical field set (UPRN, standardised address, postcode, property type, built form, …). It is the standardised *output* of the pipeline whose raw *input* is an **Address List** of **Unstandardised Addresses**; generated by the `SALOrchestrator`. (Legacy implementation: `asset_list.AssetList` via `load_standardised_asset_list`.) | "address list" (that is the raw input), "asset register", "portfolio list" |
+| **Standardised Asset List (SAL)** | A customer's property portfolio after ingestion has cleaned and standardised it — each property carrying a canonical field set (UPRN, standardised address, postcode, property type, built form, …). It is the standardised *output* of the pipeline whose raw *input* is an **Address List** of **Unstandardised Addresses**. (Legacy implementation: `asset_list.AssetList` via `load_standardised_asset_list`.) | "address list" (that is the raw input), "asset register", "portfolio list" |
 | **Dwelling** | A single residential unit that can hold an EPC — a house, flat, or maisonette. | "property", "unit", "home" |
 
 ## Address Matching
diff --git a/applications/SAL/handler.py b/applications/SAL/handler.py
deleted file mode 100644
index c1d73827..00000000
--- a/applications/SAL/handler.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import logging
-from typing import Any
-
-import boto3
-from orchestration.sal_orchestrator import (
-    SALOrchestrator,
-)
-from infrastructure.s3.csv_s3_client import CsvS3Client
-from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
-    UnstandardisedAddressListCsvS3Repository,
-)
-from domain.addresses.unstandardised_address import AddressList
-from domain.sal.column_classifier import ColumnClassifier
-from domain.sal.property_type import PropertyType
-from domain.sal.wall_type import WallType
-from infrastructure.chatgpt.chatgpt import ChatGPT
-from infrastructure.chatgpt.chatgpt_column_classifier import (
-    ChatGptColumnClassifier,
-)
-
-logger = logging.getLogger(__name__)
-
-
-def handler(
-    body: dict[str, Any],
-    context: Any,
-) -> dict[str, list[str]]:
-
-    s3_uri = "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv"
-    bucket = "retrofit-data-dev"
-
-    # boto3.client is overloaded per-service in the installed stubs; cast
-    # to Any so the strict-mode checker treats it as opaque.
-    boto3_client: Any = boto3.client  # noqa
-    boto_s3: Any = boto3_client("s3")
-
-    csv_client = CsvS3Client(boto_s3, bucket)
-    unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository(
-        csv_client, bucket
-    )
-
-    # One ChatGPT-backed classifier per landlord-CSV column, keyed by column name.
-    chat_gpt = ChatGPT()
-    classifiers: dict[str, ColumnClassifier[Any]] = {
-        "Property Type": ChatGptColumnClassifier(
-            chat_gpt, PropertyType, PropertyType.UNKNOWN
-        ),
-        "Walls": ChatGptColumnClassifier(chat_gpt, WallType, WallType.UNKNOWN),
-    }
-
-    sal = SALOrchestrator(
-        unstandardised_address_repo=unstandardised_address_repo,
-        classifiers=classifiers,
-    )
-
-    addressList: AddressList = sal.get_unstandardised_addresses(input_s3_uri=s3_uri)
-
-    # Cap the batch to the first 20 while the ChatGPT path is under test.
-    addressList = AddressList(addressList[:20])
-
-    classified = sal.classify_columns(addressList)
-    for column, mapping in classified.items():
-        logger.info(
-            "Classified %d descriptions for column %r.", len(mapping), column
-        )
-
-    # TODO: persist `classified` to landlord overrides.
-
-    return {"hello": ["200"]}
diff --git a/applications/SAL/Dockerfile b/applications/landlord_description_overrides/Dockerfile
similarity index 100%
rename from applications/SAL/Dockerfile
rename to applications/landlord_description_overrides/Dockerfile
diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
new file mode 100644
index 00000000..ff16925e
--- /dev/null
+++ b/applications/landlord_description_overrides/handler.py
@@ -0,0 +1,128 @@
+import logging
+import os
+from typing import Any
+from uuid import UUID
+
+import boto3
+
+from applications.landlord_description_overrides.landlord_description_overrides_trigger_body import (
+    LandlordDescriptionOverridesTriggerBody,
+)
+from domain.addresses.unstandardised_address import AddressList
+from domain.landlord_description_overrides.built_form_type import BuiltFormType
+from domain.landlord_description_overrides.property_type import PropertyType
+from domain.landlord_description_overrides.wall_type import WallType
+from infrastructure.chatgpt.chatgpt import ChatGPT
+from infrastructure.chatgpt.chatgpt_column_classifier import ChatGptColumnClassifier
+from infrastructure.postgres.config import PostgresConfig
+from infrastructure.postgres.engine import make_engine, transactional_session
+from infrastructure.postgres.landlord_built_form_type_override_postgres_repository import (
+    LandlordBuiltFormTypeOverridePostgresRepository,
+)
+from infrastructure.postgres.landlord_property_type_override_postgres_repository import (
+    LandlordPropertyTypeOverridePostgresRepository,
+)
+from infrastructure.postgres.landlord_wall_type_override_postgres_repository import (
+    LandlordWallTypeOverridePostgresRepository,
+)
+from infrastructure.s3.csv_s3_client import CsvS3Client
+from orchestration.classifiable_column import ClassifiableColumn
+from orchestration.landlord_description_overrides_orchestrator import (
+    LandlordDescriptionOverridesOrchestrator,
+)
+from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
+    UnstandardisedAddressListCsvS3Repository,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def handler(
+    body: dict[str, Any],
+    context: Any,
+) -> dict[str, list[str]]:
+    # TODO: replace with ``LandlordDescriptionOverridesTriggerBody.model_validate(body)``
+    # once this lambda is wired into the parent task pipeline via the SQS
+    # subtask envelope. Until then the trigger fields are hard-coded so the
+    # local invoker can exercise the full path. See ADR-0003 §Out of scope.
+    trigger = LandlordDescriptionOverridesTriggerBody(
+        task_id=UUID("00000000-0000-0000-0000-000000000001"),
+        sub_task_id=UUID("00000000-0000-0000-0000-000000000002"),
+        s3_uri="s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv",
+        portfolio_id=730,
+    )
+
+    bucket = "retrofit-data-dev"
+
+    # boto3.client is overloaded per-service in the installed stubs; cast
+    # to Any so the strict-mode checker treats it as opaque.
+    boto3_client: Any = (
+        boto3.client
+    )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    boto_s3: Any = boto3_client("s3")
+
+    csv_client = CsvS3Client(boto_s3, bucket)
+    unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository(
+        csv_client, bucket
+    )
+
+    # One transactional session per handler invocation: the context manager
+    # commits on clean exit and rolls back on exception, so the handler never
+    # invokes ``.commit()`` itself -- transaction semantics live in the
+    # infrastructure layer.
+    engine = make_engine(PostgresConfig.from_env(os.environ))
+    with transactional_session(engine) as session:
+        chat_gpt = ChatGPT()
+        # The "Property Type" CSV column is read by two classifiers: the
+        # landlord's free-text (e.g. "semi-detached house") encodes both the
+        # dwelling kind (PropertyType) and how it joins to neighbours
+        # (BuiltFormType). Each classification lands in its own table.
+        columns: list[ClassifiableColumn[Any]] = [
+            ClassifiableColumn(
+                name="property_type",
+                source_column="Property Type",
+                classifier=ChatGptColumnClassifier(
+                    chat_gpt, PropertyType, PropertyType.UNKNOWN
+                ),
+                repo=LandlordPropertyTypeOverridePostgresRepository(session),
+            ),
+            ClassifiableColumn(
+                name="built_form_type",
+                source_column="Property Type",
+                classifier=ChatGptColumnClassifier(
+                    chat_gpt, BuiltFormType, BuiltFormType.UNKNOWN
+                ),
+                repo=LandlordBuiltFormTypeOverridePostgresRepository(session),
+            ),
+            ClassifiableColumn(
+                name="wall_type",
+                source_column="Walls",
+                classifier=ChatGptColumnClassifier(
+                    chat_gpt, WallType, WallType.UNKNOWN
+                ),
+                repo=LandlordWallTypeOverridePostgresRepository(session),
+            ),
+        ]
+
+        orchestrator = LandlordDescriptionOverridesOrchestrator(
+            unstandardised_address_repo=unstandardised_address_repo,
+            columns=columns,
+        )
+
+        addressList: AddressList = orchestrator.get_unstandardised_addresses(
+            input_s3_uri=trigger.s3_uri
+        )
+
+        # Cap the batch to the first 20 while the ChatGPT path is under test.
+        # Remove before wiring into the production subtask pipeline.
+        addressList = AddressList(addressList[:20])
+
+        classified = orchestrator.classify_and_persist(
+            addressList, portfolio_id=trigger.portfolio_id
+        )
+        for column, mapping in classified.items():
+            logger.info(
+                "Classified %d descriptions for column %r.", len(mapping), column
+            )
+
+    return {"hello": ["200"]}
diff --git a/applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py b/applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py
new file mode 100644
index 00000000..9f78215e
--- /dev/null
+++ b/applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py
@@ -0,0 +1,15 @@
+from uuid import UUID
+
+from pydantic import BaseModel, ConfigDict
+
+
+class LandlordDescriptionOverridesTriggerBody(BaseModel):
+    model_config = ConfigDict(extra="allow")
+
+    task_id: UUID
+    sub_task_id: UUID
+    s3_uri: str
+    # ``portfolio_id`` is ``bigint`` in the ``landlord_*_overrides`` schema --
+    # Python ``int`` is unbounded so the Pydantic side stays simple; the
+    # SQLModel row class pins the storage to ``BigInteger``.
+    portfolio_id: int
diff --git a/applications/SAL/local_handler/.env.local.example b/applications/landlord_description_overrides/local_handler/.env.local.example
similarity index 100%
rename from applications/SAL/local_handler/.env.local.example
rename to applications/landlord_description_overrides/local_handler/.env.local.example
diff --git a/applications/SAL/local_handler/docker-compose.yml b/applications/landlord_description_overrides/local_handler/docker-compose.yml
similarity index 100%
rename from applications/SAL/local_handler/docker-compose.yml
rename to applications/landlord_description_overrides/local_handler/docker-compose.yml
diff --git a/applications/SAL/local_handler/invoke_local_lambda.py b/applications/landlord_description_overrides/local_handler/invoke_local_lambda.py
similarity index 100%
rename from applications/SAL/local_handler/invoke_local_lambda.py
rename to applications/landlord_description_overrides/local_handler/invoke_local_lambda.py
diff --git a/applications/SAL/local_handler/run_local.sh b/applications/landlord_description_overrides/local_handler/run_local.sh
similarity index 100%
rename from applications/SAL/local_handler/run_local.sh
rename to applications/landlord_description_overrides/local_handler/run_local.sh
diff --git a/applications/SAL/requirements.txt b/applications/landlord_description_overrides/requirements.txt
similarity index 100%
rename from applications/SAL/requirements.txt
rename to applications/landlord_description_overrides/requirements.txt
diff --git a/docs/adr/0003-python-writes-landlord-overrides-directly.md b/docs/adr/0003-python-writes-landlord-overrides-directly.md
new file mode 100644
index 00000000..ea0fda9b
--- /dev/null
+++ b/docs/adr/0003-python-writes-landlord-overrides-directly.md
@@ -0,0 +1,77 @@
+# ADR-0003: Python writes landlord overrides directly to Postgres
+
+**Status:** Accepted
+**Date:** 2026-05-26
+**Supersedes (in part):** [assessment-model/docs/adr/0002-landlord-override-vocabulary.md](https://github.com/.../assessment-model/blob/main/docs/adr/0002-landlord-override-vocabulary.md) — specifically the clause beginning *"Writes happen from Next.js …"*.
+
+## Context
+
+ADR-0002 (in the `assessment-model` TS repo) defined the `landlord_property_type_overrides` and `landlord_wall_type_overrides` tables and noted that the Model service would POST classification results to a Next.js route handler, with Next.js performing the upsert. Drizzle remained the schema source of truth.
+
+That extra hop has not been built and is now judged unnecessary for the present scope:
+
+- The classification result is internal — a Lambda computes it, the same Lambda persists it. No third party needs to participate in the write.
+- Drizzle remains the schema's source of truth either way: the Python adapter mirrors the schema in a SQLModel row, but the migrations stay with Drizzle. Adding a Next.js route would not change which side owns schema definition.
+- The Python lambda already lives next to a Postgres connection in the existing pipeline (`subtask`/`tasks` tables are written from Python today). Adding the three `landlord_*_overrides` tables to that adapter surface is a small, well-understood change. Routing the same writes through Next.js would mean: lambda → JSON-over-HTTP → Next.js route → Drizzle → Postgres, instead of lambda → SQLAlchemy → Postgres. Three extra moving parts to ship, deploy, monitor, and authenticate for no behavioural gain.
+
+## Decision
+
+The Model service (specifically `applications/landlord_description_overrides/handler.py`) writes directly to `landlord_property_type_overrides`, `landlord_built_form_type_overrides`, and `landlord_wall_type_overrides` via a SQLAlchemy-backed `LandlordOverrideRepository[E]` adapter. No Next.js route handler is required.
+
+Transaction boundaries live in `infrastructure/postgres/engine.transactional_session` — a context manager that commits on clean exit and rolls back on exception. The application layer (`handler.py`) never calls `.commit()` or `.rollback()` itself; it only opens the context. Orchestration and repository code likewise never commits — keeping transaction semantics confined to one infrastructure helper.
+
+The conflict policy lives in SQL and is identical for every adapter implementation:
+
+```sql
+INSERT INTO landlord_property_type_overrides (portfolio_id, description, value, source)
+VALUES …
+ON CONFLICT (portfolio_id, description)
+DO UPDATE SET value = EXCLUDED.value,
+              source = EXCLUDED.source,
+              updated_at = now()
+WHERE landlord_property_type_overrides.source = 'classifier';
+```
+
+The `WHERE landlord_property_type_overrides.source = 'classifier'` guard — a condition on the *existing* row's `source` — is load-bearing: it lets the classifier refresh its own past output while leaving `source = 'user'` rows untouched. This is the contract ADR-0002's `source` column was added for.
+
+`UNKNOWN` values are persisted, not skipped — consistent with ADR-0002 §5. A future user override can upgrade them.
+
+## Consequences
+
+**Positive.**
+
+- One fewer service to deploy, monitor, and authenticate.
+- The classifier and persistence live in the same process — failures surface against a single `sub_task` row, not split across two systems.
+- The Postgres adapter mirrors the existing `subtask`/`tasks` repositories, so reviewers have a precedent to compare against.
+
+**Negative.**
+
+- The schema is now defined in two places — the source-of-truth Drizzle definition lives in the TS repo, and the Python `SQLModel` row class shadows it. They must stay in lockstep. Mitigations: the TS schema header comment (`landlord_overrides.ts:12`) already names the Python file that mirrors it; a future ADR may add a CI check that diffs the two.
+- The boundary that ADR-0002 anticipated for pgEnum validation (a Next.js route validating incoming values before insert) is gone. Pydantic + the Python `Enum` type catch invalid values on the producing side, and Postgres's pgEnum will reject anything that slips through.
+
+## File layout
+
+This ADR also fixes a placement convention for Postgres adapters going forward. The codebase currently has the ChatGPT classifier split cleanly along DDD lines — port in `domain/`, adapter in `infrastructure/chatgpt/` — but the `tasks` Postgres adapter does not follow the same shape: its concrete class lives in `repositories/tasks/`, not `infrastructure/postgres/`.
+
+The convention going forward is:
+
+- **Port (protocol / abstract base):** `repositories/<aggregate>/<thing>_repository.py`
+- **Postgres adapter (concrete):** `infrastructure/postgres/<thing>_postgres_repository.py`
+- **SQLModel row class:** `infrastructure/postgres/<thing>_table.py`
+
+The new `LandlordOverrideRepository` family follows this convention.
+
+**Existing outliers to relocate in a follow-up:**
+
+- `repositories/tasks/task_postgres_repository.py` → `infrastructure/postgres/task_postgres_repository.py`
+- `repositories/tasks/subtask_postgres_repository.py` → `infrastructure/postgres/subtask_postgres_repository.py`
+
+Both moves are mechanical (import-path updates only). They are intentionally out of scope for the present PR.
+
+## Out of scope (deferred to follow-up work)
+
+- Relocating `task_postgres_repository.py` and `subtask_postgres_repository.py` into `infrastructure/postgres/` per the convention above.
+- Extracting a shared upsert helper / base class once a fourth `landlord_*_overrides` column lands — until then the three adapters' 95%-identical bodies are kept side-by-side for direct comparison.
+- Switching `applications/landlord_description_overrides/handler.py` to acquire its `Session` via a `@subtask_handler()`-style decorator instead of building its own engine.
+- A cross-repo PR amending ADR-0002 to point at this ADR.
+- A CI check (or codegen) that diffs the Drizzle pgEnum literals against the Python `Enum.value` strings.
diff --git a/domain/sal/__init__.py b/domain/landlord_description_overrides/__init__.py
similarity index 100%
rename from domain/sal/__init__.py
rename to domain/landlord_description_overrides/__init__.py
diff --git a/domain/landlord_description_overrides/built_form_type.py b/domain/landlord_description_overrides/built_form_type.py
new file mode 100644
index 00000000..327ceebe
--- /dev/null
+++ b/domain/landlord_description_overrides/built_form_type.py
@@ -0,0 +1,20 @@
+from enum import Enum
+
+
+class BuiltFormType(Enum):
+    """A landlord-supplied built form, as resolved by the landlord-description-overrides context.
+
+    Mirrors the EPC built-form values. ``NOT_RECORDED`` is the legitimate
+    EPC value for properties whose built form the surveyor did not capture;
+    ``UNKNOWN`` is the classifier fallback for landlord values that cannot be
+    resolved at all.
+    """
+
+    DETACHED = "Detached"
+    SEMI_DETACHED = "Semi-Detached"
+    MID_TERRACE = "Mid-Terrace"
+    END_TERRACE = "End-Terrace"
+    ENCLOSED_MID_TERRACE = "Enclosed Mid-Terrace"
+    ENCLOSED_END_TERRACE = "Enclosed End-Terrace"
+    NOT_RECORDED = "Not Recorded"
+    UNKNOWN = "Unknown"
diff --git a/domain/sal/column_classifier.py b/domain/landlord_description_overrides/column_classifier.py
similarity index 94%
rename from domain/sal/column_classifier.py
rename to domain/landlord_description_overrides/column_classifier.py
index 3324d79f..adc88c6a 100644
--- a/domain/sal/column_classifier.py
+++ b/domain/landlord_description_overrides/column_classifier.py
@@ -21,7 +21,7 @@ class ColumnClassifier(ABC, Generic[E]):
 
     One classifier handles one landlord-CSV column. Implementations decide
     *how* the mapping is performed (an LLM, a lookup table, a rules engine);
-    ``SALOrchestrator`` depends only on this interface.
+    ``LandlordDescriptionOverridesOrchestrator`` depends only on this interface.
     """
 
     @abstractmethod
diff --git a/domain/sal/property_type.py b/domain/landlord_description_overrides/property_type.py
similarity index 78%
rename from domain/sal/property_type.py
rename to domain/landlord_description_overrides/property_type.py
index 3980c2f0..453c28c1 100644
--- a/domain/sal/property_type.py
+++ b/domain/landlord_description_overrides/property_type.py
@@ -2,7 +2,7 @@ from enum import Enum
 
 
 class PropertyType(Enum):
-    """A landlord-supplied property type, as resolved by the SAL context.
+    """A landlord-supplied property type, as resolved by the landlord-description-overrides context.
 
     Distinct from the EPC context's ``PropertyType``: a landlord CSV value
     may be unresolvable, so this enum carries an explicit ``UNKNOWN`` member.
diff --git a/domain/landlord_description_overrides/roof_type.py b/domain/landlord_description_overrides/roof_type.py
new file mode 100644
index 00000000..56ef9e8e
--- /dev/null
+++ b/domain/landlord_description_overrides/roof_type.py
@@ -0,0 +1,70 @@
+from enum import Enum
+
+
+class RoofType(Enum):
+    """A landlord-supplied roof description, as resolved by the landlord-description-overrides context.
+
+    Each member is one full EPC roof-description string, combining shape
+    (flat, pitched, roof room(s), thatched) with insulation state and, for
+    pitched roofs, the loft-insulation depth in millimetres. Adjacency
+    markers like ``(another dwelling above)`` represent a unit whose top
+    boundary is another dwelling rather than a roof of its own; they are
+    kept as members because they appear in the same EPC column.
+    ``UNKNOWN`` covers values the classifier cannot resolve -- most
+    commonly raw ``Average thermal transmittance`` U-value strings that
+    carry no shape/insulation information.
+    """
+
+    FLAT_INSULATED = "Flat, insulated"
+    FLAT_INSULATED_ASSUMED = "Flat, insulated (assumed)"
+    FLAT_LIMITED_INSULATION = "Flat, limited insulation"
+    FLAT_LIMITED_INSULATION_ASSUMED = "Flat, limited insulation (assumed)"
+    FLAT_NO_INSULATION = "Flat, no insulation"
+    FLAT_NO_INSULATION_ASSUMED = "Flat, no insulation (assumed)"
+
+    PITCHED_INSULATED = "Pitched, insulated"
+    PITCHED_INSULATED_ASSUMED = "Pitched, insulated (assumed)"
+    PITCHED_INSULATED_AT_RAFTERS = "Pitched, insulated at rafters"
+    PITCHED_LIMITED_INSULATION = "Pitched, limited insulation"
+    PITCHED_LIMITED_INSULATION_ASSUMED = "Pitched, limited insulation (assumed)"
+    PITCHED_NO_INSULATION = "Pitched, no insulation"
+    PITCHED_NO_INSULATION_ASSUMED = "Pitched, no insulation (assumed)"
+    PITCHED_UNKNOWN_LOFT_INSULATION = "Pitched, Unknown loft insulation"
+    PITCHED_LOFT_0MM = "Pitched, 0 mm loft insulation"
+    PITCHED_LOFT_12MM = "Pitched, 12 mm loft insulation"
+    PITCHED_LOFT_25MM = "Pitched, 25 mm loft insulation"
+    PITCHED_LOFT_50MM = "Pitched, 50 mm loft insulation"
+    PITCHED_LOFT_75MM = "Pitched, 75 mm loft insulation"
+    PITCHED_LOFT_100MM = "Pitched, 100 mm loft insulation"
+    PITCHED_LOFT_125MM = "Pitched, 125 mm loft insulation"
+    PITCHED_LOFT_150MM = "Pitched, 150 mm loft insulation"
+    PITCHED_LOFT_175MM = "Pitched, 175 mm loft insulation"
+    PITCHED_LOFT_200MM = "Pitched, 200 mm loft insulation"
+    PITCHED_LOFT_225MM = "Pitched, 225 mm loft insulation"
+    PITCHED_LOFT_250MM = "Pitched, 250 mm loft insulation"
+    PITCHED_LOFT_270MM = "Pitched, 270 mm loft insulation"
+    PITCHED_LOFT_300MM = "Pitched, 300 mm loft insulation"
+    PITCHED_LOFT_350MM = "Pitched, 350 mm loft insulation"
+    PITCHED_LOFT_400MM = "Pitched, 400 mm loft insulation"
+    PITCHED_LOFT_400_PLUS_MM = "Pitched, 400+ mm loft insulation"
+
+    ROOF_ROOM_INSULATED = "Roof room(s), insulated"
+    ROOF_ROOM_INSULATED_ASSUMED = "Roof room(s), insulated (assumed)"
+    ROOF_ROOM_LIMITED_INSULATION = "Roof room(s), limited insulation"
+    ROOF_ROOM_LIMITED_INSULATION_ASSUMED = "Roof room(s), limited insulation (assumed)"
+    ROOF_ROOM_NO_INSULATION = "Roof room(s), no insulation"
+    ROOF_ROOM_NO_INSULATION_ASSUMED = "Roof room(s), no insulation (assumed)"
+    ROOF_ROOM_CEILING_INSULATED = "Roof room(s), ceiling insulated"
+    ROOF_ROOM_THATCHED = "Roof room(s), thatched"
+    ROOF_ROOM_THATCHED_WITH_ADDITIONAL_INSULATION = "Roof room(s), thatched with additional insulation"
+
+    THATCHED = "Thatched"
+    THATCHED_WITH_ADDITIONAL_INSULATION = "Thatched, with additional insulation"
+
+    ADJACENT_ANOTHER_DWELLING_ABOVE = "(another dwelling above)"
+    ADJACENT_SAME_DWELLING_ABOVE = "(same dwelling above)"
+    ADJACENT_OTHER_PREMISES_ABOVE = "(other premises above)"
+    ADJACENT_ANOTHER_PREMISES_ABOVE = "(another premises above)"
+    ANOTHER_PREMISES_ABOVE = "Another Premises Above"
+
+    UNKNOWN = "Unknown"
diff --git a/domain/landlord_description_overrides/wall_type.py b/domain/landlord_description_overrides/wall_type.py
new file mode 100644
index 00000000..42b90da6
--- /dev/null
+++ b/domain/landlord_description_overrides/wall_type.py
@@ -0,0 +1,70 @@
+from enum import Enum
+
+
+class WallType(Enum):
+    """A landlord-supplied wall description, as resolved by the landlord-description-overrides context.
+
+    Each member is one full EPC wall-description string, combining material
+    (cavity, solid brick, sandstone, …) with construction/insulation state
+    (as built, filled cavity, with internal insulation, …). ``UNKNOWN`` covers
+    values the classifier cannot resolve — most commonly raw
+    ``Average thermal transmittance`` U-value strings that carry no material
+    information.
+    """
+
+    CAVITY_FILLED = "Cavity wall, filled cavity"
+    CAVITY_AS_BUILT_INSULATED_ASSUMED = "Cavity wall, as built, insulated (assumed)"
+    CAVITY_AS_BUILT_NO_INSULATION_ASSUMED = "Cavity wall, as built, no insulation (assumed)"
+    CAVITY_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Cavity wall, as built, partial insulation (assumed)"
+    CAVITY_WITH_INTERNAL_INSULATION = "Cavity wall, with internal insulation"
+    CAVITY_WITH_EXTERNAL_INSULATION = "Cavity wall, with external insulation"
+    CAVITY_FILLED_AND_INTERNAL_INSULATION = "Cavity wall, filled cavity and internal insulation"
+    CAVITY_FILLED_AND_EXTERNAL_INSULATION = "Cavity wall, filled cavity and external insulation"
+
+    SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED = "Solid brick, as built, no insulation (assumed)"
+    SOLID_BRICK_AS_BUILT_INSULATED_ASSUMED = "Solid brick, as built, insulated (assumed)"
+    SOLID_BRICK_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Solid brick, as built, partial insulation (assumed)"
+    SOLID_BRICK_WITH_INTERNAL_INSULATION = "Solid brick, with internal insulation"
+    SOLID_BRICK_WITH_EXTERNAL_INSULATION = "Solid brick, with external insulation"
+
+    TIMBER_FRAME_AS_BUILT_NO_INSULATION_ASSUMED = "Timber frame, as built, no insulation (assumed)"
+    TIMBER_FRAME_AS_BUILT_INSULATED_ASSUMED = "Timber frame, as built, insulated (assumed)"
+    TIMBER_FRAME_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Timber frame, as built, partial insulation (assumed)"
+    TIMBER_FRAME_WITH_ADDITIONAL_INSULATION = "Timber frame, with additional insulation"
+
+    SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED = "Sandstone, as built, no insulation (assumed)"
+    SANDSTONE_AS_BUILT_INSULATED_ASSUMED = "Sandstone, as built, insulated (assumed)"
+    SANDSTONE_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Sandstone, as built, partial insulation (assumed)"
+    SANDSTONE_WITH_INTERNAL_INSULATION = "Sandstone, with internal insulation"
+    SANDSTONE_WITH_EXTERNAL_INSULATION = "Sandstone, with external insulation"
+
+    GRANITE_OR_WHIN_AS_BUILT_NO_INSULATION_ASSUMED = "Granite or whin, as built, no insulation (assumed)"
+    GRANITE_OR_WHIN_AS_BUILT_INSULATED_ASSUMED = "Granite or whin, as built, insulated (assumed)"
+    GRANITE_OR_WHIN_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Granite or whin, as built, partial insulation (assumed)"
+    GRANITE_OR_WHIN_WITH_INTERNAL_INSULATION = "Granite or whin, with internal insulation"
+    GRANITE_OR_WHIN_WITH_EXTERNAL_INSULATION = "Granite or whin, with external insulation"
+
+    SYSTEM_BUILT_AS_BUILT_NO_INSULATION_ASSUMED = "System built, as built, no insulation (assumed)"
+    SYSTEM_BUILT_AS_BUILT_INSULATED_ASSUMED = "System built, as built, insulated (assumed)"
+    SYSTEM_BUILT_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "System built, as built, partial insulation (assumed)"
+    SYSTEM_BUILT_WITH_INTERNAL_INSULATION = "System built, with internal insulation"
+    SYSTEM_BUILT_WITH_EXTERNAL_INSULATION = "System built, with external insulation"
+
+    PARK_HOME_AS_BUILT = "Park home wall, as built"
+    PARK_HOME_WITH_INTERNAL_INSULATION = "Park home wall, with internal insulation"
+    PARK_HOME_WITH_EXTERNAL_INSULATION = "Park home wall, with external insulation"
+
+    COB_AS_BUILT = "Cob, as built"
+    COB_WITH_INTERNAL_INSULATION = "Cob, with internal insulation"
+    COB_WITH_EXTERNAL_INSULATION = "Cob, with external insulation"
+
+    CURTAIN_WALL = "Curtain wall"
+    CURTAIN_WALL_AS_BUILT_NO_INSULATION_ASSUMED = "Curtain Wall, as built, no insulation (assumed)"
+    CURTAIN_WALL_AS_BUILT_INSULATED_ASSUMED = "Curtain Wall, as built, insulated (assumed)"
+    CURTAIN_WALL_FILLED = "Curtain Wall, filled cavity"
+    CURTAIN_WALL_WITH_INTERNAL_INSULATION = "Curtain Wall, with internal insulation"
+
+    BASEMENT_WALL = "Basement wall"
+    BASEMENT_WALL_AS_BUILT = "Basement wall, as built"
+
+    UNKNOWN = "Unknown"
diff --git a/domain/sal/wall_type.py b/domain/sal/wall_type.py
deleted file mode 100644
index 05dc2ba9..00000000
--- a/domain/sal/wall_type.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from enum import Enum
-
-
-class WallType(Enum):
-    """A landlord-supplied wall construction type, as resolved by the SAL context.
-
-    Mirrors the main RdSAP wall constructions. Like the SAL ``PropertyType``,
-    it carries an explicit ``UNKNOWN`` member for unresolvable CSV values.
-    """
-
-    CAVITY = "Cavity"
-    SOLID_BRICK = "Solid Brick"
-    TIMBER_FRAME = "Timber frame"
-    SANDSTONE = "Sandstone"
-    UNKNOWN = "Unknown"
diff --git a/infrastructure/chatgpt/chatgpt_column_classifier.py b/infrastructure/chatgpt/chatgpt_column_classifier.py
index 8f564e6c..b23e7c2e 100644
--- a/infrastructure/chatgpt/chatgpt_column_classifier.py
+++ b/infrastructure/chatgpt/chatgpt_column_classifier.py
@@ -4,7 +4,10 @@ import json
 from enum import Enum
 from typing import Any, TypeVar
 
-from domain.sal.column_classifier import ClassificationError, ColumnClassifier
+from domain.landlord_description_overrides.column_classifier import (
+    ClassificationError,
+    ColumnClassifier,
+)
 from infrastructure.chatgpt.chatgpt import ChatGPT
 from infrastructure.chatgpt.exceptions import ChatGPTClientError
 
diff --git a/infrastructure/postgres/engine.py b/infrastructure/postgres/engine.py
index 0de9efcb..ea2b35ad 100644
--- a/infrastructure/postgres/engine.py
+++ b/infrastructure/postgres/engine.py
@@ -1,3 +1,6 @@
+from collections.abc import Iterator
+from contextlib import contextmanager
+
 from sqlalchemy.engine import Engine
 from sqlmodel import Session, create_engine
 
@@ -16,3 +19,24 @@ def make_engine(config: PostgresConfig) -> Engine:
 
 def make_session(engine: Engine) -> Session:
     return Session(engine)
+
+
+@contextmanager  # pyright: ignore[reportDeprecated]
+def transactional_session(engine: Engine) -> Iterator[Session]:
+    """Yield a session whose lifecycle owns the transaction.
+
+    On clean exit the session commits; on any exception it rolls back and
+    re-raises. Either way the session is closed. Callers in the application
+    layer can do their work inside the ``with`` block without ever invoking
+    ``.commit()`` / ``.rollback()`` themselves -- transaction semantics stay
+    in the infrastructure layer.
+    """
+    session = Session(engine)
+    try:
+        yield session
+        session.commit()
+    except Exception:
+        session.rollback()
+        raise
+    finally:
+        session.close()
diff --git a/infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py b/infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py
new file mode 100644
index 00000000..0f7d4959
--- /dev/null
+++ b/infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py
@@ -0,0 +1,82 @@
+"""Postgres adapter for ``LandlordOverrideRepository[BuiltFormType]``.
+
+Writes to ``landlord_built_form_type_overrides`` (Drizzle-managed; mirrored by
+``LandlordBuiltFormTypeOverrideRow``). The conflict policy lives in the SQL --
+see ADR-0003 §Decision. Shape mirrors
+``LandlordPropertyTypeOverridePostgresRepository``; the duplication is
+deliberate while there are only three columns -- if a fourth lands and the
+duplication becomes painful, extract a shared upsert helper then.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import cast
+
+from sqlalchemy import Table
+from sqlalchemy.dialects.postgresql import insert as pg_insert
+from sqlmodel import Session
+
+from domain.landlord_description_overrides.built_form_type import BuiltFormType
+from infrastructure.postgres.landlord_built_form_type_override_table import (
+    LandlordBuiltFormTypeOverrideRow,
+)
+from infrastructure.postgres.landlord_override_enums import OverrideSource
+from repositories.landlord_overrides.landlord_override_repository import (
+    LandlordOverrideRepository,
+)
+
+
+class LandlordBuiltFormTypeOverridePostgresRepository(
+    LandlordOverrideRepository[BuiltFormType]
+):
+    """Session-backed upsert of classifier ``BuiltFormType`` overrides."""
+
+    def __init__(self, session: Session) -> None:
+        self._session = session
+
+    def upsert_all(
+        self,
+        portfolio_id: int,
+        descriptions_to_values: dict[str, BuiltFormType],
+    ) -> None:
+        """Insert or refresh one row per description; issues no commit."""
+        if not descriptions_to_values:
+            return
+
+        # ``__table__`` exists at runtime on ``table=True`` classes but the
+        # stubs do not expose it, hence the ``getattr`` + ``cast``.
+        target: Table = cast(
+            Table, getattr(LandlordBuiltFormTypeOverrideRow, "__table__")
+        )
+        timestamp = datetime.now(timezone.utc)
+        insert_stmt = pg_insert(target).values(
+            [
+                {
+                    "portfolio_id": portfolio_id,
+                    "description": text,
+                    "value": category.value,
+                    "source": OverrideSource.CLASSIFIER,
+                    "created_at": timestamp,
+                    "updated_at": timestamp,
+                }
+                for text, category in descriptions_to_values.items()
+            ]
+        )
+
+        # On conflict only rows still owned by the classifier are refreshed;
+        # a row whose ``source`` is ``'user'`` fails the ``WHERE`` guard and
+        # is left untouched. See ADR-0003 §Decision.
+        refreshed = {
+            "value": insert_stmt.excluded.value,
+            "source": insert_stmt.excluded.source,
+            "updated_at": insert_stmt.excluded.updated_at,
+        }
+        upsert_stmt = insert_stmt.on_conflict_do_update(
+            index_elements=["portfolio_id", "description"],
+            set_=refreshed,
+            where=target.c.source == OverrideSource.CLASSIFIER,
+        )
+
+        # One ``Session.execute`` overload is flagged deprecated in the stubs.
+        self._session.execute(upsert_stmt)  # pyright: ignore[reportDeprecated]
diff --git a/infrastructure/postgres/landlord_built_form_type_override_table.py b/infrastructure/postgres/landlord_built_form_type_override_table.py
new file mode 100644
index 00000000..a1f89c35
--- /dev/null
+++ b/infrastructure/postgres/landlord_built_form_type_override_table.py
@@ -0,0 +1,69 @@
+"""SQLModel mirror of the ``landlord_built_form_type_overrides`` Drizzle table.
+
+The schema source of truth lives in the ``assessment-model`` TS repo
+(`src/app/db/schema/landlord_overrides.ts`). The migrations are owned there;
+this row class only mirrors the columns so the Python lambda can read/write.
+See ADR-0003. Shape mirrors ``LandlordPropertyTypeOverrideRow`` -- the only
+differences are the table name, the ``built_form_type`` pgEnum on ``value``,
+and the unique-constraint name.
+"""
+
+from datetime import datetime, timezone
+from typing import ClassVar
+from uuid import UUID, uuid4
+
+from sqlalchemy import BigInteger, Column, UniqueConstraint
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.landlord_description_overrides.built_form_type import BuiltFormType
+from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
+
+
+class LandlordBuiltFormTypeOverrideRow(SQLModel, table=True):
+    __tablename__: ClassVar[str] = "landlord_built_form_type_overrides"  # pyright: ignore[reportIncompatibleVariableOverride]
+    __table_args__: ClassVar[tuple[UniqueConstraint, ...]] = (  # pyright: ignore[reportIncompatibleVariableOverride]
+        UniqueConstraint(
+            "portfolio_id",
+            "description",
+            name="landlord_built_form_type_overrides_portfolio_description_unique",
+        ),
+    )
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True)
+
+    # bigint to match the Drizzle ``portfolio_id`` FK; SQLModel's default int
+    # mapping is 32-bit Integer and would overflow once portfolio IDs exceed
+    # 2^31. The FK to ``portfolio.id`` is enforced by the Drizzle migration,
+    # not declared here -- the ``portfolio`` table is not modelled in Python.
+    portfolio_id: int = Field(
+        sa_column=Column(BigInteger, nullable=False, index=True),
+    )
+
+    description: str = Field(nullable=False)
+
+    # Persist each member's ``.value`` (not its name) as the enum label.
+    value: BuiltFormType = Field(
+        sa_column=Column(
+            SAEnum(
+                BuiltFormType,
+                name="built_form_type",
+                values_callable=lambda cls: [m.value for m in cls],  # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+            ),
+            nullable=False,
+        ),
+    )
+
+    # Shared SAEnum instance -- see ``landlord_override_enums`` for the why.
+    source: str = Field(
+        sa_column=Column(override_source_sa_enum, nullable=False),
+    )
+
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
diff --git a/infrastructure/postgres/landlord_override_enums.py b/infrastructure/postgres/landlord_override_enums.py
new file mode 100644
index 00000000..ba2cee94
--- /dev/null
+++ b/infrastructure/postgres/landlord_override_enums.py
@@ -0,0 +1,35 @@
+"""Shared pgEnum definitions used by every ``landlord_*_overrides`` row class.
+
+The ``override_source`` pgEnum is referenced by every ``landlord_*_overrides``
+table mirrored here -- property type, wall type and built form type (see
+``landlord_overrides.ts`` for the Drizzle definition). Defining it once here
+and reusing the same SQLAlchemy ``Enum`` instance across all of those row
+classes keeps SQLModel's metadata coherent: ``create_all`` emits exactly one
+``CREATE TYPE override_source`` statement, not several parallel ones
+colliding on the same pgEnum name.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import Enum as SAEnum
+
+
+class OverrideSource:
+    """Mirror of the ``override_source`` pgEnum.
+
+    Drizzle defines this as ``('classifier', 'user')`` in
+    ``landlord_overrides.ts``. Modelled here as string constants so callers
+    don't sprinkle magic strings; the column is constrained by Postgres,
+    and the only Python-side producer (the classifier path) writes the
+    literal ``OverrideSource.CLASSIFIER``.
+    """
+
+    CLASSIFIER = "classifier"  # rows written by the classifier upsert path
+    USER = "user"  # user corrections; the upsert guard never overwrites these
+
+
+override_source_sa_enum = SAEnum(
+    OverrideSource.CLASSIFIER,
+    OverrideSource.USER,
+    name="override_source",  # name of the Postgres enum type
+)
diff --git a/infrastructure/postgres/landlord_property_type_override_postgres_repository.py b/infrastructure/postgres/landlord_property_type_override_postgres_repository.py
new file mode 100644
index 00000000..18592c5f
--- /dev/null
+++ b/infrastructure/postgres/landlord_property_type_override_postgres_repository.py
@@ -0,0 +1,82 @@
+"""Postgres adapter for ``LandlordOverrideRepository[PropertyType]``.
+
+Writes to ``landlord_property_type_overrides`` (Drizzle-managed; mirrored by
+``LandlordPropertyTypeOverrideRow``). The conflict policy lives in the SQL --
+see ADR-0003 §Decision.
+
+Per the convention this ADR fixes, Postgres adapters live in
+``infrastructure/postgres/``. The existing ``task_postgres_repository.py`` /
+``subtask_postgres_repository.py`` are outliers still under ``repositories/``;
+relocating them is tracked as a follow-up in ADR-0003 §"File layout".
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import cast
+
+from sqlalchemy import Table
+from sqlalchemy.dialects.postgresql import insert as pg_insert
+from sqlmodel import Session
+
+from domain.landlord_description_overrides.property_type import PropertyType
+from infrastructure.postgres.landlord_override_enums import OverrideSource
+from infrastructure.postgres.landlord_property_type_override_table import (
+    LandlordPropertyTypeOverrideRow,
+)
+from repositories.landlord_overrides.landlord_override_repository import (
+    LandlordOverrideRepository,
+)
+
+
+class LandlordPropertyTypeOverridePostgresRepository(
+    LandlordOverrideRepository[PropertyType]
+):
+    """Session-backed upsert of classifier ``PropertyType`` overrides."""
+
+    def __init__(self, session: Session) -> None:
+        self._session = session
+
+    def upsert_all(
+        self,
+        portfolio_id: int,
+        descriptions_to_values: dict[str, PropertyType],
+    ) -> None:
+        """Insert or refresh one row per description; issues no commit."""
+        if not descriptions_to_values:
+            return
+
+        # ``__table__`` exists at runtime on ``table=True`` classes but the
+        # stubs do not expose it, hence the ``getattr`` + ``cast``.
+        target: Table = cast(Table, getattr(LandlordPropertyTypeOverrideRow, "__table__"))
+        timestamp = datetime.now(timezone.utc)
+        insert_stmt = pg_insert(target).values(
+            [
+                {
+                    "portfolio_id": portfolio_id,
+                    "description": text,
+                    "value": category.value,
+                    "source": OverrideSource.CLASSIFIER,
+                    "created_at": timestamp,
+                    "updated_at": timestamp,
+                }
+                for text, category in descriptions_to_values.items()
+            ]
+        )
+
+        # On conflict only rows still owned by the classifier are refreshed;
+        # a row whose ``source`` is ``'user'`` fails the ``WHERE`` guard and
+        # is left untouched. See ADR-0003 §Decision.
+        refreshed = {
+            "value": insert_stmt.excluded.value,
+            "source": insert_stmt.excluded.source,
+            "updated_at": insert_stmt.excluded.updated_at,
+        }
+        upsert_stmt = insert_stmt.on_conflict_do_update(
+            index_elements=["portfolio_id", "description"],
+            set_=refreshed,
+            where=target.c.source == OverrideSource.CLASSIFIER,
+        )
+
+        # One ``Session.execute`` overload is flagged deprecated in the stubs.
+        self._session.execute(upsert_stmt)  # pyright: ignore[reportDeprecated]
diff --git a/infrastructure/postgres/landlord_property_type_override_table.py b/infrastructure/postgres/landlord_property_type_override_table.py
new file mode 100644
index 00000000..b76d508e
--- /dev/null
+++ b/infrastructure/postgres/landlord_property_type_override_table.py
@@ -0,0 +1,67 @@
+"""SQLModel mirror of the ``landlord_property_type_overrides`` Drizzle table.
+
+The schema source of truth lives in the ``assessment-model`` TS repo
+(`src/app/db/schema/landlord_overrides.ts`). The migrations are owned there;
+this row class only mirrors the columns so the Python lambda can read/write.
+See ADR-0003.
+"""
+
+from datetime import datetime, timezone
+from typing import ClassVar
+from uuid import UUID, uuid4
+
+from sqlalchemy import BigInteger, Column, UniqueConstraint
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.landlord_description_overrides.property_type import PropertyType
+from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
+
+
+class LandlordPropertyTypeOverrideRow(SQLModel, table=True):
+    __tablename__: ClassVar[str] = "landlord_property_type_overrides"  # pyright: ignore[reportIncompatibleVariableOverride]
+    __table_args__: ClassVar[tuple[UniqueConstraint, ...]] = (  # pyright: ignore[reportIncompatibleVariableOverride]
+        UniqueConstraint(
+            "portfolio_id",
+            "description",
+            name="landlord_property_type_overrides_portfolio_description_unique",
+        ),
+    )
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True)
+
+    # bigint to match the Drizzle ``portfolio_id`` FK; SQLModel's default int
+    # mapping is 32-bit Integer and would overflow once portfolio IDs exceed
+    # 2^31. The FK to ``portfolio.id`` is enforced by the Drizzle migration,
+    # not declared here -- the ``portfolio`` table is not modelled in Python.
+    portfolio_id: int = Field(
+        sa_column=Column(BigInteger, nullable=False, index=True),
+    )
+
+    description: str = Field(nullable=False)
+
+    # Persist each member's ``.value`` (not its name) as the enum label.
+    value: PropertyType = Field(
+        sa_column=Column(
+            SAEnum(
+                PropertyType,
+                name="property_type",
+                values_callable=lambda cls: [m.value for m in cls],  # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+            ),
+            nullable=False,
+        ),
+    )
+
+    # Shared SAEnum instance -- see ``landlord_override_enums`` for the why.
+    source: str = Field(
+        sa_column=Column(override_source_sa_enum, nullable=False),
+    )
+
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
diff --git a/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py b/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py
new file mode 100644
index 00000000..21b73e98
--- /dev/null
+++ b/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py
@@ -0,0 +1,80 @@
+"""Postgres adapter for ``LandlordOverrideRepository[WallType]``.
+
+Writes to ``landlord_wall_type_overrides`` (Drizzle-managed; mirrored by
+``LandlordWallTypeOverrideRow``). The conflict policy lives in the SQL --
+see ADR-0003 §Decision. Shape mirrors
+``LandlordPropertyTypeOverridePostgresRepository``; the duplication is
+deliberate while there are only three columns -- if a fourth lands and the
+duplication becomes painful, extract a shared upsert helper then.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import cast
+
+from sqlalchemy import Table
+from sqlalchemy.dialects.postgresql import insert as pg_insert
+from sqlmodel import Session
+
+from domain.landlord_description_overrides.wall_type import WallType
+from infrastructure.postgres.landlord_override_enums import OverrideSource
+from infrastructure.postgres.landlord_wall_type_override_table import (
+    LandlordWallTypeOverrideRow,
+)
+from repositories.landlord_overrides.landlord_override_repository import (
+    LandlordOverrideRepository,
+)
+
+
+class LandlordWallTypeOverridePostgresRepository(
+    LandlordOverrideRepository[WallType]
+):
+    """Session-backed upsert of classifier ``WallType`` overrides."""
+
+    def __init__(self, session: Session) -> None:
+        self._session = session
+
+    def upsert_all(
+        self,
+        portfolio_id: int,
+        descriptions_to_values: dict[str, WallType],
+    ) -> None:
+        """Insert or refresh one row per description; issues no commit."""
+        if not descriptions_to_values:
+            return
+
+        # ``__table__`` exists at runtime on ``table=True`` classes but the
+        # stubs do not expose it, hence the ``getattr`` + ``cast``.
+        target: Table = cast(Table, getattr(LandlordWallTypeOverrideRow, "__table__"))
+        timestamp = datetime.now(timezone.utc)
+        insert_stmt = pg_insert(target).values(
+            [
+                {
+                    "portfolio_id": portfolio_id,
+                    "description": text,
+                    "value": category.value,
+                    "source": OverrideSource.CLASSIFIER,
+                    "created_at": timestamp,
+                    "updated_at": timestamp,
+                }
+                for text, category in descriptions_to_values.items()
+            ]
+        )
+
+        # On conflict only rows still owned by the classifier are refreshed;
+        # a row whose ``source`` is ``'user'`` fails the ``WHERE`` guard and
+        # is left untouched. See ADR-0003 §Decision.
+        refreshed = {
+            "value": insert_stmt.excluded.value,
+            "source": insert_stmt.excluded.source,
+            "updated_at": insert_stmt.excluded.updated_at,
+        }
+        upsert_stmt = insert_stmt.on_conflict_do_update(
+            index_elements=["portfolio_id", "description"],
+            set_=refreshed,
+            where=target.c.source == OverrideSource.CLASSIFIER,
+        )
+
+        # One ``Session.execute`` overload is flagged deprecated in the stubs.
+        self._session.execute(upsert_stmt)  # pyright: ignore[reportDeprecated]
diff --git a/infrastructure/postgres/landlord_wall_type_override_table.py b/infrastructure/postgres/landlord_wall_type_override_table.py
new file mode 100644
index 00000000..79bea46a
--- /dev/null
+++ b/infrastructure/postgres/landlord_wall_type_override_table.py
@@ -0,0 +1,69 @@
+"""SQLModel mirror of the ``landlord_wall_type_overrides`` Drizzle table.
+
+The schema source of truth lives in the ``assessment-model`` TS repo
+(`src/app/db/schema/landlord_overrides.ts`). The migrations are owned there;
+this row class only mirrors the columns so the Python lambda can read/write.
+See ADR-0003. Shape mirrors ``LandlordPropertyTypeOverrideRow`` -- the only
+differences are the table name, the ``wall_type`` pgEnum on ``value``, and
+the unique-constraint name.
+"""
+
+from datetime import datetime, timezone
+from typing import ClassVar
+from uuid import UUID, uuid4
+
+from sqlalchemy import BigInteger, Column, UniqueConstraint
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.landlord_description_overrides.wall_type import WallType
+from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
+
+
+class LandlordWallTypeOverrideRow(SQLModel, table=True):
+    __tablename__: ClassVar[str] = "landlord_wall_type_overrides"  # pyright: ignore[reportIncompatibleVariableOverride]
+    __table_args__: ClassVar[tuple[UniqueConstraint, ...]] = (  # pyright: ignore[reportIncompatibleVariableOverride]
+        UniqueConstraint(
+            "portfolio_id",
+            "description",
+            name="landlord_wall_type_overrides_portfolio_description_unique",
+        ),
+    )
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True)
+
+    # bigint to match the Drizzle ``portfolio_id`` FK; SQLModel's default int
+    # mapping is 32-bit Integer and would overflow once portfolio IDs exceed
+    # 2^31. The FK to ``portfolio.id`` is enforced by the Drizzle migration,
+    # not declared here -- the ``portfolio`` table is not modelled in Python.
+    portfolio_id: int = Field(
+        sa_column=Column(BigInteger, nullable=False, index=True),
+    )
+
+    description: str = Field(nullable=False)
+
+    # Persist each member's ``.value`` (not its name) as the enum label.
+    value: WallType = Field(
+        sa_column=Column(
+            SAEnum(
+                WallType,
+                name="wall_type",
+                values_callable=lambda cls: [m.value for m in cls],  # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+            ),
+            nullable=False,
+        ),
+    )
+
+    # Shared SAEnum instance -- see ``landlord_override_enums`` for the why.
+    source: str = Field(
+        sa_column=Column(override_source_sa_enum, nullable=False),
+    )
+
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
diff --git a/orchestration/classifiable_column.py b/orchestration/classifiable_column.py
new file mode 100644
index 00000000..fb1dab6e
--- /dev/null
+++ b/orchestration/classifiable_column.py
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import Generic, TypeVar
+
+from domain.landlord_description_overrides.column_classifier import ColumnClassifier
+from repositories.landlord_overrides.landlord_override_repository import (
+    LandlordOverrideRepository,
+)
+
+E = TypeVar("E", bound=Enum)
+
+
+@dataclass(frozen=True)
+class ClassifiableColumn(Generic[E]):
+    """Pairs a column's classifier with the repository that persists its results.
+
+    The orchestrator registers one ``ClassifiableColumn`` per
+    (source column, target enum) pair. Bundling the classifier and the
+    repository together makes the "this enum lands in this table" invariant
+    structural -- the handler can no longer wire ``PropertyType``
+    classifications to a ``WallType`` repo by keying two dicts with the same
+    string.
+
+    ``source_column`` is the landlord-CSV header to read from; ``name`` is the
+    unique key the orchestrator uses to report this classification's results
+    (and the key the handler logs). Two ``ClassifiableColumn``s may share a
+    ``source_column`` -- e.g. the ``"Property Type"`` CSV column feeds both
+    ``PropertyType`` and ``BuiltFormType`` classifiers off the same free-text
+    description -- but each must have a unique ``name``.
+    """
+
+    name: str  # unique key under which this column's results are reported
+    source_column: str  # landlord-CSV header the descriptions are read from
+    classifier: ColumnClassifier[E]  # classifies this column's descriptions
+    repo: LandlordOverrideRepository[E]  # persists the resulting mapping
diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
new file mode 100644
index 00000000..389d1afb
--- /dev/null
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@@ -0,0 +1,107 @@
+from enum import Enum
+from typing import Any
+
+from domain.addresses.unstandardised_address import AddressList
+from orchestration.classifiable_column import ClassifiableColumn
+from repositories.unstandardised_address.unstandardised_address_list_repository import (
+    UnstandardisedAddressListRepository,
+)
+
+
+class LandlordDescriptionOverridesOrchestrator:
+    """Classify landlord free-text descriptions and persist the results.
+
+    Raises:
+        ValueError: at construction, if two columns share a ``name``.
+    """
+
+    def __init__(
+        self,
+        unstandardised_address_repo: UnstandardisedAddressListRepository,
+        columns: list[ClassifiableColumn[Any]],
+    ) -> None:
+        # Results are keyed by ``name``. A duplicate would silently replace
+        # another column's mapping in ``classify_columns``, and
+        # ``classify_and_persist`` would then write the surviving mapping
+        # through both columns' repositories -- so reject it up front.
+        seen: set[str] = set()
+        for column in columns:
+            if column.name in seen:
+                raise ValueError(
+                    f"Duplicate ClassifiableColumn name: {column.name!r}"
+                )
+            seen.add(column.name)
+
+        self._unstandardised_address_repo = unstandardised_address_repo
+        # Each entry is one (source CSV column, target enum) classification.
+        # Two entries may share ``source_column`` -- e.g. ``"Property Type"``
+        # feeds both PropertyType and BuiltFormType classifiers -- so the
+        # registry is a list rather than a dict keyed by header.
+        self._columns = columns
+
+    def get_unstandardised_addresses(
+        self,
+        input_s3_uri: str,
+    ) -> AddressList:
+        """Load the address batch at ``input_s3_uri`` via the address repo."""
+        return self._unstandardised_address_repo.load_batch(input_s3_uri)
+
+    def get_col_to_description_mappings(
+        self, list_of_unstandardised_address: AddressList
+    ) -> dict[str, set[str]]:
+        """Collect the distinct descriptions seen under each column.
+
+        Every ``additional_info`` key becomes an entry; its values are split
+        on commas, stripped and lower-cased, and empty fragments are dropped.
+        """
+        mappings: dict[str, set[str]] = {}
+        for unstandardised_address in list_of_unstandardised_address:
+            for key, value in unstandardised_address.additional_info.items():
+                bucket = mappings.setdefault(key, set())
+                # A comma-separated value is several descriptions in one cell;
+                # split it so each is its own entry. Lower-case so case-only
+                # typos collapse to one variant.
+                for variant in value.split(","):
+                    variant = variant.strip().lower()
+                    if variant:
+                        bucket.add(variant)
+        return mappings
+
+    def classify_columns(
+        self, addresses: AddressList
+    ) -> dict[str, dict[str, Enum]]:
+        """Classify every registered column's descriptions.
+
+        Returns a mapping of ``ClassifiableColumn.name`` to
+        ``{description: category}``. A registered column whose ``source_column``
+        is absent from the addresses is classified from an empty set.
+        """
+        col_to_desc = self.get_col_to_description_mappings(addresses)
+        return {
+            column.name: column.classifier.classify(
+                col_to_desc.get(column.source_column, set())
+            )
+            for column in self._columns
+        }
+
+    def classify_and_persist(
+        self, addresses: AddressList, portfolio_id: int
+    ) -> dict[str, dict[str, Enum]]:
+        """Classify every registered column and persist the results.
+
+        Each non-empty mapping is written via the column's repository under
+        ``source = 'classifier'``. Empty mappings (a registered column whose
+        ``source_column`` is absent from this batch) skip the DB round-trip.
+        The orchestrator does not commit -- the caller owns the transaction
+        boundary.
+
+        Returns the same shape as ``classify_columns`` so callers can log
+        per-column counts.
+        """
+        classified = self.classify_columns(addresses)
+        for column in self._columns:
+            mapping = classified[column.name]
+            if not mapping:
+                continue
+            column.repo.upsert_all(portfolio_id, mapping)
+        return classified
diff --git a/orchestration/sal_orchestrator.py b/orchestration/sal_orchestrator.py
deleted file mode 100644
index 6b451746..00000000
--- a/orchestration/sal_orchestrator.py
+++ /dev/null
@@ -1,56 +0,0 @@
-from enum import Enum
-from typing import Any
-
-from domain.addresses.unstandardised_address import AddressList
-from domain.sal.column_classifier import ColumnClassifier
-from repositories.unstandardised_address.unstandardised_address_list_repository import (
-    UnstandardisedAddressListRepository,
-)
-
-
-class SALOrchestrator:
-    def __init__(
-        self,
-        unstandardised_address_repo: UnstandardisedAddressListRepository,
-        classifiers: dict[str, ColumnClassifier[Any]],
-    ) -> None:
-        self._unstandardised_address_repo = unstandardised_address_repo
-        # Keyed by landlord-CSV column name.
-        self._classifiers = classifiers
-
-    def get_unstandardised_addresses(
-        self,
-        input_s3_uri: str,
-    ) -> AddressList:
-        return self._unstandardised_address_repo.load_batch(input_s3_uri)
-
-    def get_col_to_description_mappings(
-        self, list_of_unstandardised_address: AddressList
-    ) -> dict[str, set[str]]:
-        mappings: dict[str, set[str]] = {}
-        for unstandardised_address in list_of_unstandardised_address:
-            for key, value in unstandardised_address.additional_info.items():
-                bucket = mappings.setdefault(key, set())
-                # A comma-separated value is several descriptions in one cell;
-                # split it so each is its own entry. Lower-case so case-only
-                # typos collapse to one variant.
-                for variant in value.split(","):
-                    variant = variant.strip().lower()
-                    if variant:
-                        bucket.add(variant)
-        return mappings
-
-    def classify_columns(
-        self, addresses: AddressList
-    ) -> dict[str, dict[str, Enum]]:
-        """Classify every registered column's descriptions.
-
-        Returns a mapping of column name to ``{description: category}``. A
-        registered column absent from the addresses contributes an empty
-        inner mapping.
-        """
-        col_to_desc = self.get_col_to_description_mappings(addresses)
-        return {
-            column: classifier.classify(col_to_desc.get(column, set()))
-            for column, classifier in self._classifiers.items()
-        }
diff --git a/playground.py b/playground.py
new file mode 100644
index 00000000..d116dcf9
--- /dev/null
+++ b/playground.py
@@ -0,0 +1,57 @@
+"""Read a file and return unique values from a chosen column."""
+
+from pathlib import Path
+import argparse
+import sys
+
+import pandas as pd
+
+
+def read_file(path: str | Path) -> pd.DataFrame:
+    """Load ``path`` into a DataFrame, picking the reader from its suffix."""
+    path = Path(path)
+    suffix = path.suffix.lower()
+    readers = {
+        ".csv": pd.read_csv,
+        ".tsv": lambda p: pd.read_csv(p, sep="\t"),
+        **dict.fromkeys((".xlsx", ".xls"), pd.read_excel),
+        ".parquet": pd.read_parquet,
+        ".json": pd.read_json,
+    }
+    if suffix not in readers:
+        raise ValueError(f"Unsupported file type: {suffix}")
+    return readers[suffix](path)
+
+
+def get_unique(path: str | Path, column: str, dropna: bool = True) -> list:
+    """Return the distinct values of ``column`` in the file at ``path``."""
+    frame = read_file(Path(path))
+    if column not in frame.columns:
+        raise KeyError(f"Column {column!r} not found. Available: {list(frame.columns)}")
+    return (frame[column].dropna() if dropna else frame[column]).unique().tolist()
+
+
+def main() -> int:
+    """Print the unique values of ``--column``, or list columns if it is empty."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--path", default="/workspaces/model/certificates-2026.csv")
+    # ``nargs`` only accepts a count or ?/*/+, so the column name goes in
+    # ``default``; a bare ``--column`` yields None and lists the columns.
+    parser.add_argument("--column", nargs="?", default="walls_description")
+    parser.add_argument("--keep-na", action="store_true")
+    args, _ = parser.parse_known_args()
+
+    if not args.column:
+        print("Available columns:")
+        for c in read_file(args.path).columns:
+            print(f"  - {c}")
+        return 0
+
+    # Honour ``--column`` rather than a hard-coded column name.
+    for value in get_unique(args.path, args.column, dropna=not args.keep_na):
+        print(value)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/repositories/landlord_overrides/__init__.py b/repositories/landlord_overrides/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/repositories/landlord_overrides/landlord_override_repository.py b/repositories/landlord_overrides/landlord_override_repository.py
new file mode 100644
index 00000000..47e873fe
--- /dev/null
+++ b/repositories/landlord_overrides/landlord_override_repository.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Generic, TypeVar
+
+E = TypeVar("E", bound=Enum)
+
+
+class LandlordOverrideRepository(ABC, Generic[E]):
+    """Port: persists landlord (description -> category) overrides for a portfolio.
+
+    One repository implementation targets one ``landlord_<category>_overrides``
+    table. The category enum ``E`` (e.g. ``PropertyType``, ``WallType``) determines
+    which table the adapter writes to; the orchestrator depends only on this
+    interface and never names a concrete table.
+
+    Concrete adapters live in ``infrastructure/`` (see ADR-0003): for example
+    ``infrastructure/postgres/landlord_property_type_override_postgres_repository.py``.
+    """
+
+    @abstractmethod
+    def upsert_all(
+        self,
+        portfolio_id: int,
+        descriptions_to_values: dict[str, E],
+    ) -> None:
+        """Upsert one ``source='classifier'`` row per ``(portfolio_id, description)``.
+
+        Args:
+            portfolio_id: portfolio under which every row in this call is keyed.
+            descriptions_to_values: landlord description -> classified ``E`` member.
+
+        Conflicts (ADR-0003 §Decision): a ``source = 'classifier'`` row is updated
+        (value, source, updated_at); a ``source = 'user'`` row is preserved, so a
+        user correction is never overwritten. Empty mapping: no-op, callers may skip.
+        """
+        ...
diff --git a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
index 5ec854f1..8a07ecec 100644
--- a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
+++ b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
@@ -4,9 +4,9 @@ from typing import Optional
 
 import pytest
 
-from domain.sal.column_classifier import ClassificationError
-from domain.sal.property_type import PropertyType
-from domain.sal.wall_type import WallType
+from domain.landlord_description_overrides.column_classifier import ClassificationError
+from domain.landlord_description_overrides.property_type import PropertyType
+from domain.landlord_description_overrides.wall_type import WallType
 from infrastructure.chatgpt.chatgpt import ChatGPT
 from infrastructure.chatgpt.chatgpt_column_classifier import (
     ChatGptColumnClassifier,
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index 62f1a329..eee4a310 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -4,12 +4,17 @@ from enum import Enum
 from typing import Any, Optional
 
 from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
+from domain.landlord_description_overrides.built_form_type import BuiltFormType
+from domain.landlord_description_overrides.column_classifier import ColumnClassifier
+from domain.landlord_description_overrides.property_type import PropertyType
+from domain.landlord_description_overrides.wall_type import WallType
 from domain.postcode import Postcode
-from domain.sal.column_classifier import ColumnClassifier
-from domain.sal.property_type import PropertyType
-from domain.sal.wall_type import WallType
-from orchestration.sal_orchestrator import (
-    SALOrchestrator,
+from orchestration.classifiable_column import ClassifiableColumn
+from orchestration.landlord_description_overrides_orchestrator import (
+    LandlordDescriptionOverridesOrchestrator,
+)
+from repositories.landlord_overrides.landlord_override_repository import (
+    LandlordOverrideRepository,
 )
 from repositories.unstandardised_address.unstandardised_address_list_repository import (
     UnstandardisedAddressListRepository,
@@ -38,6 +43,18 @@ class _StubColumnClassifier(ColumnClassifier[Enum]):
         return self._result
 
 
+class _StubLandlordOverrideRepository(LandlordOverrideRepository[Enum]):
+    """In-memory test double that logs each ``upsert_all`` invocation in ``calls``."""
+
+    def __init__(self) -> None:
+        self.calls: list[tuple[int, dict[str, Enum]]] = []
+
+    def upsert_all(
+        self, portfolio_id: int, descriptions_to_values: dict[str, Enum]
+    ) -> None:
+        self.calls += [(portfolio_id, {**descriptions_to_values})]
+
+
 def _make_unstandardised_address(
     landlord_additional_info: dict[str, str],
 ) -> UnstandardisedAddress:
@@ -49,11 +66,25 @@ def _make_unstandardised_address(
 
 
 def _orchestrator(
-    classifiers: Optional[dict[str, ColumnClassifier[Any]]] = None,
-) -> SALOrchestrator:
-    return SALOrchestrator(
+    columns: Optional[list[ClassifiableColumn[Any]]] = None,
+) -> LandlordDescriptionOverridesOrchestrator:
+    return LandlordDescriptionOverridesOrchestrator(
         unstandardised_address_repo=_StubUnstandardisedAddressRepository(),
-        classifiers=classifiers or {},
+        columns=columns or [],
+    )
+
+
+def _column(
+    name: str,
+    source_column: str,
+    classifier: ColumnClassifier[Any],
+    repo: Optional[LandlordOverrideRepository[Any]] = None,
+) -> ClassifiableColumn[Any]:
+    return ClassifiableColumn(
+        name=name,
+        source_column=source_column,
+        classifier=classifier,
+        repo=repo or _StubLandlordOverrideRepository(),
     )
 
 
@@ -155,30 +186,140 @@ def test_classify_columns_classifies_each_registered_column() -> None:
     property_types = _StubColumnClassifier(
         result={"semi-detached": PropertyType.HOUSE}
     )
-    wall_types = _StubColumnClassifier(result={"solid brick": WallType.SOLID_BRICK})
+    wall_types = _StubColumnClassifier(result={"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED})
 
     # act
     result = _orchestrator(
-        {"Property Type": property_types, "Walls": wall_types}
+        [
+            _column("property_type", "Property Type", property_types),
+            _column("wall_type", "Walls", wall_types),
+        ]
     ).classify_columns(addresses)
 
-    # assert: each registered column was classified independently.
+    # assert: each registered column was classified independently, keyed by name.
     assert result == {
-        "Property Type": {"semi-detached": PropertyType.HOUSE},
-        "Walls": {"solid brick": WallType.SOLID_BRICK},
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED},
     }
 
 
 def test_classify_columns_yields_empty_mapping_for_an_absent_column() -> None:
-    # arrange: a classifier is registered for a column the addresses lack.
+    # arrange: a classifier is registered for a source column the addresses lack.
     addresses = AddressList([_make_unstandardised_address({"Walls": "cavity"})])
     property_types = _StubColumnClassifier(result={})
 
     # act
     result = _orchestrator(
-        {"Property Type": property_types}
+        [_column("property_type", "Property Type", property_types)]
     ).classify_columns(addresses)
 
     # assert: the absent column classified an empty description set.
-    assert result == {"Property Type": {}}
+    assert result == {"property_type": {}}
     assert property_types.received == set()
+
+
+def test_classify_columns_runs_two_classifiers_against_a_shared_source_column() -> None:
+    # arrange: one landlord column ("Property Type") is registered twice, once
+    # per classifier -- PropertyType (the kind of dwelling) and BuiltFormType
+    # (how it joins to its neighbours). Each classifier must be handed the same
+    # description set, and each result is filed under its own column ``name``.
+    description = "semi-detached house"
+    addresses = AddressList(
+        [_make_unstandardised_address({"Property Type": description})]
+    )
+    dwelling_kind = _StubColumnClassifier(
+        result={description: PropertyType.HOUSE}
+    )
+    built_form = _StubColumnClassifier(
+        result={description: BuiltFormType.SEMI_DETACHED}
+    )
+    columns = [
+        _column("property_type", "Property Type", dwelling_kind),
+        _column("built_form_type", "Property Type", built_form),
+    ]
+
+    # act
+    result = _orchestrator(columns).classify_columns(addresses)
+
+    # assert: both classifiers received the shared description set, and the
+    # two mappings sit side by side under distinct ``name`` keys.
+    assert dwelling_kind.received == {description}
+    assert built_form.received == {description}
+    assert result == {
+        "property_type": {description: PropertyType.HOUSE},
+        "built_form_type": {description: BuiltFormType.SEMI_DETACHED},
+    }
+
+
+def test_classify_and_persist_writes_each_columns_mapping_to_its_own_repo() -> None:
+    # arrange: every column gets a repo of its own, so classifications routed
+    # to the wrong column would surface in the wrong stub's ``calls`` log.
+    solid_brick = WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
+    landlord_info = {"Property Type": "semi-detached", "Walls": "solid brick"}
+    addresses = AddressList(
+        [_make_unstandardised_address(landlord_info)]
+    )
+    property_writes = _StubLandlordOverrideRepository()
+    wall_writes = _StubLandlordOverrideRepository()
+    property_column = _column(
+        "property_type",
+        "Property Type",
+        _StubColumnClassifier({"semi-detached": PropertyType.HOUSE}),
+        property_writes,
+    )
+    wall_column = _column(
+        "wall_type",
+        "Walls",
+        _StubColumnClassifier({"solid brick": solid_brick}),
+        wall_writes,
+    )
+    columns: list[ClassifiableColumn[Any]] = [
+        property_column,
+        wall_column,
+    ]
+
+    # act
+    result = _orchestrator(columns).classify_and_persist(addresses, portfolio_id=42)
+
+    # assert: each stub logged exactly one call carrying only its own column's
+    # mapping under portfolio 42, and the return value mirrors classify_columns.
+    assert property_writes.calls == [(42, {"semi-detached": PropertyType.HOUSE})]
+    assert wall_writes.calls == [
+        (42, {"solid brick": solid_brick}),
+    ]
+    assert result == {
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": solid_brick},
+    }
+
+
+def test_classify_and_persist_skips_upsert_for_a_column_absent_from_the_batch() -> None:
+    # arrange: a ``Walls`` column is registered, yet the only address carries
+    # no ``Walls`` entry. Classification still runs (yielding an empty mapping),
+    # but ``upsert_all`` must NOT be called -- an empty bulk insert is a noisy no-op.
+    addresses = AddressList(
+        [_make_unstandardised_address({"Property Type": "semi-detached"})]
+    )
+    property_writes = _StubLandlordOverrideRepository()
+    wall_writes = _StubLandlordOverrideRepository()
+    property_column = _column(
+        "property_type",
+        "Property Type",
+        _StubColumnClassifier({"semi-detached": PropertyType.HOUSE}),
+        property_writes,
+    )
+    # The Walls classifier stub is configured with an empty result mapping.
+    wall_column = _column(
+        "wall_type", "Walls", _StubColumnClassifier({}), wall_writes
+    )
+    columns: list[ClassifiableColumn[Any]] = [
+        property_column,
+        wall_column,
+    ]
+
+    # act
+    _orchestrator(columns).classify_and_persist(addresses, portfolio_id=7)
+
+    # assert: only the Property Type repo was written to; Walls saw no call.
+    assert property_writes.calls == [(7, {"semi-detached": PropertyType.HOUSE})]
+    assert wall_writes.calls == []
diff --git a/tests/repositories/landlord_overrides/__init__.py b/tests/repositories/landlord_overrides/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/landlord_overrides/postgres/__init__.py b/tests/repositories/landlord_overrides/postgres/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py b/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py
new file mode 100644
index 00000000..9154b664
--- /dev/null
+++ b/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py
@@ -0,0 +1,147 @@
+"""Integration tests for the source-aware upsert policy.
+
+The conflict policy lives entirely in SQL (``INSERT ... ON CONFLICT
+... DO UPDATE ... WHERE existing.source = 'classifier'``). The only way to
+verify it correctly distinguishes ``EXCLUDED.source`` from the qualified
+``landlord_property_type_overrides.source`` is against a real Postgres --
+the ``db_engine`` fixture in ``tests/conftest.py`` spins one up per test.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session, select
+
+from domain.landlord_description_overrides.property_type import PropertyType
+from infrastructure.postgres.landlord_override_enums import OverrideSource
+from infrastructure.postgres.landlord_property_type_override_postgres_repository import (
+    LandlordPropertyTypeOverridePostgresRepository,
+)
+from infrastructure.postgres.landlord_property_type_override_table import (
+    LandlordPropertyTypeOverrideRow,
+)
+
+
+@pytest.fixture
+def session(db_engine: Engine) -> Iterator[Session]:
+    with Session(db_engine) as db_session:  # the ``with`` block closes it after the test
+        yield db_session
+
+
+def _select_row(
+    session: Session, portfolio_id: int, description: str
+) -> LandlordPropertyTypeOverrideRow:
+    """Fetch the single row keyed by ``(portfolio_id, description)``; fail otherwise."""
+    query = select(LandlordPropertyTypeOverrideRow).where(
+        LandlordPropertyTypeOverrideRow.portfolio_id == portfolio_id,
+        LandlordPropertyTypeOverrideRow.description == description,
+    )
+    matches = session.exec(query).all()
+    assert len(matches) == 1, f"expected exactly one row, got {len(matches)}"
+    return matches[0]
+
+
+def test_inserts_a_fresh_row_with_source_classifier(session: Session) -> None:
+    # arrange: a repository bound to the test's session.
+    repo = LandlordPropertyTypeOverridePostgresRepository(session)
+
+    # act: upsert a single classification and commit it.
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE})
+    session.commit()
+
+    # assert: the row carries the classified value and the classifier source.
+    row = _select_row(session, portfolio_id=1, description="cosy")
+    assert row.value is PropertyType.HOUSE
+    assert row.source == OverrideSource.CLASSIFIER
+
+
+def test_reupsert_overwrites_a_classifier_row(session: Session) -> None:
+    # arrange: an earlier classifier run committed ``FLAT`` for "cosy".
+    repo = LandlordPropertyTypeOverridePostgresRepository(session)
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.FLAT})
+    session.commit()
+
+    # act: the classifier re-runs and now reports ``HOUSE`` for the same key.
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE})
+    session.commit()
+
+    # assert: the newer classification replaced the stale one; still one row.
+    row = _select_row(session, portfolio_id=1, description="cosy")
+    assert row.value is PropertyType.HOUSE
+    assert row.source == OverrideSource.CLASSIFIER
+
+
+def test_reupsert_does_not_overwrite_a_user_row(session: Session) -> None:
+    # arrange: a user has corrected the row to ``BUNGALOW``. The classifier
+    # path never produces ``source = 'user'``, so the row is added directly
+    # through the session to mimic the override frontend.
+    user_row = LandlordPropertyTypeOverrideRow(
+        portfolio_id=1,
+        description="cosy",
+        value=PropertyType.BUNGALOW,
+        source=OverrideSource.USER,
+    )
+    session.add(user_row)
+    session.commit()
+
+    # act: the classifier re-runs and tries to classify the same description
+    # as a ``HOUSE``. Under the source-aware conflict policy, this must be
+    # silently skipped -- user edits beat classifier reruns.
+    repo = LandlordPropertyTypeOverridePostgresRepository(session)
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE})
+    session.commit()
+
+    # assert: the user's value and ``source = user`` both survived the rerun.
+    row = _select_row(session, portfolio_id=1, description="cosy")
+    assert row.value is PropertyType.BUNGALOW
+    assert row.source == OverrideSource.USER
+
+
+def test_upsert_keeps_other_portfolios_descriptions_independent(
+    session: Session,
+) -> None:
+    # arrange: the unique key is ``(portfolio_id, description)``, so the same
+    # description for two different portfolios must coexist as two rows.
+    repo = LandlordPropertyTypeOverridePostgresRepository(session)
+
+    # act: write "cosy" for portfolios 1 and 2 with different values, one commit.
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": PropertyType.HOUSE})
+    repo.upsert_all(portfolio_id=2, descriptions_to_values={"cosy": PropertyType.FLAT})
+    session.commit()
+
+    # assert: both rows survive with their own values.
+    assert _select_row(session, 1, "cosy").value is PropertyType.HOUSE
+    assert _select_row(session, 2, "cosy").value is PropertyType.FLAT
+
+
+def test_upsert_persists_unknown_so_a_user_can_resolve_it_later(
+    session: Session,
+) -> None:
+    # arrange / act: a description the classifier couldn't resolve is still
+    # written (as ``UNKNOWN``) -- per ADR-0002 §5 / ADR-0003 §Decision -- so a
+    # future user override can upgrade it to a real value.
+    repo = LandlordPropertyTypeOverridePostgresRepository(session)
+    repo.upsert_all(
+        portfolio_id=1,
+        descriptions_to_values={"unparseable nonsense": PropertyType.UNKNOWN},
+    )
+    session.commit()
+
+    # assert: the row exists with value=UNKNOWN, source=classifier.
+    row = _select_row(session, portfolio_id=1, description="unparseable nonsense")
+    assert row.value is PropertyType.UNKNOWN
+    assert row.source == OverrideSource.CLASSIFIER
+
+
+def test_upsert_all_with_empty_mapping_is_a_no_op(session: Session) -> None:
+    # arrange / act: upsert an empty mapping, then commit.
+    repo = LandlordPropertyTypeOverridePostgresRepository(session)
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={})
+    session.commit()
+
+    # assert: an unfiltered select over the table returns no rows.
+    rows = session.exec(select(LandlordPropertyTypeOverrideRow)).all()
+    assert rows == []
diff --git a/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py b/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py
new file mode 100644
index 00000000..2aae83dd
--- /dev/null
+++ b/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py
@@ -0,0 +1,158 @@
+"""Integration tests for the source-aware upsert policy on the WallType table.
+
+Mirror of ``test_landlord_property_type_override_postgres_repository.py`` --
+the SQL is structurally identical, but the conflict policy lives in two
+separate concrete adapters and so warrants two parallel test suites unless
+and until the adapters are factored through a shared upsert helper.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+
+import pytest
+from sqlalchemy import Engine
+from sqlmodel import Session, select
+
+from domain.landlord_description_overrides.wall_type import WallType
+from infrastructure.postgres.landlord_override_enums import OverrideSource
+from infrastructure.postgres.landlord_wall_type_override_postgres_repository import (
+    LandlordWallTypeOverridePostgresRepository,
+)
+from infrastructure.postgres.landlord_wall_type_override_table import (
+    LandlordWallTypeOverrideRow,
+)
+
+
+@pytest.fixture
+def session(db_engine: Engine) -> Iterator[Session]:
+    with Session(db_engine) as db_session:  # the ``with`` block closes it after the test
+        yield db_session
+
+
+def _select_row(
+    session: Session, portfolio_id: int, description: str
+) -> LandlordWallTypeOverrideRow:
+    """Fetch the single row keyed by ``(portfolio_id, description)``; fail otherwise."""
+    query = select(LandlordWallTypeOverrideRow).where(
+        LandlordWallTypeOverrideRow.portfolio_id == portfolio_id,
+        LandlordWallTypeOverrideRow.description == description,
+    )
+    matches = session.exec(query).all()
+    assert len(matches) == 1, f"expected exactly one row, got {len(matches)}"
+    return matches[0]
+
+
+def test_inserts_a_fresh_row_with_source_classifier(session: Session) -> None:
+    # arrange / act: classify one description as ``CAVITY_FILLED`` (a member
+    # the ``WallType`` enum defines) and commit it.
+    repo = LandlordWallTypeOverridePostgresRepository(session)
+    repo.upsert_all(
+        portfolio_id=1,
+        descriptions_to_values={"cavity insulated": WallType.CAVITY_FILLED},
+    )
+    session.commit()
+
+    # assert: the row landed with that value and the classifier source.
+    row = _select_row(session, portfolio_id=1, description="cavity insulated")
+    assert row.value is WallType.CAVITY_FILLED
+    assert row.source == OverrideSource.CLASSIFIER
+
+
+def test_reupsert_overwrites_a_classifier_row(session: Session) -> None:
+    # arrange: a stale classifier row (``CAVITY_FILLED``) exists.
+    solid_brick = WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
+    repo = LandlordWallTypeOverridePostgresRepository(session)
+    repo.upsert_all(
+        portfolio_id=1,
+        descriptions_to_values={"old red brick": WallType.CAVITY_FILLED},
+    )
+    session.commit()
+
+    # act: re-classify with a different category.
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"old red brick": solid_brick})
+    session.commit()
+
+    # assert: the new classification wins.
+    row = _select_row(session, portfolio_id=1, description="old red brick")
+    assert row.value is solid_brick
+    assert row.source == OverrideSource.CLASSIFIER
+
+
+def test_reupsert_does_not_overwrite_a_user_row(session: Session) -> None:
+    # arrange: a user has corrected the row to a sandstone variant
+    # (``SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED``). The classifier path
+    # never produces ``source = 'user'``; we install the row directly to
+    # mimic the override frontend.
+    user_row = LandlordWallTypeOverrideRow(
+        portfolio_id=1,
+        description="old red brick",
+        value=WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED,
+        source=OverrideSource.USER,
+    )
+    session.add(user_row)
+    session.commit()
+
+    # act: the classifier re-runs and tries to classify the same description
+    # as solid brick. Under the source-aware conflict policy, this must
+    # be silently skipped -- user edits beat classifier reruns.
+    repo = LandlordWallTypeOverridePostgresRepository(session)
+    solid_brick = WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"old red brick": solid_brick})
+    session.commit()
+
+    # assert: the user row is unchanged.
+    row = _select_row(session, portfolio_id=1, description="old red brick")
+    assert row.value is WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED
+    assert row.source == OverrideSource.USER
+
+
+def test_upsert_keeps_other_portfolios_descriptions_independent(
+    session: Session,
+) -> None:
+    # arrange / act: the unique key is ``(portfolio_id, description)``, so the
+    # same description for two different portfolios must coexist as two rows.
+    solid_brick = WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
+    repo = LandlordWallTypeOverridePostgresRepository(session)
+    repo.upsert_all(
+        portfolio_id=1,
+        descriptions_to_values={"old red brick": WallType.CAVITY_FILLED},
+    )
+    repo.upsert_all(portfolio_id=2, descriptions_to_values={"old red brick": solid_brick})
+    session.commit()
+
+    # assert: both rows survive with their own values.
+    assert _select_row(session, 1, "old red brick").value is WallType.CAVITY_FILLED
+    assert _select_row(session, 2, "old red brick").value is solid_brick
+
+
+def test_upsert_persists_unknown_so_a_user_can_resolve_it_later(
+    session: Session,
+) -> None:
+    # arrange / act: a description the classifier couldn't resolve is still
+    # written (as ``UNKNOWN``) -- per ADR-0002 §5 / ADR-0003 §Decision -- so a
+    # future user override can upgrade it to a real value.
+    repo = LandlordWallTypeOverridePostgresRepository(session)
+    repo.upsert_all(
+        portfolio_id=1,
+        descriptions_to_values={"unparseable wall description": WallType.UNKNOWN},
+    )
+    session.commit()
+
+    # assert: the row exists with value=UNKNOWN, source=classifier.
+    row = _select_row(
+        session, portfolio_id=1, description="unparseable wall description"
+    )
+    assert row.value is WallType.UNKNOWN
+    assert row.source == OverrideSource.CLASSIFIER
+
+
+def test_upsert_all_with_empty_mapping_is_a_no_op(session: Session) -> None:
+    # arrange / act: upsert an empty mapping, then commit.
+    repo = LandlordWallTypeOverridePostgresRepository(session)
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={})
+    session.commit()
+
+    # assert: an unfiltered select over the table returns no rows.
+    rows = session.exec(select(LandlordWallTypeOverrideRow)).all()
+    assert rows == []

From 36f4c32904a40f76e7c07a153cc96c41c925ebe6 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Tue, 26 May 2026 16:18:26 +0000
Subject: [PATCH 23/29] added roofs

---
 .../landlord_description_overrides/handler.py | 20 +++-
 .../wall_type.py                              | 93 ++++++++++++++-----
 .../wall_type_construction_dates.py           | 72 ++++++++++++++
 .../chatgpt/chatgpt_column_classifier.py      | 19 +++-
 ..._roof_type_override_postgres_repository.py | 80 ++++++++++++++++
 .../landlord_roof_type_override_table.py      | 69 ++++++++++++++
 playground.py                                 |  2 +-
 .../chatgpt/test_chatgpt_column_classifier.py | 54 ++++++++++-
 8 files changed, 378 insertions(+), 31 deletions(-)
 create mode 100644 domain/landlord_description_overrides/wall_type_construction_dates.py
 create mode 100644 infrastructure/postgres/landlord_roof_type_override_postgres_repository.py
 create mode 100644 infrastructure/postgres/landlord_roof_type_override_table.py

diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
index ff16925e..7b7b60af 100644
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@@ -11,7 +11,11 @@ from applications.landlord_description_overrides.landlord_description_overrides_
 from domain.addresses.unstandardised_address import AddressList
 from domain.landlord_description_overrides.built_form_type import BuiltFormType
 from domain.landlord_description_overrides.property_type import PropertyType
+from domain.landlord_description_overrides.roof_type import RoofType
 from domain.landlord_description_overrides.wall_type import WallType
+from domain.landlord_description_overrides.wall_type_construction_dates import (
+    wall_type_construction_date_prompt_hint,
+)
 from infrastructure.chatgpt.chatgpt import ChatGPT
 from infrastructure.chatgpt.chatgpt_column_classifier import ChatGptColumnClassifier
 from infrastructure.postgres.config import PostgresConfig
@@ -22,6 +26,9 @@ from infrastructure.postgres.landlord_built_form_type_override_postgres_reposito
 from infrastructure.postgres.landlord_property_type_override_postgres_repository import (
     LandlordPropertyTypeOverridePostgresRepository,
 )
+from infrastructure.postgres.landlord_roof_type_override_postgres_repository import (
+    LandlordRoofTypeOverridePostgresRepository,
+)
 from infrastructure.postgres.landlord_wall_type_override_postgres_repository import (
     LandlordWallTypeOverridePostgresRepository,
 )
@@ -98,10 +105,21 @@ def handler(
                 name="wall_type",
                 source_column="Walls",
                 classifier=ChatGptColumnClassifier(
-                    chat_gpt, WallType, WallType.UNKNOWN
+                    chat_gpt,
+                    WallType,
+                    WallType.UNKNOWN,
+                    extra_instructions=wall_type_construction_date_prompt_hint(),
                 ),
                 repo=LandlordWallTypeOverridePostgresRepository(session),
             ),
+            ClassifiableColumn(
+                name="roof_type",
+                source_column="Roofs",
+                classifier=ChatGptColumnClassifier(
+                    chat_gpt, RoofType, RoofType.UNKNOWN
+                ),
+                repo=LandlordRoofTypeOverridePostgresRepository(session),
+            ),
         ]
 
         orchestrator = LandlordDescriptionOverridesOrchestrator(
diff --git a/domain/landlord_description_overrides/wall_type.py b/domain/landlord_description_overrides/wall_type.py
index 42b90da6..1466f82d 100644
--- a/domain/landlord_description_overrides/wall_type.py
+++ b/domain/landlord_description_overrides/wall_type.py
@@ -13,40 +13,83 @@ class WallType(Enum):
     """
 
     CAVITY_FILLED = "Cavity wall, filled cavity"
-    CAVITY_AS_BUILT_INSULATED_ASSUMED = "Cavity wall, as built, insulated (assumed)"
-    CAVITY_AS_BUILT_NO_INSULATION_ASSUMED = "Cavity wall, as built, no insulation (assumed)"
-    CAVITY_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Cavity wall, as built, partial insulation (assumed)"
+    CAVITY_AS_BUILT_INSULATED_ASSUMED = (
+        "Cavity wall, as built, insulated (assumed)"  # 1983 - 1990
+    )
+    CAVITY_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Cavity wall, as built, no insulation (assumed)"  # Pre-1975
+    )
+
+    CAVITY_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Cavity wall, as built, partial insulation (assumed)"  # 1976 - 1982
+    )
     CAVITY_WITH_INTERNAL_INSULATION = "Cavity wall, with internal insulation"
     CAVITY_WITH_EXTERNAL_INSULATION = "Cavity wall, with external insulation"
-    CAVITY_FILLED_AND_INTERNAL_INSULATION = "Cavity wall, filled cavity and internal insulation"
-    CAVITY_FILLED_AND_EXTERNAL_INSULATION = "Cavity wall, filled cavity and external insulation"
+    CAVITY_FILLED_AND_INTERNAL_INSULATION = (
+        "Cavity wall, filled cavity and internal insulation"
+    )
+    CAVITY_FILLED_AND_EXTERNAL_INSULATION = (
+        "Cavity wall, filled cavity and external insulation"
+    )
 
-    SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED = "Solid brick, as built, no insulation (assumed)"
-    SOLID_BRICK_AS_BUILT_INSULATED_ASSUMED = "Solid brick, as built, insulated (assumed)"
-    SOLID_BRICK_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Solid brick, as built, partial insulation (assumed)"
+    SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Solid brick, as built, no insulation (assumed)"
+    )
+    SOLID_BRICK_AS_BUILT_INSULATED_ASSUMED = (
+        "Solid brick, as built, insulated (assumed)"
+    )
+    SOLID_BRICK_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Solid brick, as built, partial insulation (assumed)"
+    )
     SOLID_BRICK_WITH_INTERNAL_INSULATION = "Solid brick, with internal insulation"
     SOLID_BRICK_WITH_EXTERNAL_INSULATION = "Solid brick, with external insulation"
 
-    TIMBER_FRAME_AS_BUILT_NO_INSULATION_ASSUMED = "Timber frame, as built, no insulation (assumed)"
-    TIMBER_FRAME_AS_BUILT_INSULATED_ASSUMED = "Timber frame, as built, insulated (assumed)"
-    TIMBER_FRAME_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Timber frame, as built, partial insulation (assumed)"
+    TIMBER_FRAME_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Timber frame, as built, no insulation (assumed)"
+    )
+    TIMBER_FRAME_AS_BUILT_INSULATED_ASSUMED = (
+        "Timber frame, as built, insulated (assumed)"
+    )
+    TIMBER_FRAME_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Timber frame, as built, partial insulation (assumed)"
+    )
     TIMBER_FRAME_WITH_ADDITIONAL_INSULATION = "Timber frame, with additional insulation"
 
-    SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED = "Sandstone, as built, no insulation (assumed)"
+    SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Sandstone, as built, no insulation (assumed)"
+    )
     SANDSTONE_AS_BUILT_INSULATED_ASSUMED = "Sandstone, as built, insulated (assumed)"
-    SANDSTONE_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Sandstone, as built, partial insulation (assumed)"
+    SANDSTONE_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Sandstone, as built, partial insulation (assumed)"
+    )
     SANDSTONE_WITH_INTERNAL_INSULATION = "Sandstone, with internal insulation"
     SANDSTONE_WITH_EXTERNAL_INSULATION = "Sandstone, with external insulation"
 
-    GRANITE_OR_WHIN_AS_BUILT_NO_INSULATION_ASSUMED = "Granite or whin, as built, no insulation (assumed)"
-    GRANITE_OR_WHIN_AS_BUILT_INSULATED_ASSUMED = "Granite or whin, as built, insulated (assumed)"
-    GRANITE_OR_WHIN_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "Granite or whin, as built, partial insulation (assumed)"
-    GRANITE_OR_WHIN_WITH_INTERNAL_INSULATION = "Granite or whin, with internal insulation"
-    GRANITE_OR_WHIN_WITH_EXTERNAL_INSULATION = "Granite or whin, with external insulation"
+    GRANITE_OR_WHIN_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Granite or whin, as built, no insulation (assumed)"
+    )
+    GRANITE_OR_WHIN_AS_BUILT_INSULATED_ASSUMED = (
+        "Granite or whin, as built, insulated (assumed)"
+    )
+    GRANITE_OR_WHIN_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Granite or whin, as built, partial insulation (assumed)"
+    )
+    GRANITE_OR_WHIN_WITH_INTERNAL_INSULATION = (
+        "Granite or whin, with internal insulation"
+    )
+    GRANITE_OR_WHIN_WITH_EXTERNAL_INSULATION = (
+        "Granite or whin, with external insulation"
+    )
 
-    SYSTEM_BUILT_AS_BUILT_NO_INSULATION_ASSUMED = "System built, as built, no insulation (assumed)"
-    SYSTEM_BUILT_AS_BUILT_INSULATED_ASSUMED = "System built, as built, insulated (assumed)"
-    SYSTEM_BUILT_AS_BUILT_PARTIAL_INSULATION_ASSUMED = "System built, as built, partial insulation (assumed)"
+    SYSTEM_BUILT_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "System built, as built, no insulation (assumed)"
+    )
+    SYSTEM_BUILT_AS_BUILT_INSULATED_ASSUMED = (
+        "System built, as built, insulated (assumed)"
+    )
+    SYSTEM_BUILT_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "System built, as built, partial insulation (assumed)"
+    )
     SYSTEM_BUILT_WITH_INTERNAL_INSULATION = "System built, with internal insulation"
     SYSTEM_BUILT_WITH_EXTERNAL_INSULATION = "System built, with external insulation"
 
@@ -59,8 +102,12 @@ class WallType(Enum):
     COB_WITH_EXTERNAL_INSULATION = "Cob, with external insulation"
 
     CURTAIN_WALL = "Curtain wall"
-    CURTAIN_WALL_AS_BUILT_NO_INSULATION_ASSUMED = "Curtain Wall, as built, no insulation (assumed)"
-    CURTAIN_WALL_AS_BUILT_INSULATED_ASSUMED = "Curtain Wall, as built, insulated (assumed)"
+    CURTAIN_WALL_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Curtain Wall, as built, no insulation (assumed)"
+    )
+    CURTAIN_WALL_AS_BUILT_INSULATED_ASSUMED = (
+        "Curtain Wall, as built, insulated (assumed)"
+    )
     CURTAIN_WALL_FILLED = "Curtain Wall, filled cavity"
     CURTAIN_WALL_WITH_INTERNAL_INSULATION = "Curtain Wall, with internal insulation"
 
diff --git a/domain/landlord_description_overrides/wall_type_construction_dates.py b/domain/landlord_description_overrides/wall_type_construction_dates.py
new file mode 100644
index 00000000..4cd869b3
--- /dev/null
+++ b/domain/landlord_description_overrides/wall_type_construction_dates.py
@@ -0,0 +1,72 @@
+"""Construction-date metadata for the "assumed" ``WallType`` variants.
+
+The ``(assumed)`` variants of ``WallType`` are what RdSAP picks when a
+surveyor has no direct observation and falls back to the typical wall make-up
+for a property's build era. The era boundaries reflect UK Building
+Regulations milestones for cavity-wall insulation:
+
+* up to 1975 -- no cavity insulation requirement
+* 1976-1982 -- partial-fill cavity (early insulation requirement)
+* 1983-1990 -- full-fill cavity (insulation required)
+
+Captured here as a structured lookup so:
+
+* the LLM prompt builder can render the ranges as a hint, helping the
+  classifier resolve era-implying landlord descriptions to the right
+  ``(assumed)`` variant;
+* future date-aware paths (a deterministic year-to-variant shortcut, a
+  date-keyed repo) can read from the same source instead of duplicating
+  the knowledge.
+
+Only the variants where we have a defensible era boundary appear here; the
+remaining ``(assumed)`` members are left out rather than guessed.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Mapping, Optional
+
+from domain.landlord_description_overrides.wall_type import WallType
+
+
+@dataclass(frozen=True)
+class YearRange:
+    """An inclusive year range. ``None`` on either end means "no bound"."""
+
+    start: Optional[int] = None
+    end: Optional[int] = None
+
+    def __str__(self) -> str:
+        if self.start is None:  # open start (fully open must not print "None-None")
+            return "any year" if self.end is None else f"pre-{self.end + 1}"
+        if self.end is None:  # open end only
+            return f"{self.start}+"
+        return f"{self.start}-{self.end}"
+
+
+WALL_TYPE_CONSTRUCTION_YEARS: Mapping[WallType, YearRange] = {
+    WallType.CAVITY_AS_BUILT_NO_INSULATION_ASSUMED: YearRange(end=1975),  # open start
+    WallType.CAVITY_AS_BUILT_PARTIAL_INSULATION_ASSUMED: YearRange(
+        start=1976, end=1982
+    ),
+    WallType.CAVITY_AS_BUILT_INSULATED_ASSUMED: YearRange(start=1983, end=1990),
+}  # insertion order is the bullet order rendered by the prompt hint below
+
+
+def wall_type_construction_date_prompt_hint() -> str:
+    """Build the wall-type construction-era hint for the classifier prompt.
+
+    Emits a fixed lead-in sentence followed by one bullet per entry of
+    ``WALL_TYPE_CONSTRUCTION_YEARS``, pairing the variant's quoted value
+    with its rendered year range, so descriptions that mention an era
+    (e.g. "1970s semi") can be steered to the matching variant.
+    """
+    header = (
+        "When the description carries construction-era information, prefer "
+        "the category whose typical build year matches:\n"
+    )
+    bullets: list[str] = []
+    for variant, era in WALL_TYPE_CONSTRUCTION_YEARS.items():
+        bullets.append(f"- {variant.value!r}: typically built {era}")
+    return header + "\n".join(bullets)
diff --git a/infrastructure/chatgpt/chatgpt_column_classifier.py b/infrastructure/chatgpt/chatgpt_column_classifier.py
index b23e7c2e..2ce66299 100644
--- a/infrastructure/chatgpt/chatgpt_column_classifier.py
+++ b/infrastructure/chatgpt/chatgpt_column_classifier.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import json
 from enum import Enum
-from typing import Any, TypeVar
+from typing import Any, Optional, TypeVar
 
 from domain.landlord_description_overrides.column_classifier import (
     ClassificationError,
@@ -27,10 +27,16 @@ class ChatGptColumnClassifier(ColumnClassifier[E]):
         chat_gpt: ChatGPT,
         category_enum: type[E],
         unknown: E,
+        extra_instructions: Optional[str] = None,
     ) -> None:
         self._chat_gpt = chat_gpt
         self._category_enum = category_enum
         self._unknown = unknown
+        # Free-form column-specific guidance appended to the system prompt
+        # ahead of the JSON-output instruction. Lets each column ship its
+        # own hints (e.g. wall-type construction-era ranges) without the
+        # generic classifier knowing what they are.
+        self._extra_instructions = extra_instructions
 
     def classify(self, descriptions: set[str]) -> dict[str, E]:
         if not descriptions:
@@ -62,12 +68,17 @@ class ChatGptColumnClassifier(ColumnClassifier[E]):
             for member in self._category_enum
             if member is not self._unknown
         )
-        return (
-            "Classify each free-text description into exactly one category. "
-            f"Categories: {categories}. "
+        parts = [
+            "Classify each free-text description into exactly one category. ",
+            f"Categories: {categories}. ",
+        ]
+        if self._extra_instructions:
+            parts.append(self._extra_instructions + " ")
+        parts.append(
             "Reply with only a JSON object mapping each original description "
             "to its category, and nothing else."
         )
+        return "".join(parts)
 
     def _to_category(self, value: Any) -> E:
         """Map a reply value to a category member, defaulting to UNKNOWN."""
diff --git a/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py b/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py
new file mode 100644
index 00000000..b5b570bc
--- /dev/null
+++ b/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py
@@ -0,0 +1,80 @@
+"""Postgres adapter for ``LandlordOverrideRepository[RoofType]``.
+
+Writes to ``landlord_roof_type_overrides`` (Drizzle-managed; mirrored by
+``LandlordRoofTypeOverrideRow``). The conflict policy lives in the SQL --
+see ADR-0003 §Decision. Shape mirrors
+``LandlordPropertyTypeOverridePostgresRepository``; the duplication is
+deliberate while there are only a handful of override columns -- if the
+duplication becomes painful, extract a shared upsert helper then.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from typing import cast
+
+from sqlalchemy import Table
+from sqlalchemy.dialects.postgresql import insert as pg_insert
+from sqlmodel import Session
+
+from domain.landlord_description_overrides.roof_type import RoofType
+from infrastructure.postgres.landlord_override_enums import OverrideSource
+from infrastructure.postgres.landlord_roof_type_override_table import (
+    LandlordRoofTypeOverrideRow,
+)
+from repositories.landlord_overrides.landlord_override_repository import (
+    LandlordOverrideRepository,
+)
+
+
+class LandlordRoofTypeOverridePostgresRepository(
+    LandlordOverrideRepository[RoofType]
+):
+    def __init__(self, session: Session) -> None:
+        self._session = session  # supplied by the caller; never committed here
+
+    def upsert_all(
+        self,
+        portfolio_id: int,
+        descriptions_to_values: dict[str, RoofType],
+    ) -> None:
+        if not descriptions_to_values:
+            return  # nothing to write; skip building an empty INSERT
+
+        now = datetime.now(timezone.utc)  # one UTC timestamp shared by every row
+        rows = [
+            {
+                "portfolio_id": portfolio_id,
+                "description": description,
+                "value": value.value,  # the enum member's underlying value
+                "source": OverrideSource.CLASSIFIER,
+                "created_at": now,
+                "updated_at": now,
+            }
+            for description, value in descriptions_to_values.items()
+        ]
+
+        # SQLModel's class-level ``__table__`` is injected at runtime on
+        # ``table=True`` classes but isn't exposed by the stubs; pin it to
+        # ``Table`` via ``getattr`` so the dialect insert helper below
+        # carries through with strict types.
+        table: Table = cast(Table, getattr(LandlordRoofTypeOverrideRow, "__table__"))
+        stmt = pg_insert(table).values(rows)
+
+        # The classifier may refresh its own past output, but must never
+        # overwrite a user correction -- the ``WHERE existing.source =
+        # 'classifier'`` guard enforces that. See ADR-0003 §Decision.
+        stmt = stmt.on_conflict_do_update(
+            index_elements=["portfolio_id", "description"],
+            set_={  # ``created_at`` is not listed, so a conflict leaves it untouched
+                "value": stmt.excluded.value,
+                "source": stmt.excluded.source,
+                "updated_at": stmt.excluded.updated_at,
+            },
+            where=table.c.source == OverrideSource.CLASSIFIER,
+        )
+
+        # SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the
+        # overload signatures is marked deprecated in stubs, which fires
+        # here even though our INSERT path is the supported one.
+        self._session.execute(stmt)  # pyright: ignore[reportDeprecated]
diff --git a/infrastructure/postgres/landlord_roof_type_override_table.py b/infrastructure/postgres/landlord_roof_type_override_table.py
new file mode 100644
index 00000000..f0cea945
--- /dev/null
+++ b/infrastructure/postgres/landlord_roof_type_override_table.py
@@ -0,0 +1,69 @@
+"""SQLModel mirror of the ``landlord_roof_type_overrides`` Drizzle table.
+
+The schema source of truth lives in the ``assessment-model`` TS repo
+(`src/app/db/schema/landlord_overrides.ts`). The migrations are owned there;
+this row class only mirrors the columns so the Python lambda can read/write.
+See ADR-0003. Shape mirrors ``LandlordPropertyTypeOverrideRow`` -- the only
+differences are the table name, the ``roof_type`` pgEnum on ``value``, and
+the unique-constraint name.
+"""
+
+from datetime import datetime, timezone
+from typing import ClassVar
+from uuid import UUID, uuid4
+
+from sqlalchemy import BigInteger, Column, UniqueConstraint
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.landlord_description_overrides.roof_type import RoofType
+from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
+
+
+class LandlordRoofTypeOverrideRow(SQLModel, table=True):
+    __tablename__: ClassVar[str] = "landlord_roof_type_overrides"  # pyright: ignore[reportIncompatibleVariableOverride]
+    __table_args__: ClassVar[tuple[UniqueConstraint, ...]] = (  # pyright: ignore[reportIncompatibleVariableOverride]
+        UniqueConstraint(
+            "portfolio_id",
+            "description",  # (portfolio_id, description) is the upsert's conflict target
+            name="landlord_roof_type_overrides_portfolio_description_unique",
+        ),
+    )
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True)
+
+    # bigint to match the Drizzle ``portfolio_id`` FK; SQLModel's default int
+    # mapping is 32-bit Integer and would overflow once portfolio IDs exceed
+    # 2^31. The FK to ``portfolio.id`` is enforced by the Drizzle migration,
+    # not declared here -- the ``portfolio`` table is not modelled in Python.
+    portfolio_id: int = Field(
+        sa_column=Column(BigInteger, nullable=False, index=True),
+    )
+
+    description: str = Field(nullable=False)
+
+    value: RoofType = Field(
+        sa_column=Column(
+            SAEnum(
+                RoofType,
+                name="roof_type",  # name of the enum type on the database side
+                values_callable=lambda cls: [m.value for m in cls],  # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+            ),
+            nullable=False,
+        ),
+    )
+
+    # Shared SAEnum -- see ``landlord_override_enums`` for why this single
+    # instance is reused by every ``landlord_*_overrides`` row class.
+    source: str = Field(
+        sa_column=Column(override_source_sa_enum, nullable=False),
+    )
+
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),  # timezone-aware UTC
+        nullable=False,
+    )
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),  # timezone-aware UTC
+        nullable=False,
+    )
diff --git a/playground.py b/playground.py
index d116dcf9..5e9001e1 100644
--- a/playground.py
+++ b/playground.py
@@ -46,7 +46,7 @@ def main() -> int:
             print(f"  - {c}")
         return 0
 
-    column = "roof_description"
+    column = "wall "
     series = df[column] if args.keep_na else df[column].dropna()
     for value in series.unique():
         print(value)
diff --git a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
index 8a07ecec..4cdf4dfe 100644
--- a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
+++ b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
@@ -23,11 +23,13 @@ class _FakeChatGPT(ChatGPT):
         error: Optional[Exception] = None,
     ) -> None:
         self.prompts: list[str] = []
+        self.system_prompts: list[Optional[str]] = []
         self._reply = reply
         self._error = error
 
     def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
         self.prompts.append(prompt)
+        self.system_prompts.append(system_prompt)
         if self._error is not None:
             raise self._error
         return self._reply
@@ -125,11 +127,59 @@ def test_empty_description_set_returns_empty_without_calling_chatgpt() -> None:
 
 def test_classifies_with_a_different_category_enum() -> None:
     # Arrange: the same adapter classifies a WallType column.
-    chat_gpt = _FakeChatGPT(reply='{"solid brick wall": "Solid Brick"}')
+    chat_gpt = _FakeChatGPT(
+        reply='{"solid brick wall": "Solid brick, as built, no insulation (assumed)"}'
+    )
     classifier = ChatGptColumnClassifier(chat_gpt, WallType, WallType.UNKNOWN)
 
     # Act
     result = classifier.classify({"solid brick wall"})
 
     # Assert
-    assert result == {"solid brick wall": WallType.SOLID_BRICK}
+    assert result == {
+        "solid brick wall": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
+    }
+
+
+def test_extra_instructions_are_appended_to_the_system_prompt() -> None:
+    # Arrange: a classifier carrying column-specific guidance (e.g. wall-type
+    # build-era hints). The guidance must reach the model verbatim, in the
+    # system prompt, ahead of the JSON-output instruction.
+    hint = "If the description carries a build decade, prefer X."
+    fake = _FakeChatGPT(reply='{"1970s semi": "House"}')
+    classifier = ChatGptColumnClassifier(
+        fake, PropertyType, PropertyType.UNKNOWN, extra_instructions=hint
+    )
+
+    # Act
+    classifier.classify({"1970s semi"})
+
+    # Assert: the hint is present verbatim and precedes the JSON instruction.
+    sent = fake.system_prompts[0]
+    assert sent is not None
+    assert hint in sent
+
+    # Locate each sentence by its opening words to compare positions.
+    hint_at = sent.index("If the description carries a build decade")
+    json_at = sent.index("Reply with only a JSON object")
+    assert hint_at < json_at
+
+
+def test_omitting_extra_instructions_leaves_the_system_prompt_unchanged() -> None:
+    # Arrange: no per-column guidance -- the prompt must match the original.
+    expected = (
+        "Classify each free-text description into exactly one category. "
+        "Categories: House, Bungalow, Flat, Maisonette, Park home. "
+        "Reply with only a JSON object mapping each original description "
+        "to its category, and nothing else."
+    )
+    fake = _FakeChatGPT(reply='{"semi-detached": "House"}')
+    classifier = ChatGptColumnClassifier(fake, PropertyType, PropertyType.UNKNOWN)
+
+    # Act
+    classifier.classify({"semi-detached"})
+
+    # Assert: exact match -- no trailing whitespace, no orphan hint.
+    sent = fake.system_prompts[0]
+    assert sent is not None
+    assert sent == expected

From 99614820b98ed35c9a3c4e2e4d7c3d1e1b6216b3 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 29 May 2026 10:41:46 +0000
Subject: [PATCH 24/29] made landlord overrides sqs

---
 .../landlord_description_overrides/handler.py | 161 +++++++++---------
 ...lord_description_overrides_trigger_body.py |   4 +
 backend/app/bulk_uploads/router.py            |  21 +++
 backend/app/bulk_uploads/schema.py            |   9 +
 backend/app/config.py                         |   1 +
 ...lord_description_overrides_orchestrator.py |  36 ++++
 6 files changed, 152 insertions(+), 80 deletions(-)

diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
index 7b7b60af..801d1f12 100644
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@@ -1,14 +1,12 @@
 import logging
 import os
 from typing import Any
-from uuid import UUID
 
 import boto3
 
 from applications.landlord_description_overrides.landlord_description_overrides_trigger_body import (
     LandlordDescriptionOverridesTriggerBody,
 )
-from domain.addresses.unstandardised_address import AddressList
 from domain.landlord_description_overrides.built_form_type import BuiltFormType
 from domain.landlord_description_overrides.property_type import PropertyType
 from domain.landlord_description_overrides.roof_type import RoofType
@@ -33,36 +31,90 @@ from infrastructure.postgres.landlord_wall_type_override_postgres_repository imp
     LandlordWallTypeOverridePostgresRepository,
 )
 from infrastructure.s3.csv_s3_client import CsvS3Client
+from infrastructure.s3.s3_uri import parse_s3_uri
 from orchestration.classifiable_column import ClassifiableColumn
 from orchestration.landlord_description_overrides_orchestrator import (
     LandlordDescriptionOverridesOrchestrator,
 )
+from orchestration.task_orchestrator import TaskOrchestrator
 from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
     UnstandardisedAddressListCsvS3Repository,
 )
+from utilities.aws_lambda.subtask_handler import subtask_handler
 
 logger = logging.getLogger(__name__)
 
 
+def _build_columns(
+    column_mapping: dict[str, str], chat_gpt: ChatGPT, session: Any
+) -> list[ClassifiableColumn[Any]]:
+    """Build one ClassifiableColumn per recognised category in ``column_mapping``.
+
+    ``column_mapping`` is ``{category -> source CSV header}``. One header may
+    feed several categories (e.g. ``"Property Type"`` -> property_type and
+    built_form_type) because each category is its own entry. Categories with
+    no factory are logged at WARNING and skipped; order follows the mapping.
+    """
+    factories = {  # lambdas defer construction until a category is actually mapped
+        "property_type": lambda src: ClassifiableColumn(
+            name="property_type",
+            source_column=src,
+            classifier=ChatGptColumnClassifier(
+                chat_gpt, PropertyType, PropertyType.UNKNOWN
+            ),
+            repo=LandlordPropertyTypeOverridePostgresRepository(session),
+        ),
+        "built_form_type": lambda src: ClassifiableColumn(
+            name="built_form_type",
+            source_column=src,
+            classifier=ChatGptColumnClassifier(
+                chat_gpt, BuiltFormType, BuiltFormType.UNKNOWN
+            ),
+            repo=LandlordBuiltFormTypeOverridePostgresRepository(session),
+        ),
+        "wall_type": lambda src: ClassifiableColumn(
+            name="wall_type",
+            source_column=src,
+            classifier=ChatGptColumnClassifier(
+                chat_gpt,
+                WallType,
+                WallType.UNKNOWN,
+                extra_instructions=wall_type_construction_date_prompt_hint(),
+            ),
+            repo=LandlordWallTypeOverridePostgresRepository(session),
+        ),
+        "roof_type": lambda src: ClassifiableColumn(
+            name="roof_type",
+            source_column=src,
+            classifier=ChatGptColumnClassifier(
+                chat_gpt, RoofType, RoofType.UNKNOWN
+            ),
+            repo=LandlordRoofTypeOverridePostgresRepository(session),
+        ),
+    }
+
+    columns: list[ClassifiableColumn[Any]] = []
+    for category, source_column in column_mapping.items():
+        factory = factories.get(category)
+        if factory is None:  # not one of the four categories wired up above
+            logger.warning("Unknown classifier category %r; skipping.", category)
+            continue
+        columns.append(factory(source_column))
+    return columns
+
+
+@subtask_handler()
 def handler(
-    body: dict[str, Any],
-    context: Any,
-) -> dict[str, list[str]]:
-    # TODO: replace with ``LandlordDescriptionOverridesTriggerBody.model_validate(body)``
-    # once this lambda is wired into the parent task pipeline via the SQS
-    # subtask envelope. Until then the trigger fields are hard-coded so the
-    # local invoker can exercise the full path. See ADR-0003 §Out of scope.
-    trigger = LandlordDescriptionOverridesTriggerBody(
-        task_id=UUID("00000000-0000-0000-0000-000000000001"),
-        sub_task_id=UUID("00000000-0000-0000-0000-000000000002"),
-        s3_uri="s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv",
-        portfolio_id=730,
-    )
+    body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
+) -> dict[str, int]:
+    trigger = LandlordDescriptionOverridesTriggerBody.model_validate(body)
 
-    bucket = "retrofit-data-dev"
+    # The classifier reads the ORIGINAL upload (raw landlord headers), so the S3
+    # bucket comes from the trigger URI rather than a fixed env var.
+    bucket, _key = parse_s3_uri(trigger.s3_uri)
 
-    # boto3.client is overloaded per-service in the installed stubs; cast
-    # to Any so the strict-mode checker treats it as opaque.
+    # boto3.client is overloaded per-service in the installed stubs; cast to Any
+    # so the strict-mode checker treats it as opaque.
     boto3_client: Any = (
         boto3.client
     )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
@@ -73,74 +125,23 @@ def handler(
         csv_client, bucket
     )
 
-    # One transactional session per handler invocation: the context manager
-    # commits on clean exit and rolls back on exception, so the handler never
-    # invokes ``.commit()`` itself -- transaction semantics live in the
-    # infrastructure layer.
+    # Raw rows, not load_batch: the original upload carries the description
+    # columns but not the canonical address/postcode columns load_batch requires.
+    rows = csv_client.read_rows(trigger.s3_uri)
+
     engine = make_engine(PostgresConfig.from_env(os.environ))
     with transactional_session(engine) as session:
         chat_gpt = ChatGPT()
-        # The "Property Type" CSV column is read by two classifiers: the
-        # landlord's free-text (e.g. "semi-detached house") encodes both the
-        # dwelling kind (PropertyType) and how it joins to neighbours
-        # (BuiltFormType). Each classification lands in its own table.
-        columns: list[ClassifiableColumn[Any]] = [
-            ClassifiableColumn(
-                name="property_type",
-                source_column="Property Type",
-                classifier=ChatGptColumnClassifier(
-                    chat_gpt, PropertyType, PropertyType.UNKNOWN
-                ),
-                repo=LandlordPropertyTypeOverridePostgresRepository(session),
-            ),
-            ClassifiableColumn(
-                name="built_form_type",
-                source_column="Property Type",
-                classifier=ChatGptColumnClassifier(
-                    chat_gpt, BuiltFormType, BuiltFormType.UNKNOWN
-                ),
-                repo=LandlordBuiltFormTypeOverridePostgresRepository(session),
-            ),
-            ClassifiableColumn(
-                name="wall_type",
-                source_column="Walls",
-                classifier=ChatGptColumnClassifier(
-                    chat_gpt,
-                    WallType,
-                    WallType.UNKNOWN,
-                    extra_instructions=wall_type_construction_date_prompt_hint(),
-                ),
-                repo=LandlordWallTypeOverridePostgresRepository(session),
-            ),
-            ClassifiableColumn(
-                name="roof_type",
-                source_column="Roofs",
-                classifier=ChatGptColumnClassifier(
-                    chat_gpt, RoofType, RoofType.UNKNOWN
-                ),
-                repo=LandlordRoofTypeOverridePostgresRepository(session),
-            ),
-        ]
-
+        columns = _build_columns(trigger.column_mapping, chat_gpt, session)
         orchestrator = LandlordDescriptionOverridesOrchestrator(
             unstandardised_address_repo=unstandardised_address_repo,
             columns=columns,
         )
-
-        addressList: AddressList = orchestrator.get_unstandardised_addresses(
-            input_s3_uri=trigger.s3_uri
+        classified = orchestrator.classify_and_persist_from_rows(
+            rows, portfolio_id=trigger.portfolio_id
         )
 
-        # Cap the batch to the first 20 while the ChatGPT path is under test.
-        # Remove before wiring into the production subtask pipeline.
-        addressList = AddressList(addressList[:20])
-
-        classified = orchestrator.classify_and_persist(
-            addressList, portfolio_id=trigger.portfolio_id
-        )
-        for column, mapping in classified.items():
-            logger.info(
-                "Classified %d descriptions for column %r.", len(mapping), column
-            )
-
-    return {"hello": ["200"]}
+    counts = {name: len(mapping) for name, mapping in classified.items()}
+    for name, n in counts.items():
+        logger.info("Classified %d descriptions for column %r.", n, name)
+    return counts
diff --git a/applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py b/applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py
index 9f78215e..0ca80ec3 100644
--- a/applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py
+++ b/applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py
@@ -13,3 +13,7 @@ class LandlordDescriptionOverridesTriggerBody(BaseModel):
     # Python ``int`` is unbounded so the Pydantic side stays simple; the
     # SQLModel row class pins the storage to ``BigInteger``.
     portfolio_id: int
+    # category -> source CSV header (the classifier subset of the upload
+    # mapping). Defaulted so a message that omits the mapping classifies
+    # nothing rather than failing validation.
+    column_mapping: dict[str, str] = {}
diff --git a/backend/app/bulk_uploads/router.py b/backend/app/bulk_uploads/router.py
index 9928b456..c050b18c 100644
--- a/backend/app/bulk_uploads/router.py
+++ b/backend/app/bulk_uploads/router.py
@@ -13,6 +13,7 @@ from backend.app.bulk_uploads.schema import (
     CombinedResultsResponse,
     CombinerTriggerRequest,
     FlagsSummary,
+    LandlordOverridesTriggerRequest,
     PostcodeSplitterTriggerRequest,
 )
 from backend.app.bulk_uploads.scoring import score_bucket
@@ -92,6 +93,26 @@ async def trigger_combiner(req: CombinerTriggerRequest):
     }
 
 
+@router.post("/trigger-landlord-overrides", status_code=202)
+async def trigger_landlord_overrides(req: LandlordOverridesTriggerRequest):
+    """Enqueue the request on the landlord-overrides SQS queue (202 on success)."""
+    settings = get_settings()
+    try:
+        sqs = boto3.client("sqs", region_name=settings.AWS_DEFAULT_REGION)
+        response = sqs.send_message(
+            QueueUrl=settings.LANDLORD_OVERRIDES_SQS_URL,
+            MessageBody=req.model_dump_json(),
+        )
+    except Exception as e:  # surfaced as a 500; chained so the cause is kept
+        raise HTTPException(status_code=500, detail=f"SQS error: {e}") from e
+
+    return {
+        "task_id": req.task_id,
+        "sub_task_id": req.sub_task_id,
+        "sqs_message_id": response.get("MessageId"),
+    }
+
+
 @router.get("/{task_id}/combined-results", response_model=CombinedResultsResponse)
 async def get_combined_results(
     task_id: UUID,
diff --git a/backend/app/bulk_uploads/schema.py b/backend/app/bulk_uploads/schema.py
index ca3b39ea..af797cac 100644
--- a/backend/app/bulk_uploads/schema.py
+++ b/backend/app/bulk_uploads/schema.py
@@ -14,6 +14,15 @@ class CombinerTriggerRequest(BaseModel):
     sub_task_id: str
 
 
+class LandlordOverridesTriggerRequest(BaseModel):
+    task_id: str  # NOTE(review): the lambda was invoked with UUID ids -- confirm format
+    sub_task_id: str
+    s3_uri: str
+    portfolio_id: int
+    # category -> source CSV header (the classifier subset of the upload mapping)
+    column_mapping: dict[str, str]  # required here, unlike the lambda-side default
+
+
 class FlagsSummary(BaseModel):
     duplicates: int
     missing: int
diff --git a/backend/app/config.py b/backend/app/config.py
index fcfb6d5b..f969518d 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -42,6 +42,7 @@ class Settings(BaseSettings):
     MAGICPLAN_SQS_URL: str = "changeme"
     POSTCODE_SPLITTER_SQS_URL: str = "changeme"
     COMBINER_SQS_URL: str = "changeme"
+    LANDLORD_OVERRIDES_SQS_URL: str = "changeme"
 
     # Third parties
     EPC_AUTH_TOKEN: str = "changeme"
diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
index 389d1afb..6203b8d5 100644
--- a/orchestration/landlord_description_overrides_orchestrator.py
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@@ -81,3 +81,39 @@ class LandlordDescriptionOverridesOrchestrator:
                 continue
             column.repo.upsert_all(portfolio_id, mapping)
         return classified
+
+    def classify_and_persist_from_rows(
+        self, rows: list[dict[str, str]], portfolio_id: int
+    ) -> dict[str, dict[str, Enum]]:
+        """Classify raw CSV rows column by column and upsert the results.
+
+        Works on the original landlord upload (raw headers): unlike
+        ``classify_and_persist`` no ``AddressList`` is built, so there is no
+        canonical address/postcode requirement -- only the raw description
+        cells are read. Returns ``{column name -> {description -> category}}``
+        with an entry for every configured column, including empty ones.
+        """
+        descriptions_by_header = self._descriptions_from_rows(rows)
+        results: dict[str, dict[str, Enum]] = {}
+        # First pass: classify every column before anything is written.
+        for column in self._columns:
+            texts = descriptions_by_header.get(column.source_column, set())
+            results[column.name] = column.classifier.classify(texts)
+        # Second pass: persist; columns with no classifications are skipped.
+        for column in self._columns:
+            mapping = results[column.name]
+            if mapping:
+                column.repo.upsert_all(portfolio_id, mapping)
+        return results
+
+    @staticmethod
+    def _descriptions_from_rows(rows: list[dict[str, str]]) -> dict[str, set[str]]:
+        """Collect, per header, the distinct comma-split, lower-cased cell parts."""
+        by_header: dict[str, set[str]] = {}
+        for row in rows:
+            for header, cell in row.items():
+                # Register the header even when the cell yields no parts.
+                seen = by_header.setdefault(header, set())
+                parts = (part.strip().lower() for part in (cell or "").split(","))
+                seen.update(part for part in parts if part)
+        return by_header

From 47dfe34ec062bfd884a451bc9b22e92f62c5c9d7 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 29 May 2026 12:12:54 +0000
Subject: [PATCH 25/29] added landlord description overrides

---
 .github/workflows/_deploy_lambda.yml          |  5 ++
 .github/workflows/deploy_terraform.yml        | 41 +++++++++++++++
 .github/workflows/lambda_smoke_tests.yml      | 10 ++++
 .../landlord_description_overrides/Dockerfile |  2 +-
 .../requirements.txt                          |  1 +
 .../landlordDescriptionOverrides/main.tf      | 50 +++++++++++++++++++
 .../landlordDescriptionOverrides/outputs.tf   |  9 ++++
 .../landlordDescriptionOverrides/provider.tf  | 16 ++++++
 .../landlordDescriptionOverrides/variables.tf | 33 ++++++++++++
 deployment/terraform/shared/main.tf           | 41 ++++++++++++---
 10 files changed, 201 insertions(+), 7 deletions(-)
 create mode 100644 deployment/terraform/lambda/landlordDescriptionOverrides/main.tf
 create mode 100644 deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf
 create mode 100644 deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf
 create mode 100644 deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf

diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml
index 0d702155..70f9eabe 100644
--- a/.github/workflows/_deploy_lambda.yml
+++ b/.github/workflows/_deploy_lambda.yml
@@ -92,6 +92,9 @@ on:
 
       TF_VAR_magicplan_api_key:
         required: false
+
+      TF_VAR_openai_api_key:
+        required: false
 jobs:
   deploy:
     runs-on: ubuntu-latest
@@ -163,6 +166,7 @@ jobs:
           TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
           TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
           TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
+          TF_VAR_openai_api_key: ${{ secrets.TF_VAR_openai_api_key }}
         run: |
           ECR_REPO_URL_VAR=""
           if [[ -n "${{ inputs.ecr_repo }}" ]]; then
@@ -213,6 +217,7 @@ jobs:
           TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
           TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
           TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
+          TF_VAR_openai_api_key: ${{ secrets.TF_VAR_openai_api_key }}
         run: |
           EXTRA_VARS=""
           if [[ -n "${{ inputs.ecr_repo }}" ]]; then
diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml
index 7f2eb890..fc999bc0 100644
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@@ -203,6 +203,47 @@ jobs:
       AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
       AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
 
+  # ============================================================
+  # Build Landlord Description Overrides image and Push
+  # ============================================================
+  landlordDescriptionOverrides_image:
+    needs: [determine_stage, shared_terraform]
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      ecr_repo: landlord_description_overrides-${{ needs.determine_stage.outputs.stage }}
+      dockerfile_path: applications/landlord_description_overrides/Dockerfile
+      build_context: .
+      build_args: |
+        DEV_DB_HOST=$DEV_DB_HOST
+        DEV_DB_PORT=$DEV_DB_PORT
+        DEV_DB_NAME=$DEV_DB_NAME
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
+      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
+      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+
+  # ============================================================
+  # Deploy Landlord Description Overrides Lambda
+  # ============================================================
+  landlordDescriptionOverrides_lambda:
+    needs: [landlordDescriptionOverrides_image, determine_stage]
+    uses: ./.github/workflows/_deploy_lambda.yml
+    with:
+      lambda_name: landlordDescriptionOverrides
+      lambda_path: deployment/terraform/lambda/landlordDescriptionOverrides
+      stage: ${{ needs.determine_stage.outputs.stage }}
+      ecr_repo: landlord_description_overrides-${{ needs.determine_stage.outputs.stage }}
+      image_digest: ${{ needs.landlordDescriptionOverrides_image.outputs.image_digest }}
+      terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      TF_VAR_openai_api_key: ${{ secrets.DEV_OPENAI_API_KEY }}
+
   # ============================================================
   # Build Bulk Address2UPRN Combiner image and Push
   # ============================================================
diff --git a/.github/workflows/lambda_smoke_tests.yml b/.github/workflows/lambda_smoke_tests.yml
index b562f91e..44288821 100644
--- a/.github/workflows/lambda_smoke_tests.yml
+++ b/.github/workflows/lambda_smoke_tests.yml
@@ -43,6 +43,16 @@ jobs:
       build_context: .
       service_name: postcode-splitter-ddd
 
+  # ============================================================
+  # Landlord Description Overrides
+  # ============================================================
+  landlord_description_overrides_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: applications/landlord_description_overrides/Dockerfile
+      build_context: .
+      service_name: landlord-description-overrides
+
   # ============================================================
   # Bulk Address2UPRN Combiner
   # ============================================================
diff --git a/applications/landlord_description_overrides/Dockerfile b/applications/landlord_description_overrides/Dockerfile
index e2456b81..c2d4faf7 100644
--- a/applications/landlord_description_overrides/Dockerfile
+++ b/applications/landlord_description_overrides/Dockerfile
@@ -15,7 +15,7 @@ ENV POSTGRES_DATABASE=${DEV_DB_NAME}
 
 WORKDIR /var/task
 
-COPY applications/postcode_splitter/requirements.txt .
+COPY applications/landlord_description_overrides/requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
 # Copy the layered source the handler imports from. The new splitter pulls
diff --git a/applications/landlord_description_overrides/requirements.txt b/applications/landlord_description_overrides/requirements.txt
index 6a85a255..b2917847 100644
--- a/applications/landlord_description_overrides/requirements.txt
+++ b/applications/landlord_description_overrides/requirements.txt
@@ -2,3 +2,4 @@ boto3
 pydantic
 sqlmodel
 psycopg2-binary
+openai
diff --git a/deployment/terraform/lambda/landlordDescriptionOverrides/main.tf b/deployment/terraform/lambda/landlordDescriptionOverrides/main.tf
new file mode 100644
index 00000000..5a69de22
--- /dev/null
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/main.tf
@@ -0,0 +1,50 @@
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key    = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+
+data "aws_secretsmanager_secret_version" "db_credentials" {
+  secret_id = "${var.stage}/assessment_model/db_credentials"
+}
+
+locals {
+  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
+}
+
+module "lambda" {
+  source = "../../modules/lambda_with_sqs"
+
+  name  = "landlord-description-overrides"
+  stage = var.stage
+
+  image_uri = local.image_uri
+
+  # The classifier calls OpenAI once per distinct description per column, so it
+  # is latency-bound. 300s leaves headroom under the queue's 1000s visibility
+  # timeout. batch_size = 1 keeps one upload per invocation, so a single bad
+  # record cannot redrive its siblings. maximum_concurrency caps fan-out to
+  # respect OpenAI rate limits.
+  timeout             = 300
+  batch_size          = 1
+  maximum_concurrency = 5
+
+  environment = merge(
+    {
+      STAGE             = var.stage
+      LOG_LEVEL         = "info"
+      POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username
+      POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password
+      OPENAI_API_KEY    = var.openai_api_key
+    },
+  )
+}
+
+# Attach S3 read policy so the handler can read the original upload CSV.
+resource "aws_iam_role_policy_attachment" "landlord_overrides_s3_read" {
+  role       = module.lambda.role_name
+  policy_arn = data.terraform_remote_state.shared.outputs.landlord_overrides_s3_read_arn
+}
diff --git a/deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf b/deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf
new file mode 100644
index 00000000..7c6534db
--- /dev/null
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf
@@ -0,0 +1,9 @@
+output "landlord_description_overrides_queue_url" {
+  value       = module.lambda.queue_url
+  description = "URL of the Landlord Description Overrides SQS queue (wire into the FastAPI LANDLORD_OVERRIDES_SQS_URL)"
+}
+
+output "landlord_description_overrides_queue_arn" {
+  value       = module.lambda.queue_arn
+  description = "ARN of the Landlord Description Overrides SQS queue"
+}
diff --git a/deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf b/deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf
new file mode 100644
index 00000000..ed2fa60e
--- /dev/null
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf
@@ -0,0 +1,16 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 5.0"
+    }
+  }
+
+  backend "s3" {
+    bucket = "landlord-description-overrides-terraform-state"
+    key    = "terraform.tfstate"
+    region = "eu-west-2"
+  }
+
+  required_version = ">= 1.2.0"
+}
diff --git a/deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf b/deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf
new file mode 100644
index 00000000..63437a5a
--- /dev/null
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf
@@ -0,0 +1,33 @@
+variable "lambda_name" {
+  type        = string
+  description = "Logical name of the lambda (e.g. landlordDescriptionOverrides)"
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod)"
+  type        = string
+}
+
+variable "ecr_repo_url" {
+  type        = string
+  description = "ECR repository URL (no tag, no digest)"
+}
+
+variable "image_digest" {
+  type        = string
+  description = "Image digest (sha256:...)"
+}
+
+variable "openai_api_key" {
+  type        = string
+  description = "OpenAI API key used by the ChatGPT column classifier"
+  sensitive   = true
+}
+
+locals {
+  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
+}
+
+output "resolved_image_uri" {
+  value = local.image_uri
+}
diff --git a/deployment/terraform/shared/main.tf b/deployment/terraform/shared/main.tf
index 0a9e87f6..7d179203 100644
--- a/deployment/terraform/shared/main.tf
+++ b/deployment/terraform/shared/main.tf
@@ -268,11 +268,11 @@ output "retrofit_heat_baseline_predictions_bucket_name" {
 
 // We make this bucket presignable, because we want to generate download links for the frontend
 module "retrofit_energy_assessments" {
-  source             = "../modules/s3_presignable_bucket"
-  bucketname         = "retrofit-energy-assessments-${var.stage}"
-  allowed_origins    = var.allowed_origins
-  environment        = var.stage
-  enable_versioning  = true
+  source            = "../modules/s3_presignable_bucket"
+  bucketname        = "retrofit-energy-assessments-${var.stage}"
+  allowed_origins   = var.allowed_origins
+  environment       = var.stage
+  enable_versioning = true
 }
 
 output "retrofit_energy_assessments_bucket_name" {
@@ -494,6 +494,35 @@ output "postcode_splitter_s3_read_arn" {
   value = module.postcode_splitter_s3_read.policy_arn
 }
 
+################################################
+# Landlord Description Overrides – Lambda
+################################################
+module "landlord_description_overrides_state_bucket" {
+  source      = "../modules/tf_state_bucket"
+  bucket_name = "landlord-description-overrides-terraform-state"
+}
+
+module "landlord_description_overrides_registry" {
+  source = "../modules/container_registry"
+  name   = "landlord_description_overrides"
+  stage  = var.stage
+}
+
+# S3 policy for the landlord classifier to read the original upload CSV.
+module "landlord_overrides_s3_read" {
+  source = "../modules/s3_iam_policy"
+
+  policy_name        = "LandlordOverridesReadS3"
+  policy_description = "Allow landlord description overrides Lambda to read from retrofit-data bucket"
+  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
+  actions            = ["s3:GetObject", "s3:ListBucket"]
+  resource_paths     = ["/*"]
+}
+
+output "landlord_overrides_s3_read_arn" {
+  value = module.landlord_overrides_s3_read.policy_arn
+}
+
 ################################################
 # Bulk Address2UPRN Combiner – Lambda ECR
 ################################################
@@ -729,7 +758,7 @@ module "hubspot_etl_bucket" {
 module "hubspot_etl_registry" {
   source = "../modules/container_registry"
   name   = "hubspot-etl"
-  stage = var.stage
+  stage  = var.stage
 
 }
 

From 3e30b4af4037c54c8fd2956503d8b9595eb6c74d Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 29 May 2026 16:17:06 +0000
Subject: [PATCH 26/29] tests wrong environment

---
 .../landlord_description_overrides/handler.py |  20 ++-
 .../requirements.txt                          |   2 +-
 infrastructure/postgres/engine.py             |  18 +++
 ...lord_description_overrides_orchestrator.py |  69 ++++++---
 test.requirements.txt                         |   3 +-
 ...lord_description_overrides_orchestrator.py | 144 ++++++++++++++++++
 6 files changed, 226 insertions(+), 30 deletions(-)

diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
index 801d1f12..901a8297 100644
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@@ -17,7 +17,7 @@ from domain.landlord_description_overrides.wall_type_construction_dates import (
 from infrastructure.chatgpt.chatgpt import ChatGPT
 from infrastructure.chatgpt.chatgpt_column_classifier import ChatGptColumnClassifier
 from infrastructure.postgres.config import PostgresConfig
-from infrastructure.postgres.engine import make_engine, transactional_session
+from infrastructure.postgres.engine import commit_scope, make_engine, make_session
 from infrastructure.postgres.landlord_built_form_type_override_postgres_repository import (
     LandlordBuiltFormTypeOverridePostgresRepository,
 )
@@ -130,16 +130,26 @@ def handler(
     rows = csv_client.read_rows(trigger.s3_uri)
 
     engine = make_engine(PostgresConfig.from_env(os.environ))
-    with transactional_session(engine) as session:
+    # The session is built up front (SQLModel sessions are lazy, so no
+    # connection is checked out yet) and owned by this handler. Classification
+    # runs first and calls ChatGPT, which is slow; we deliberately keep no
+    # transaction open across it. Only the persistence below -- inside
+    # ``commit_scope`` -- holds a connection.
+    session = make_session(engine)
+    try:
         chat_gpt = ChatGPT()
         columns = _build_columns(trigger.column_mapping, chat_gpt, session)
         orchestrator = LandlordDescriptionOverridesOrchestrator(
             unstandardised_address_repo=unstandardised_address_repo,
             columns=columns,
         )
-        classified = orchestrator.classify_and_persist_from_rows(
-            rows, portfolio_id=trigger.portfolio_id
-        )
+
+        classified = orchestrator.classify_from_rows(rows)
+
+        with commit_scope(session):
+            orchestrator.persist(classified, portfolio_id=trigger.portfolio_id)
+    finally:
+        session.close()
 
     counts = {name: len(mapping) for name, mapping in classified.items()}
     for name, n in counts.items():
diff --git a/applications/landlord_description_overrides/requirements.txt b/applications/landlord_description_overrides/requirements.txt
index b2917847..590c4667 100644
--- a/applications/landlord_description_overrides/requirements.txt
+++ b/applications/landlord_description_overrides/requirements.txt
@@ -2,4 +2,4 @@ boto3
 pydantic
 sqlmodel
 psycopg2-binary
-openai
+openai==1.93.0
diff --git a/infrastructure/postgres/engine.py b/infrastructure/postgres/engine.py
index ea2b35ad..2558532e 100644
--- a/infrastructure/postgres/engine.py
+++ b/infrastructure/postgres/engine.py
@@ -40,3 +40,21 @@ def transactional_session(engine: Engine) -> Iterator[Session]:
         raise
     finally:
         session.close()
+
+
+@contextmanager  # pyright: ignore[reportDeprecated]
+def commit_scope(session: Session) -> Iterator[Session]:
+    """Commit a caller-owned session on clean exit; roll back on error.
+
+    Like ``transactional_session`` but for a session the caller already holds
+    and will close itself. Use it to keep slow, non-DB work *outside* the
+    transaction: build the session, run the slow work, then enter
+    ``commit_scope`` only for the persistence -- so a connection is checked out
+    (SQLModel sessions are lazy) for the shortest possible window.
+    """
+    try:
+        yield session
+        session.commit()
+    except Exception:
+        session.rollback()
+        raise
diff --git a/orchestration/landlord_description_overrides_orchestrator.py b/orchestration/landlord_description_overrides_orchestrator.py
index 6203b8d5..e43992cf 100644
--- a/orchestration/landlord_description_overrides_orchestrator.py
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@@ -60,50 +60,73 @@ class LandlordDescriptionOverridesOrchestrator:
             for column in self._columns
         }
 
+    def persist(
+        self, classified: dict[str, dict[str, Enum]], portfolio_id: int
+    ) -> None:
+        """Persist already-classified results via each column's repository.
+
+        ``classified`` is keyed by ``ClassifiableColumn.name`` -- the shape
+        ``classify_columns`` and ``classify_from_rows`` return. Each non-empty
+        mapping is written through the column's own repo under
+        ``source = 'classifier'``; an empty mapping (a registered column absent
+        from this batch) skips the DB round-trip.
+
+        The orchestrator does not commit -- the caller owns the transaction
+        boundary, and is expected to open it only around this call so the
+        slow classification never holds a connection.
+        """
+        for column in self._columns:
+            mapping = classified.get(column.name)
+            if not mapping:
+                continue
+            column.repo.upsert_all(portfolio_id, mapping)
+
     def classify_and_persist(
         self, addresses: AddressList, portfolio_id: int
     ) -> dict[str, dict[str, Enum]]:
         """Classify every registered column and persist the results.
 
-        Each non-empty mapping is written via the column's repository under
-        ``source = 'classifier'``. Empty mappings (a registered column whose
-        ``source_column`` is absent from this batch) skip the DB round-trip.
-        The orchestrator does not commit -- the caller owns the transaction
-        boundary.
-
         Returns the same shape as ``classify_columns`` so callers can log
         per-column counts.
         """
         classified = self.classify_columns(addresses)
-        for column in self._columns:
-            mapping = classified[column.name]
-            if not mapping:
-                continue
-            column.repo.upsert_all(portfolio_id, mapping)
+        self.persist(classified, portfolio_id)
         return classified
 
-    def classify_and_persist_from_rows(
-        self, rows: list[dict[str, str]], portfolio_id: int
+    def classify_from_rows(
+        self, rows: list[dict[str, str]]
     ) -> dict[str, dict[str, Enum]]:
-        """Classify + persist straight from raw CSV rows.
+        """Classify raw CSV rows without touching the database.
 
-        Unlike ``classify_and_persist``, this does not build an ``AddressList``,
-        so it has no canonical address/postcode requirement -- the classifier
-        only needs the raw description cells. Used when reading the original
+        The classification half of ``classify_and_persist_from_rows``, split
+        out so a caller can run the slow ChatGPT work *before* opening a
+        transaction and then write the finished results with ``persist`` inside
+        one short-lived connection.
+
+        Unlike the ``AddressList`` path this builds no ``AddressList``, so it
+        has no canonical address/postcode requirement -- the classifier only
+        needs the raw description cells. Used when reading the original
         landlord upload (raw headers) rather than the address-matching CSV.
         """
         col_to_desc = self._descriptions_from_rows(rows)
-        classified = {
+        return {
             column.name: column.classifier.classify(
                 col_to_desc.get(column.source_column, set())
             )
             for column in self._columns
         }
-        for column in self._columns:
-            mapping = classified[column.name]
-            if not mapping:
-                continue
-            column.repo.upsert_all(portfolio_id, mapping)
+
+    def classify_and_persist_from_rows(
+        self, rows: list[dict[str, str]], portfolio_id: int
+    ) -> dict[str, dict[str, Enum]]:
+        """Classify + persist straight from raw CSV rows in one call.
+
+        A convenience composition of ``classify_from_rows`` + ``persist``.
+        Prefer calling the two separately when classification is slow, so the
+        transaction opens only around ``persist`` (see the Lambda handler).
+        """
+        classified = self.classify_from_rows(rows)
+        self.persist(classified, portfolio_id)
         return classified
 
     @staticmethod
diff --git a/test.requirements.txt b/test.requirements.txt
index 26125034..c5b71977 100644
--- a/test.requirements.txt
+++ b/test.requirements.txt
@@ -10,4 +10,5 @@ fuzzywuzzy
 pymupdf
 playwright==1.58.0
 msal
-moto[s3,sqs]
\ No newline at end of file
+moto[s3,sqs]
+openai==1.93.0
\ No newline at end of file
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index eee4a310..d05b5911 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -323,3 +323,147 @@ def test_classify_and_persist_skips_upsert_for_a_column_absent_from_the_batch()
     # assert: Property Type wrote; Walls did not.
     assert property_type_repo.calls == [(7, {"semi-detached": PropertyType.HOUSE})]
     assert wall_type_repo.calls == []
+
+
+def test_classify_from_rows_classifies_each_column_without_persisting() -> None:
+    # arrange: raw CSV rows (not an AddressList) carry two classifiable columns.
+    rows = [{"Property Type": "semi-detached", "Walls": "solid brick"}]
+    property_types = _StubColumnClassifier({"semi-detached": PropertyType.HOUSE})
+    wall_types = _StubColumnClassifier(
+        {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
+    )
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+
+    # act
+    result = _orchestrator(
+        [
+            _column("property_type", "Property Type", property_types, property_type_repo),
+            _column("wall_type", "Walls", wall_types, wall_type_repo),
+        ]
+    ).classify_from_rows(rows)
+
+    # assert: each classifier ran against its column's descriptions, keyed by
+    # name -- and NOT a single repo was touched (classification is DB-free, so
+    # the slow ChatGPT work can run before any transaction opens).
+    assert result == {
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED},
+    }
+    assert property_type_repo.calls == []
+    assert wall_type_repo.calls == []
+
+
+def test_classify_from_rows_splits_and_normalises_descriptions() -> None:
+    # arrange: one cell packs several descriptions with inconsistent casing,
+    # spread across rows. The rows path must fold them exactly like the
+    # AddressList path: comma-split, trimmed, lower-cased, de-duped.
+    rows = [
+        {"Walls": "Solid Brick, cavity"},
+        {"Walls": "SOLID BRICK"},
+    ]
+    wall_types = _StubColumnClassifier({})
+
+    # act
+    _orchestrator(
+        [_column("wall_type", "Walls", wall_types)]
+    ).classify_from_rows(rows)
+
+    # assert: the classifier saw one normalised entry per distinct variant.
+    assert wall_types.received == {"solid brick", "cavity"}
+
+
+def test_classify_from_rows_yields_empty_mapping_for_an_absent_column() -> None:
+    # arrange: a column is registered for a header the rows lack.
+    rows = [{"Walls": "cavity"}]
+    property_types = _StubColumnClassifier({})
+
+    # act
+    result = _orchestrator(
+        [_column("property_type", "Property Type", property_types)]
+    ).classify_from_rows(rows)
+
+    # assert: the absent column classified an empty description set.
+    assert result == {"property_type": {}}
+    assert property_types.received == set()
+
+
+def test_persist_routes_each_columns_mapping_to_its_own_repo() -> None:
+    # arrange: a finished ``classified`` mapping (as classify_* would return)
+    # and two columns with distinct repos.
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+    columns: list[ClassifiableColumn[Any]] = [
+        _column("property_type", "Property Type", _StubColumnClassifier({}), property_type_repo),
+        _column("wall_type", "Walls", _StubColumnClassifier({}), wall_type_repo),
+    ]
+    classified: dict[str, dict[str, Enum]] = {
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED},
+    }
+
+    # act
+    _orchestrator(columns).persist(classified, portfolio_id=42)
+
+    # assert: each repo received exactly its own column's mapping.
+    assert property_type_repo.calls == [(42, {"semi-detached": PropertyType.HOUSE})]
+    assert wall_type_repo.calls == [
+        (42, {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED})
+    ]
+
+
+def test_persist_skips_empty_and_missing_mappings() -> None:
+    # arrange: ``property_type`` has an empty mapping; ``wall_type`` is absent
+    # from ``classified`` entirely. Neither should hit the DB -- and the
+    # missing key must not raise (``persist`` reads with ``.get``).
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+    columns: list[ClassifiableColumn[Any]] = [
+        _column("property_type", "Property Type", _StubColumnClassifier({}), property_type_repo),
+        _column("wall_type", "Walls", _StubColumnClassifier({}), wall_type_repo),
+    ]
+    classified: dict[str, dict[str, Enum]] = {"property_type": {}}
+
+    # act
+    _orchestrator(columns).persist(classified, portfolio_id=7)
+
+    # assert: no upserts at all.
+    assert property_type_repo.calls == []
+    assert wall_type_repo.calls == []
+
+
+def test_classify_and_persist_from_rows_composes_classify_then_persist() -> None:
+    # arrange: the one-shot rows path must classify AND route to repos, so the
+    # convenience composition stays equivalent to calling the two in sequence.
+    rows = [{"Property Type": "semi-detached", "Walls": "solid brick"}]
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+    columns: list[ClassifiableColumn[Any]] = [
+        _column(
+            "property_type",
+            "Property Type",
+            _StubColumnClassifier({"semi-detached": PropertyType.HOUSE}),
+            property_type_repo,
+        ),
+        _column(
+            "wall_type",
+            "Walls",
+            _StubColumnClassifier(
+                {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
+            ),
+            wall_type_repo,
+        ),
+    ]
+
+    # act
+    result = _orchestrator(columns).classify_and_persist_from_rows(rows, portfolio_id=99)
+
+    # assert: same return shape as classify_from_rows, and each repo wrote once.
+    assert result == {
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED},
+    }
+    assert property_type_repo.calls == [(99, {"semi-detached": PropertyType.HOUSE})]
+    assert wall_type_repo.calls == [
+        (99, {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED})
+    ]

From 7f2f2b95a0b0e304f2003ea13063884ebe55fd40 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 1 Jun 2026 09:34:35 +0000
Subject: [PATCH 27/29] update tests to reflect wall types

---
 ..._wall_type_override_postgres_repository.py | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py b/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py
index 2aae83dd..4cee6f5a 100644
--- a/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py
+++ b/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py
@@ -49,13 +49,13 @@ def test_inserts_a_fresh_row_with_source_classifier(session: Session) -> None:
 
     # act
     repo.upsert_all(
-        portfolio_id=1, descriptions_to_values={"cavity insulated": WallType.CAVITY}
+        portfolio_id=1, descriptions_to_values={"cavity insulated": WallType.CAVITY_FILLED}
     )
     session.commit()
 
     # assert
     row = _select_row(session, portfolio_id=1, description="cavity insulated")
-    assert row.value is WallType.CAVITY
+    assert row.value is WallType.CAVITY_FILLED
     assert row.source == OverrideSource.CLASSIFIER
 
 
@@ -63,19 +63,19 @@ def test_reupsert_overwrites_a_classifier_row(session: Session) -> None:
     # arrange: a stale classifier row exists.
     repo = LandlordWallTypeOverridePostgresRepository(session)
     repo.upsert_all(
-        portfolio_id=1, descriptions_to_values={"old red brick": WallType.CAVITY}
+        portfolio_id=1, descriptions_to_values={"old red brick": WallType.CAVITY_FILLED}
     )
     session.commit()
 
     # act: re-classify with a different category.
     repo.upsert_all(
-        portfolio_id=1, descriptions_to_values={"old red brick": WallType.SOLID_BRICK}
+        portfolio_id=1, descriptions_to_values={"old red brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
     )
     session.commit()
 
     # assert: the new classification wins.
     row = _select_row(session, portfolio_id=1, description="old red brick")
-    assert row.value is WallType.SOLID_BRICK
+    assert row.value is WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
     assert row.source == OverrideSource.CLASSIFIER
 
 
@@ -86,7 +86,7 @@ def test_reupsert_does_not_overwrite_a_user_row(session: Session) -> None:
     user_row = LandlordWallTypeOverrideRow(
         portfolio_id=1,
         description="old red brick",
-        value=WallType.SANDSTONE,
+        value=WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED,
         source=OverrideSource.USER,
     )
     session.add(user_row)
@@ -97,13 +97,13 @@ def test_reupsert_does_not_overwrite_a_user_row(session: Session) -> None:
     # be silently skipped -- user edits beat classifier reruns.
     repo = LandlordWallTypeOverridePostgresRepository(session)
     repo.upsert_all(
-        portfolio_id=1, descriptions_to_values={"old red brick": WallType.SOLID_BRICK}
+        portfolio_id=1, descriptions_to_values={"old red brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
     )
     session.commit()
 
     # assert: the user row is unchanged.
     row = _select_row(session, portfolio_id=1, description="old red brick")
-    assert row.value is WallType.SANDSTONE
+    assert row.value is WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED
     assert row.source == OverrideSource.USER
 
 
@@ -114,16 +114,16 @@ def test_upsert_keeps_other_portfolios_descriptions_independent(
     # same description for two different portfolios must coexist as two rows.
     repo = LandlordWallTypeOverridePostgresRepository(session)
     repo.upsert_all(
-        portfolio_id=1, descriptions_to_values={"old red brick": WallType.CAVITY}
+        portfolio_id=1, descriptions_to_values={"old red brick": WallType.CAVITY_FILLED}
     )
     repo.upsert_all(
-        portfolio_id=2, descriptions_to_values={"old red brick": WallType.SOLID_BRICK}
+        portfolio_id=2, descriptions_to_values={"old red brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
     )
     session.commit()
 
     # assert: both rows survive with their own values.
-    assert _select_row(session, 1, "old red brick").value is WallType.CAVITY
-    assert _select_row(session, 2, "old red brick").value is WallType.SOLID_BRICK
+    assert _select_row(session, 1, "old red brick").value is WallType.CAVITY_FILLED
+    assert _select_row(session, 2, "old red brick").value is WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
 
 
 def test_upsert_persists_unknown_so_a_user_can_resolve_it_later(

From 9c1b6c76a9a9d91d688aec9f49dd06449570fb31 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 1 Jun 2026 12:08:29 +0000
Subject: [PATCH 28/29] delete playground

---
 playground.py | 57 ---------------------------------------------------
 1 file changed, 57 deletions(-)
 delete mode 100644 playground.py

diff --git a/playground.py b/playground.py
deleted file mode 100644
index 5e9001e1..00000000
--- a/playground.py
+++ /dev/null
@@ -1,57 +0,0 @@
-"""Read a file and return unique values from a chosen column."""
-
-from pathlib import Path
-import argparse
-import sys
-
-import pandas as pd
-
-
-def read_file(path: str | Path) -> pd.DataFrame:
-    path = Path(path)
-    suffix = path.suffix.lower()
-    if suffix == ".csv":
-        return pd.read_csv(path)
-    if suffix == ".tsv":
-        return pd.read_csv(path, sep="\t")
-    if suffix in {".xlsx", ".xls"}:
-        return pd.read_excel(path)
-    if suffix == ".parquet":
-        return pd.read_parquet(path)
-    if suffix == ".json":
-        return pd.read_json(path)
-    raise ValueError(f"Unsupported file type: {suffix}")
-
-
-def get_unique(path: str | Path, column: str, dropna: bool = True) -> list:
-    df = read_file(Path(path))
-    if column not in df.columns:
-        raise KeyError(f"Column {column!r} not found. Available: {list(df.columns)}")
-    series = df[column].dropna() if dropna else df[column]
-    return series.unique().tolist()
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--path", default="/workspaces/model/certificates-2026.csv")
-    parser.add_argument("--column", nargs="walls_description")
-    parser.add_argument("--keep-na", action="store_true")
-    args, _ = parser.parse_known_args()
-
-    df = read_file(args.path)
-
-    if not args.column:
-        print("Available columns:")
-        for c in df.columns:
-            print(f"  - {c}")
-        return 0
-
-    column = "wall "
-    series = df[column] if args.keep_na else df[column].dropna()
-    for value in series.unique():
-        print(value)
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())

From c9a9620527cce18062805b0d80812c5a3e76fbf4 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Mon, 1 Jun 2026 14:00:31 +0000
Subject: [PATCH 29/29] pr review, move domain and orchestration

---
 .devcontainer/backend/requirements.txt        |  1 +
 .../landlord_description_overrides/handler.py | 10 ++--
 backend/tests/test_search_epc.py              | 48 +------------------
 .../__init__.py                               |  0
 .../built_form_type.py                        |  0
 .../property_type.py                          |  0
 .../roof_type.py                              |  0
 .../wall_type.py                              |  0
 .../wall_type_construction_dates.py           |  2 +-
 .../chatgpt/chatgpt_column_classifier.py      |  2 +-
 .../column_classifier.py                      |  0
 ..._form_type_override_postgres_repository.py |  2 +-
 ...landlord_built_form_type_override_table.py |  2 +-
 ...perty_type_override_postgres_repository.py |  2 +-
 .../landlord_property_type_override_table.py  |  2 +-
 ..._roof_type_override_postgres_repository.py |  2 +-
 .../landlord_roof_type_override_table.py      |  2 +-
 ..._wall_type_override_postgres_repository.py |  2 +-
 .../landlord_wall_type_override_table.py      |  2 +-
 orchestration/classifiable_column.py          |  2 +-
 .../chatgpt/test_chatgpt_column_classifier.py |  6 +--
 ...lord_description_overrides_orchestrator.py |  8 ++--
 ...perty_type_override_postgres_repository.py |  2 +-
 ..._wall_type_override_postgres_repository.py |  2 +-
 24 files changed, 28 insertions(+), 71 deletions(-)
 rename domain/{landlord_description_overrides => epc}/__init__.py (100%)
 rename domain/{landlord_description_overrides => epc}/built_form_type.py (100%)
 rename domain/{landlord_description_overrides => epc}/property_type.py (100%)
 rename domain/{landlord_description_overrides => epc}/roof_type.py (100%)
 rename domain/{landlord_description_overrides => epc}/wall_type.py (100%)
 rename domain/{landlord_description_overrides => epc}/wall_type_construction_dates.py (97%)
 rename {domain/landlord_description_overrides => infrastructure}/column_classifier.py (100%)

diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt
index 7a879773..2db3710a 100644
--- a/.devcontainer/backend/requirements.txt
+++ b/.devcontainer/backend/requirements.txt
@@ -27,3 +27,4 @@ pytest-postgresql
 # Formatting
 black==26.1.0
 boto3-stubs
+openai
diff --git a/applications/landlord_description_overrides/handler.py b/applications/landlord_description_overrides/handler.py
index 901a8297..e2afb4bd 100644
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@@ -7,11 +7,11 @@ import boto3
 from applications.landlord_description_overrides.landlord_description_overrides_trigger_body import (
     LandlordDescriptionOverridesTriggerBody,
 )
-from domain.landlord_description_overrides.built_form_type import BuiltFormType
-from domain.landlord_description_overrides.property_type import PropertyType
-from domain.landlord_description_overrides.roof_type import RoofType
-from domain.landlord_description_overrides.wall_type import WallType
-from domain.landlord_description_overrides.wall_type_construction_dates import (
+from domain.epc.built_form_type import BuiltFormType
+from domain.epc.property_type import PropertyType
+from domain.epc.roof_type import RoofType
+from domain.epc.wall_type import WallType
+from domain.epc.wall_type_construction_dates import (
     wall_type_construction_date_prompt_hint,
 )
 from infrastructure.chatgpt.chatgpt import ChatGPT
diff --git a/backend/tests/test_search_epc.py b/backend/tests/test_search_epc.py
index a0fef7e9..aaf5d680 100644
--- a/backend/tests/test_search_epc.py
+++ b/backend/tests/test_search_epc.py
@@ -14,55 +14,11 @@ class TestSearchEpcIntegration:
     def epc_auth_token(self):
         return os.getenv("EPC_AUTH_TOKEN")
 
-    @pytest.mark.parametrize(
-        "address, postcode, uprn, skip_os, lmk_key, n_old_epcs",
-        [
-            # Test case 1: Valid address and postcode, skipping OS
-            # In this case, the property is an individual flat but the uprn associated to the
-            # EPC is for the building as a whole, possibly because there was a conversion of sorts
-            ("Garden Flat, 48 Bedminster Parade", "BS3 4HS", 308249, True,
-             "260907a5431fa073d193cc6bbec51fbf1ba9a61845ab2503f85aa19ce3ed6afd", 1),
-
-            # Test case 2: Another valid address and postcode
-            # In this case, the newest EPC, does not have a uprn associated to it. If we did a search by
-            # uprn, we would get an old EPC
-            ("Flat 8, Hainton House", "DN32 9AQ", "", True,
-             "bd1149a20a73397184f07a9955f872424826e70f4870c058d71be887766ee1f8", 2),
-            # Test case 3: When we make a request to the API for this property, we get back results for
-            # flats 1, 2 and 3. We have some logic to handle the response so that we get back flat 1
-            ("Flat 1, 1 Tottenham Street, London", "W1T 2AE", 5167411, True,
-             "3e6414d7f15f4cf7a69dc20c469bcf043d31a49239b183f1bd0c0e1aafa23c93", 0),
-
-        ],
-    )
-    def test_find_property(self, epc_auth_token, address, postcode, uprn, skip_os, lmk_key, n_old_epcs):
-        """
-        Integration test for `find_property`, making actual API calls.
-        """
-        # Provide your actual API keys or tokens here
-        os_api_key = ""
-
-        # Initialize the SearchEpc instance
-        epc_searcher = SearchEpc(
-            address1=address,
-            postcode=postcode,
-            uprn=uprn,
-            auth_token=epc_auth_token,
-            os_api_key=os_api_key,
-        )
-
-        # Execute the method
-        epc_searcher.find_property(skip_os=skip_os)
-
-        # We check that we have the correct epc
-        assert epc_searcher.newest_epc["lmk-key"] == lmk_key
-        assert len(epc_searcher.older_epcs) == n_old_epcs
-
     def test_search_housenumber(self):
-        eg1 = 'Flat A11, Mortimer House, Grendon Road, Exeter'
+        eg1 = "Flat A11, Mortimer House, Grendon Road, Exeter"
         res1 = SearchEpc.get_house_number(eg1, None)
         assert res1 == "A11"
 
-        eg2 = 'Flat A9, Mortimer House, Grendon Road, Exeter, EX1 2NL'
+        eg2 = "Flat A9, Mortimer House, Grendon Road, Exeter, EX1 2NL"
         res2 = SearchEpc.get_house_number(eg2, None)
         assert res2 == "A9"
diff --git a/domain/landlord_description_overrides/__init__.py b/domain/epc/__init__.py
similarity index 100%
rename from domain/landlord_description_overrides/__init__.py
rename to domain/epc/__init__.py
diff --git a/domain/landlord_description_overrides/built_form_type.py b/domain/epc/built_form_type.py
similarity index 100%
rename from domain/landlord_description_overrides/built_form_type.py
rename to domain/epc/built_form_type.py
diff --git a/domain/landlord_description_overrides/property_type.py b/domain/epc/property_type.py
similarity index 100%
rename from domain/landlord_description_overrides/property_type.py
rename to domain/epc/property_type.py
diff --git a/domain/landlord_description_overrides/roof_type.py b/domain/epc/roof_type.py
similarity index 100%
rename from domain/landlord_description_overrides/roof_type.py
rename to domain/epc/roof_type.py
diff --git a/domain/landlord_description_overrides/wall_type.py b/domain/epc/wall_type.py
similarity index 100%
rename from domain/landlord_description_overrides/wall_type.py
rename to domain/epc/wall_type.py
diff --git a/domain/landlord_description_overrides/wall_type_construction_dates.py b/domain/epc/wall_type_construction_dates.py
similarity index 97%
rename from domain/landlord_description_overrides/wall_type_construction_dates.py
rename to domain/epc/wall_type_construction_dates.py
index 4cd869b3..0eccc44c 100644
--- a/domain/landlord_description_overrides/wall_type_construction_dates.py
+++ b/domain/epc/wall_type_construction_dates.py
@@ -27,7 +27,7 @@ from __future__ import annotations
 from dataclasses import dataclass
 from typing import Mapping, Optional
 
-from domain.landlord_description_overrides.wall_type import WallType
+from domain.epc.wall_type import WallType
 
 
 @dataclass(frozen=True)
diff --git a/infrastructure/chatgpt/chatgpt_column_classifier.py b/infrastructure/chatgpt/chatgpt_column_classifier.py
index 2ce66299..15389184 100644
--- a/infrastructure/chatgpt/chatgpt_column_classifier.py
+++ b/infrastructure/chatgpt/chatgpt_column_classifier.py
@@ -4,7 +4,7 @@ import json
 from enum import Enum
 from typing import Any, Optional, TypeVar
 
-from domain.landlord_description_overrides.column_classifier import (
+from infrastructure.column_classifier import (
     ClassificationError,
     ColumnClassifier,
 )
diff --git a/domain/landlord_description_overrides/column_classifier.py b/infrastructure/column_classifier.py
similarity index 100%
rename from domain/landlord_description_overrides/column_classifier.py
rename to infrastructure/column_classifier.py
diff --git a/infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py b/infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py
index 0f7d4959..aec4ea4d 100644
--- a/infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py
+++ b/infrastructure/postgres/landlord_built_form_type_override_postgres_repository.py
@@ -17,7 +17,7 @@ from sqlalchemy import Table
 from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlmodel import Session
 
-from domain.landlord_description_overrides.built_form_type import BuiltFormType
+from domain.epc.built_form_type import BuiltFormType
 from infrastructure.postgres.landlord_built_form_type_override_table import (
     LandlordBuiltFormTypeOverrideRow,
 )
diff --git a/infrastructure/postgres/landlord_built_form_type_override_table.py b/infrastructure/postgres/landlord_built_form_type_override_table.py
index a1f89c35..ec93ba27 100644
--- a/infrastructure/postgres/landlord_built_form_type_override_table.py
+++ b/infrastructure/postgres/landlord_built_form_type_override_table.py
@@ -16,7 +16,7 @@ from sqlalchemy import BigInteger, Column, UniqueConstraint
 from sqlalchemy import Enum as SAEnum
 from sqlmodel import Field, SQLModel
 
-from domain.landlord_description_overrides.built_form_type import BuiltFormType
+from domain.epc.built_form_type import BuiltFormType
 from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
 
 
diff --git a/infrastructure/postgres/landlord_property_type_override_postgres_repository.py b/infrastructure/postgres/landlord_property_type_override_postgres_repository.py
index 18592c5f..3cd7dbb2 100644
--- a/infrastructure/postgres/landlord_property_type_override_postgres_repository.py
+++ b/infrastructure/postgres/landlord_property_type_override_postgres_repository.py
@@ -19,7 +19,7 @@ from sqlalchemy import Table
 from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlmodel import Session
 
-from domain.landlord_description_overrides.property_type import PropertyType
+from domain.epc.property_type import PropertyType
 from infrastructure.postgres.landlord_override_enums import OverrideSource
 from infrastructure.postgres.landlord_property_type_override_table import (
     LandlordPropertyTypeOverrideRow,
diff --git a/infrastructure/postgres/landlord_property_type_override_table.py b/infrastructure/postgres/landlord_property_type_override_table.py
index b76d508e..ae9377cd 100644
--- a/infrastructure/postgres/landlord_property_type_override_table.py
+++ b/infrastructure/postgres/landlord_property_type_override_table.py
@@ -14,7 +14,7 @@ from sqlalchemy import BigInteger, Column, UniqueConstraint
 from sqlalchemy import Enum as SAEnum
 from sqlmodel import Field, SQLModel
 
-from domain.landlord_description_overrides.property_type import PropertyType
+from domain.epc.property_type import PropertyType
 from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
 
 
diff --git a/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py b/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py
index b5b570bc..c3f263a9 100644
--- a/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py
+++ b/infrastructure/postgres/landlord_roof_type_override_postgres_repository.py
@@ -17,7 +17,7 @@ from sqlalchemy import Table
 from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlmodel import Session
 
-from domain.landlord_description_overrides.roof_type import RoofType
+from domain.epc.roof_type import RoofType
 from infrastructure.postgres.landlord_override_enums import OverrideSource
 from infrastructure.postgres.landlord_roof_type_override_table import (
     LandlordRoofTypeOverrideRow,
diff --git a/infrastructure/postgres/landlord_roof_type_override_table.py b/infrastructure/postgres/landlord_roof_type_override_table.py
index f0cea945..58bd61ff 100644
--- a/infrastructure/postgres/landlord_roof_type_override_table.py
+++ b/infrastructure/postgres/landlord_roof_type_override_table.py
@@ -16,7 +16,7 @@ from sqlalchemy import BigInteger, Column, UniqueConstraint
 from sqlalchemy import Enum as SAEnum
 from sqlmodel import Field, SQLModel
 
-from domain.landlord_description_overrides.roof_type import RoofType
+from domain.epc.roof_type import RoofType
 from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
 
 
diff --git a/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py b/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py
index 21b73e98..711e5c30 100644
--- a/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py
+++ b/infrastructure/postgres/landlord_wall_type_override_postgres_repository.py
@@ -17,7 +17,7 @@ from sqlalchemy import Table
 from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlmodel import Session
 
-from domain.landlord_description_overrides.wall_type import WallType
+from domain.epc.wall_type import WallType
 from infrastructure.postgres.landlord_override_enums import OverrideSource
 from infrastructure.postgres.landlord_wall_type_override_table import (
     LandlordWallTypeOverrideRow,
diff --git a/infrastructure/postgres/landlord_wall_type_override_table.py b/infrastructure/postgres/landlord_wall_type_override_table.py
index 79bea46a..b5097164 100644
--- a/infrastructure/postgres/landlord_wall_type_override_table.py
+++ b/infrastructure/postgres/landlord_wall_type_override_table.py
@@ -16,7 +16,7 @@ from sqlalchemy import BigInteger, Column, UniqueConstraint
 from sqlalchemy import Enum as SAEnum
 from sqlmodel import Field, SQLModel
 
-from domain.landlord_description_overrides.wall_type import WallType
+from domain.epc.wall_type import WallType
 from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
 
 
diff --git a/orchestration/classifiable_column.py b/orchestration/classifiable_column.py
index fb1dab6e..9b6fda10 100644
--- a/orchestration/classifiable_column.py
+++ b/orchestration/classifiable_column.py
@@ -4,7 +4,7 @@ from dataclasses import dataclass
 from enum import Enum
 from typing import Generic, TypeVar
 
-from domain.landlord_description_overrides.column_classifier import ColumnClassifier
+from infrastructure.column_classifier import ColumnClassifier
 from repositories.landlord_overrides.landlord_override_repository import (
     LandlordOverrideRepository,
 )
diff --git a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
index 4cdf4dfe..0462f3ce 100644
--- a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
+++ b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
@@ -4,9 +4,9 @@ from typing import Optional
 
 import pytest
 
-from domain.landlord_description_overrides.column_classifier import ClassificationError
-from domain.landlord_description_overrides.property_type import PropertyType
-from domain.landlord_description_overrides.wall_type import WallType
+from infrastructure.column_classifier import ClassificationError
+from domain.epc.property_type import PropertyType
+from domain.epc.wall_type import WallType
 from infrastructure.chatgpt.chatgpt import ChatGPT
 from infrastructure.chatgpt.chatgpt_column_classifier import (
     ChatGptColumnClassifier,
diff --git a/tests/orchestration/test_landlord_description_overrides_orchestrator.py b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
index d05b5911..18188941 100644
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@@ -4,10 +4,10 @@ from enum import Enum
 from typing import Any, Optional
 
 from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
-from domain.landlord_description_overrides.built_form_type import BuiltFormType
-from domain.landlord_description_overrides.column_classifier import ColumnClassifier
-from domain.landlord_description_overrides.property_type import PropertyType
-from domain.landlord_description_overrides.wall_type import WallType
+from domain.epc.built_form_type import BuiltFormType
+from infrastructure.column_classifier import ColumnClassifier
+from domain.epc.property_type import PropertyType
+from domain.epc.wall_type import WallType
 from domain.postcode import Postcode
 from orchestration.classifiable_column import ClassifiableColumn
 from orchestration.landlord_description_overrides_orchestrator import (
diff --git a/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py b/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py
index 9154b664..c2b81293 100644
--- a/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py
+++ b/tests/repositories/landlord_overrides/postgres/test_landlord_property_type_override_postgres_repository.py
@@ -15,7 +15,7 @@ import pytest
 from sqlalchemy import Engine
 from sqlmodel import Session, select
 
-from domain.landlord_description_overrides.property_type import PropertyType
+from domain.epc.property_type import PropertyType
 from infrastructure.postgres.landlord_override_enums import OverrideSource
 from infrastructure.postgres.landlord_property_type_override_postgres_repository import (
     LandlordPropertyTypeOverridePostgresRepository,
diff --git a/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py b/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py
index 4cee6f5a..9504a520 100644
--- a/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py
+++ b/tests/repositories/landlord_overrides/postgres/test_landlord_wall_type_override_postgres_repository.py
@@ -14,7 +14,7 @@ import pytest
 from sqlalchemy import Engine
 from sqlmodel import Session, select
 
-from domain.landlord_description_overrides.wall_type import WallType
+from domain.epc.wall_type import WallType
 from infrastructure.postgres.landlord_override_enums import OverrideSource
 from infrastructure.postgres.landlord_wall_type_override_postgres_repository import (
     LandlordWallTypeOverridePostgresRepository,