Merge branch 'main' of https://github.com/Hestia-Homes/Model into feature/per-cert-mapper-validation

2026-07-27 23:35:01 +00:00 · 2026-06-02 16:10:41 +00:00 · 2026-06-02 16:10:41 +00:00 · 69995edec8
commit 69995edec8
parent d1c87d84b0 ed8751b715
97 changed files with 3494 additions and 351 deletions
--- a/.devcontainer/backend/requirements.txt
+++ b/.devcontainer/backend/requirements.txt
@ -27,3 +27,4 @@ pytest-postgresql
 # Formatting
 black==26.1.0
 boto3-stubs
+openai
--- a/.github/workflows/_deploy_lambda.yml
+++ b/.github/workflows/_deploy_lambda.yml
@ -92,6 +92,9 @@ on:

      TF_VAR_magicplan_api_key:
        required: false
+
+      TF_VAR_openai_api_key:
+        required: false
 jobs:
  deploy:
    runs-on: ubuntu-latest
@ -163,6 +166,7 @@ jobs:
          TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
          TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
          TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
+          TF_VAR_openai_api_key: ${{ secrets.TF_VAR_openai_api_key }}
        run: |
          ECR_REPO_URL_VAR=""
          if [[ -n "${{ inputs.ecr_repo }}" ]]; then
@ -213,6 +217,7 @@ jobs:
          TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
          TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
          TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
+          TF_VAR_openai_api_key: ${{ secrets.TF_VAR_openai_api_key }}
        run: |
          EXTRA_VARS=""
          if [[ -n "${{ inputs.ecr_repo }}" ]]; then
--- a/.github/workflows/ddd_tests.yml
+++ b/.github/workflows/ddd_tests.yml
@ -0,0 +1,32 @@
+name: Run DDD tests
+
+on:
+  pull_request:
+    branches:
+      - "**"
+
+# The DDD rewrite (tests/) defines SQLModel table classes that map to the same
+# physical tables as the legacy backend models. Both sets share the one global
+# SQLModel.metadata, so they cannot be imported into the same pytest process —
+# hence this runs as a separate workflow from unit_tests.yml until the legacy
+# models are retired. It also covers the Lambda packaging linter
+# (tests/test_lambda_packaging.py). Its DB is spawned in-process by
+# pytest-postgresql, so no `services: postgres` and no env secrets are required,
+# and it runs independently of (and in parallel with) the legacy unit tests.
+jobs:
+  ddd-tests:
+    name: DDD tests (Docker)
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Build test image
+        run: docker build -f Dockerfile.test -t model-test .
+
+      - name: Run DDD tests
+        run: |
+          docker run --rm \
+            --network host \
+            model-test pytest -vv tests/
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@ -203,6 +203,47 @@ jobs:
      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

+  # ============================================================
+  # Build Landlord Description Overrides image and Push
+  # ============================================================
+  landlordDescriptionOverrides_image:
+    needs: [determine_stage, shared_terraform]
+    uses: ./.github/workflows/_build_image.yml
+    with:
+      ecr_repo: landlord_description_overrides-${{ needs.determine_stage.outputs.stage }}
+      dockerfile_path: applications/landlord_description_overrides/Dockerfile
+      build_context: .
+      build_args: |
+        DEV_DB_HOST=$DEV_DB_HOST
+        DEV_DB_PORT=$DEV_DB_PORT
+        DEV_DB_NAME=$DEV_DB_NAME
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
+      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
+      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
+
+  # ============================================================
+  # Deploy Landlord Description Overrides Lambda
+  # ============================================================
+  landlordDescriptionOverrides_lambda:
+    needs: [landlordDescriptionOverrides_image, determine_stage]
+    uses: ./.github/workflows/_deploy_lambda.yml
+    with:
+      lambda_name: landlordDescriptionOverrides
+      lambda_path: deployment/terraform/lambda/landlordDescriptionOverrides
+      stage: ${{ needs.determine_stage.outputs.stage }}
+      ecr_repo: landlord_description_overrides-${{ needs.determine_stage.outputs.stage }}
+      image_digest: ${{ needs.landlordDescriptionOverrides_image.outputs.image_digest }}
+      terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
+    secrets:
+      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
+      AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
+      TF_VAR_openai_api_key: ${{ secrets.DEV_OPENAI_API_KEY }}
+
  # ============================================================
  # Build Bulk Address2UPRN Combiner image and Push
  # ============================================================
--- a/.github/workflows/lambda_smoke_tests.yml
+++ b/.github/workflows/lambda_smoke_tests.yml
@ -43,6 +43,16 @@ jobs:
      build_context: .
      service_name: postcode-splitter-ddd

+  # ============================================================
+  # Landlord Description Overrides
+  # ============================================================
+  landlord_description_overrides_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: applications/landlord_description_overrides/Dockerfile
+      build_context: .
+      service_name: landlord-description-overrides
+
  # ============================================================
  # Bulk Address2UPRN Combiner
  # ============================================================
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@ -61,14 +61,7 @@ jobs:
            -e DB_PORT=5432 \
            model-test pytest -vv -m 'not integration'

-      # The DDD rewrite (tests/) defines SQLModel table classes that map to the
-      # same physical tables as the legacy backend models. Both sets share the
-      # one global SQLModel.metadata, so they cannot be imported into the same
-      # pytest process. It runs as a separate invocation until the legacy
-      # models are retired. Its DB is spawned in-process by pytest-postgresql,
-      # so no DB service or env is required.
-      - name: Run DDD tests
-        run: |
-          docker run --rm \
-            --network host \
-            model-test pytest -vv tests/
+      # The DDD rewrite (tests/) runs in its own workflow (ddd_tests.yml): its
+      # SQLModel table classes map to the same physical tables as the legacy
+      # backend models and share the one global SQLModel.metadata, so the two
+      # suites cannot be imported into the same pytest process.
--- a/UBIQUITOUS_LANGUAGE.md
+++ b/UBIQUITOUS_LANGUAGE.md
@ -1,7 +1,84 @@
 # Ubiquitous Language

-This file has been **superseded by [CONTEXT.md](./CONTEXT.md)**.
+Domain terminology glossary for this project. Generated and maintained by the `/ubiquitous-language` Claude Code skill.

-The project's domain glossary now lives at the repo root in `CONTEXT.md`, maintained by the `/grill-with-docs` skill (which replaced `/ubiquitous-language`).
+Invoke `/ubiquitous-language` in any session to extract new terms from the conversation, flag ambiguities, and update this file with canonical definitions.

-If you arrived here from a link in `CLAUDE.md` or older docs, follow the link above. This file is kept only to preserve git history and may be removed once internal references are updated.
+---
+
+## Energy Performance Certificates
+
+| Term | Definition | Aliases to avoid |
+|------|------------|------------------|
+| **EPC** | An Energy Performance Certificate — a government-issued document rating a dwelling's energy efficiency from A (best) to G (worst). | "energy certificate", "energy report" |
+| **Certificate Number** | The unique identifier assigned to an EPC by the government registry. | "cert number", "EPC ID" |
+| **Registration Date** | The date an EPC was lodged with the government register; used to identify the most recent certificate for a property. | "assessment date", "submission date" |
+| **EPC Band** | A single letter A–G representing a property's current or potential energy efficiency rating. | "energy rating", "EPC grade", "EPC score" |
+| **Schema Type** | The versioned RdSAP or SAP schema that describes the structure of a certificate's raw data (e.g. `RdSAP-Schema-21.0.1`). | "schema version", "EPC format" |
+| **Domestic Certificate** | An EPC issued for a residential dwelling, as opposed to a commercial one. | "residential EPC", "home EPC" |
+
+## Properties and Addresses
+
+| Term | Definition | Aliases to avoid |
+|------|------------|------------------|
+| **UPRN** | Unique Property Reference Number — the government-issued permanent identifier for a physical address in the UK. | "property ID", "address ID", "code" |
+| **Postcode** | A UK postal code used to group nearby addresses; the primary search key for finding EPC records. | "zip code", "postal code" |
+| **Unstandardised Address** | A frozen dataclass (`domain.addresses.unstandardised_address.UnstandardisedAddress`) capturing a single address exactly as a customer supplied it, before any standardisation: a free-text `address` line (intentionally NOT normalised), a canonical `postcode` (a `Postcode` value object, sanitised on construction), an optional `org_reference` (the customer's own identifier for the property), and `additional_info` (the full source row — every column of the customer's upload, preserved verbatim). | "user address", "asset list", "raw address", "landlord address", "Hyde address" |
+| **Address List** | A nominal `NewType` over `list[UnstandardisedAddress]` (`domain.addresses.unstandardised_address.AddressList`) — a batch of unstandardised addresses, such as one customer's bulk-onboarding upload or a postcode-grouped sub-batch produced for downstream processing. Being nominal, it is constructed explicitly: `AddressList([...])`. It is the raw *input* to ingestion; the standardised *output* is a **Standardised Asset List**. | "asset list", "Hyde address list", "user addresses" |
+| **Standardised Asset List (SAL)** | A customer's property portfolio after ingestion has cleaned and standardised it — each property carrying a canonical field set (UPRN, standardised address, postcode, property type, built form, …). It is the standardised *output* of the pipeline whose raw *input* is an **Address List** of **Unstandardised Addresses**; generated by the `SALOrchestrator`. (Legacy implementation: `asset_list.AssetList` via `load_standardised_asset_list`.) | "address list" (that is the raw input), "asset register", "portfolio list" |
+| **Dwelling** | A single residential unit that can hold an EPC — a house, flat, or maisonette. | "property", "unit", "home" |
+
+## Address Matching
+
+| Term | Definition | Aliases to avoid |
+|------|------------|------------------|
+| **Lexiscore** | A similarity score in [0, 1] between an unstandardised address and a candidate EPC address; combines token overlap and character-level similarity. | "score", "match score", "similarity" |
+| **Lexirank** | Dense rank of candidates sorted by lexiscore descending; rank 1 = best match. | "rank", "position" |
+| **UPRN Candidate** | An EPC search result that is a plausible match for a given unstandardised address, before scoring decides the winner. | "match candidate", "result" |
+| **Score Threshold** | The minimum lexiscore (currently 0.6) below which no match is returned even if a candidate exists. | "minimum score", "cutoff" |
+| **Ambiguous Match** | A matching outcome where two or more candidates share lexirank 1, making it impossible to select a unique winner. | "tie", "draw", "duplicate" |
+| **Best Match** | The single UPRN candidate with lexirank 1 that meets or exceeds the score threshold. | "winner", "top result" |
+
+## API and Integration
+
+| Term | Definition | Aliases to avoid |
+|------|------------|------------------|
+| **EPC Search Result** | A lightweight record returned by the government domestic search endpoint — contains address lines, postcode, UPRN, band, and certificate number but not the full certificate data. | "search row", "EPC row", "result" |
+| **EPC Property Data** | The fully mapped domain object produced after fetching and parsing a complete EPC certificate. | "EPC data", "certificate data", "parsed EPC" |
+| **Old EPC API** | The retired government API (`epc.opendatacommunities.org`) using HTTP Basic auth; decommissioned May 2026. | "legacy API" |
+| **New EPC API** | The replacement government API (`api.get-energy-performance-data.communities.gov.uk`) using Bearer token auth. | "new API", "current API" |
+| **Bearer Token** | The auth credential required by the new EPC API; stored in the `EPC_AUTH_TOKEN` environment variable. | "API key", "auth token", "secret" |
+
+## Relationships
+
+- An **EPC** belongs to exactly one **Dwelling** and has one **Certificate Number**.
+- A **Dwelling** may have multiple **EPCs** across time; the one with the most recent **Registration Date** is the current one.
+- A **UPRN** identifies a **Dwelling** permanently; it does not change when the property changes owner.
+- An **EPC Search Result** is a summary; it points to a full **EPC** via its **Certificate Number**.
+- An **Address List** is an ordered batch of **Unstandardised Addresses**; a customer's bulk-onboarding upload arrives as one.
+- Ingestion turns an **Address List** (raw input) into a **Standardised Asset List** (standardised output) — the **SAL Orchestrator** drives this.
+- **Address Matching** uses an **Unstandardised Address** and **Postcode** to find a **UPRN** by scoring **UPRN Candidates** from an EPC search.
+- A **Lexirank** of 1 with no **Ambiguous Match** and a **Lexiscore** ≥ the **Score Threshold** produces a **Best Match**.
+
+## Example dialogue
+
+> **Dev:** "We have an unstandardised address and postcode. How do we find the UPRN?"
+
+> **Domain expert:** "Search the **New EPC API** by **Postcode** — you get back a list of **EPC Search Results** for that area. Each one has an address and a **UPRN**. Score each against the **Unstandardised Address** using the **Lexiscore**. If the top **UPRN Candidate** scores above the **Score Threshold** and there's no **Ambiguous Match**, that's your **Best Match**."
+
+> **Dev:** "What if two results share the same address line 1?"
+
+> **Domain expert:** "That's an **Ambiguous Match** — two candidates at **Lexirank** 1. Fall back to scoring on the full address using all address lines joined together. If that still ties, return nothing."
+
+> **Dev:** "Once we have the best match, do we use the UPRN or fetch the full EPC?"
+
+> **Domain expert:** "Depends on what you need. The **EPC Search Result** gives you the **EPC Band** and **Certificate Number**. If you need energy efficiency detail, use the **Certificate Number** to fetch the full **EPC Property Data**."
+
+## Flagged ambiguities
+
+- **"address"** appears in several senses: the **Unstandardised Address** dataclass (one customer-supplied address before standardisation), its free-text `address` field, and the normalised address lines on an **EPC Search Result**. Always qualify: "unstandardised address" vs "EPC address" or "address line 1". Within `domain/addresses/`, the dataclass is **Unstandardised Address**; in upstream ingestion contexts (CSV columns, SQS payloads) "address" may still mean the bare free-text string.
+- **"score"** is used for the `AddressMatch.score()` function output, the `lexiscore` DataFrame column, and informally in conversation. Prefer **Lexiscore** in domain discussions; reserve "score" for method-level code comments.
+- **"user_inputed_address"** (and `user_address`) in `backend/address2UPRN/` is legacy naming — a misspelled synonym for what is now the **Unstandardised Address**. That address-matching code has not been renamed; new code should use **Unstandardised Address**.
+- **"Hyde address list"** — "Hyde" is the name of one customer, not a domain concept. A domain expert may say "the Hyde address list" because Hyde is the customer in front of them, but the generalised term is **Address List** (and **Unstandardised Address** for a single item). A customer's identity is data — it belongs in `org_reference` or `additional_info`, never in a type or module name.
+- **"address list"** vs **"asset list"** — opposite ends of the ingestion pipeline; do not conflate them. An **Address List** is the raw *input* (unstandardised addresses as the customer supplied them); a **Standardised Asset List** is the standardised *output*. The historical `AssetList` dataclass (now **Unstandardised Address**) misnamed the input an "asset list" — that mistake is what the rename corrected.
+- **"EPC"** is overloaded as both the document (an Energy Performance Certificate) and the rating band letter. Use **EPC** for the document and **EPC Band** for the letter.
--- a/applications/ara_first_run/Dockerfile
+++ b/applications/ara_first_run/Dockerfile
@ -25,6 +25,9 @@ COPY infrastructure/ infrastructure/
 COPY orchestration/ orchestration/
 COPY repositories/ repositories/
 COPY utilities/ utilities/
+# domain.property.site_notes -> datatypes.epc.domain.epc_property_data; without
+# this the lambda fails at init with "No module named 'datatypes'".
+COPY datatypes/ datatypes/
 COPY applications/ applications/

 # Place the handler at the Lambda task root so the runtime can resolve
--- a/applications/landlord_description_overrides/Dockerfile
+++ b/applications/landlord_description_overrides/Dockerfile
@ -0,0 +1,34 @@
+FROM public.ecr.aws/lambda/python:3.11
+
+# Postgres host/port/database are baked into the image at build time from
+# the deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets),
+# mirroring backend/postcode_splitter/handler/Dockerfile. They map onto the
+# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT
+# baked in -- Terraform injects those as Lambda env vars from Secrets Manager.
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+ENV POSTGRES_HOST=${DEV_DB_HOST}
+ENV POSTGRES_PORT=${DEV_DB_PORT}
+ENV POSTGRES_DATABASE=${DEV_DB_NAME}
+
+WORKDIR /var/task
+
+COPY applications/landlord_description_overrides/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the layered source the handler imports from. The new splitter pulls
+# only DDD-shaped packages — no pandas, no legacy backend/.
+COPY domain/ domain/
+COPY infrastructure/ infrastructure/
+COPY orchestration/ orchestration/
+COPY repositories/ repositories/
+COPY utilities/ utilities/
+COPY applications/ applications/
+
+# Place the handler at the Lambda task root so the runtime can resolve
+# ``main.handler`` without an extra package prefix.
+COPY applications/landlord_description_overrides/handler.py /var/task/main.py
+
+CMD ["main.handler"]
--- a/applications/landlord_description_overrides/handler.py
+++ b/applications/landlord_description_overrides/handler.py
@ -0,0 +1,169 @@
+import logging
+import os
+from typing import Any
+
+import boto3
+
+from applications.landlord_description_overrides.landlord_description_overrides_trigger_body import (
+    LandlordDescriptionOverridesTriggerBody,
+)
+from domain.epc.built_form_type import BuiltFormType
+from domain.epc.property_type import PropertyType
+from domain.epc.roof_type import RoofType
+from domain.epc.wall_type import WallType
+from domain.epc.wall_type_construction_dates import (
+    wall_type_construction_date_prompt_hint,
+)
+from infrastructure.chatgpt.chatgpt import ChatGPT
+from infrastructure.chatgpt.chatgpt_column_classifier import ChatGptColumnClassifier
+from infrastructure.landlord_overrides.landlord_overrides_postgres_repository import (
+    LandlordOverridesRepository,
+)
+from infrastructure.postgres.config import PostgresConfig
+from infrastructure.postgres.engine import commit_scope, make_engine, make_session
+from infrastructure.postgres.landlord_built_form_type_override_table import (
+    LandlordBuiltFormTypeOverrideRow,
+)
+from infrastructure.postgres.landlord_property_type_override_table import (
+    LandlordPropertyTypeOverrideRow,
+)
+from infrastructure.postgres.landlord_roof_type_override_table import (
+    LandlordRoofTypeOverrideRow,
+)
+from infrastructure.postgres.landlord_wall_type_override_table import (
+    LandlordWallTypeOverrideRow,
+)
+from infrastructure.s3.csv_s3_client import CsvS3Client
+from infrastructure.s3.s3_uri import parse_s3_uri
+from orchestration.classifiable_column import ClassifiableColumn
+from orchestration.landlord_description_overrides_orchestrator import (
+    LandlordDescriptionOverridesOrchestrator,
+)
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
+    UnstandardisedAddressListCsvS3Repository,
+)
+from utilities.aws_lambda.subtask_handler import subtask_handler
+
+logger = logging.getLogger(__name__)
+
+
+def _build_columns(
+    column_mapping: dict[str, str], chat_gpt: ChatGPT, session: Any
+) -> list[ClassifiableColumn[Any]]:
+    """One ClassifiableColumn per mapped category.
+
+    ``column_mapping`` is ``{category -> source CSV header}``. One header may
+    feed several categories -- e.g. ``"Property Type"`` -> property_type and
+    built_form_type -- which falls out naturally because each is a separate
+    entry. Unknown categories are skipped.
+    """
+    factories = {
+        "property_type": lambda src: ClassifiableColumn(
+            name="property_type",
+            source_column=src,
+            classifier=ChatGptColumnClassifier(
+                chat_gpt, PropertyType, PropertyType.UNKNOWN
+            ),
+            repo=LandlordOverridesRepository[PropertyType](
+                session, LandlordPropertyTypeOverrideRow
+            ),
+        ),
+        "built_form_type": lambda src: ClassifiableColumn(
+            name="built_form_type",
+            source_column=src,
+            classifier=ChatGptColumnClassifier(
+                chat_gpt, BuiltFormType, BuiltFormType.UNKNOWN
+            ),
+            repo=LandlordOverridesRepository[BuiltFormType](
+                session, LandlordBuiltFormTypeOverrideRow
+            ),
+        ),
+        "wall_type": lambda src: ClassifiableColumn(
+            name="wall_type",
+            source_column=src,
+            classifier=ChatGptColumnClassifier(
+                chat_gpt,
+                WallType,
+                WallType.UNKNOWN,
+                extra_instructions=wall_type_construction_date_prompt_hint(),
+            ),
+            repo=LandlordOverridesRepository[WallType](
+                session, LandlordWallTypeOverrideRow
+            ),
+        ),
+        "roof_type": lambda src: ClassifiableColumn(
+            name="roof_type",
+            source_column=src,
+            classifier=ChatGptColumnClassifier(
+                chat_gpt, RoofType, RoofType.UNKNOWN
+            ),
+            repo=LandlordOverridesRepository[RoofType](
+                session, LandlordRoofTypeOverrideRow
+            ),
+        ),
+    }
+
+    columns: list[ClassifiableColumn[Any]] = []
+    for category, source_column in column_mapping.items():
+        factory = factories.get(category)
+        if factory is None:
+            logger.warning("Unknown classifier category %r; skipping.", category)
+            continue
+        columns.append(factory(source_column))
+    return columns
+
+
+@subtask_handler()
+def handler(
+    body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
+) -> dict[str, int]:
+    trigger = LandlordDescriptionOverridesTriggerBody.model_validate(body)
+
+    # The classifier reads a dedicated CSV of the classifier columns (raw
+    # landlord headers preserved), converted from the upload by the frontend, so
+    # the S3 bucket comes from the trigger URI rather than a fixed env var.
+    bucket, _key = parse_s3_uri(trigger.s3_uri)
+
+    # boto3.client is overloaded per-service in the installed stubs; cast to Any
+    # so the strict-mode checker treats it as opaque.
+    boto3_client: Any = (
+        boto3.client
+    )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    boto_s3: Any = boto3_client("s3")
+
+    csv_client = CsvS3Client(boto_s3, bucket)
+    unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository(
+        csv_client, bucket
+    )
+
+    # Raw rows, not load_batch: the classifier CSV carries the description
+    # columns but not the canonical address/postcode columns load_batch requires.
+    rows = csv_client.read_rows(trigger.s3_uri)
+
+    engine = make_engine(PostgresConfig.from_env(os.environ))
+    # The session is built up front (SQLModel sessions are lazy, so no
+    # connection is checked out yet) and owned by this handler. Classification
+    # runs first and calls ChatGPT, which is slow; we deliberately keep no
+    # transaction open across it. Only the persistence below -- inside
+    # ``commit_scope`` -- holds a connection.
+    session = make_session(engine)
+    try:
+        chat_gpt = ChatGPT()
+        columns = _build_columns(trigger.column_mapping, chat_gpt, session)
+        orchestrator = LandlordDescriptionOverridesOrchestrator(
+            unstandardised_address_repo=unstandardised_address_repo,
+            columns=columns,
+        )
+
+        classified = orchestrator.classify_from_rows(rows)
+
+        with commit_scope(session):
+            orchestrator.persist(classified, portfolio_id=trigger.portfolio_id)
+    finally:
+        session.close()
+
+    counts = {name: len(mapping) for name, mapping in classified.items()}
+    for name, n in counts.items():
+        logger.info("Classified %d descriptions for column %r.", n, name)
+    return counts
--- a/applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py
+++ b/applications/landlord_description_overrides/landlord_description_overrides_trigger_body.py
@ -0,0 +1,19 @@
+from uuid import UUID
+
+from pydantic import BaseModel, ConfigDict
+
+
+class LandlordDescriptionOverridesTriggerBody(BaseModel):
+    model_config = ConfigDict(extra="allow")
+
+    task_id: UUID
+    sub_task_id: UUID
+    s3_uri: str
+    # ``portfolio_id`` is ``bigint`` in the ``landlord_*_overrides`` schema --
+    # Python ``int`` is unbounded so the Pydantic side stays simple; the
+    # SQLModel row class pins the storage to ``BigInteger``.
+    portfolio_id: int
+    # category -> source CSV header (the classifier subset of the upload
+    # mapping). Defaulted so a malformed/empty message classifies nothing
+    # rather than failing validation.
+    column_mapping: dict[str, str] = {}
--- a/applications/landlord_description_overrides/local_handler/.env.local.example
+++ b/applications/landlord_description_overrides/local_handler/.env.local.example
@ -0,0 +1,5 @@
+POSTGRES_HOST=
+POSTGRES_PORT=5432
+POSTGRES_USERNAME=
+POSTGRES_PASSWORD=
+POSTGRES_DATABASE=
--- a/applications/landlord_description_overrides/local_handler/docker-compose.yml
+++ b/applications/landlord_description_overrides/local_handler/docker-compose.yml
@ -0,0 +1,9 @@
+services:
+  landlord_overrides:
+    build:
+      context: ../../../
+      dockerfile: applications/landlord_description_overrides/Dockerfile
+    ports:
+      - "9002:8080"
+    env_file:
+      - .env.local
--- a/applications/landlord_description_overrides/local_handler/invoke_local_lambda.py
+++ b/applications/landlord_description_overrides/local_handler/invoke_local_lambda.py
@ -0,0 +1,16 @@
+#!/usr/bin/env python3
+import json
+import requests
+
+HOST = "localhost"
+PORT = "9002"
+
+LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
+
+payload = {"Records": [{"body": json.dumps({})}]}
+
+response = requests.post(LAMBDA_URL, json=payload)
+
+print("Status code:", response.status_code)
+print("Response:")
+print(response.text)
--- a/applications/landlord_description_overrides/local_handler/run_local.sh
+++ b/applications/landlord_description_overrides/local_handler/run_local.sh
@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+set -euo pipefail
+cd "$(dirname "$0")"
+
+if [ ! -f .env.local ]; then
+  cp .env.local.example .env.local
+  echo "Created .env.local from the template — fill it in, then re-run." >&2
+  exit 1
+fi
+
+docker compose build --no-cache
+docker compose up --force-recreate
--- a/applications/landlord_description_overrides/requirements.txt
+++ b/applications/landlord_description_overrides/requirements.txt
@ -0,0 +1,5 @@
+boto3
+pydantic
+sqlmodel
+psycopg2-binary
+openai==1.93.0
--- a/applications/postcode_splitter/handler.py
+++ b/applications/postcode_splitter/handler.py
@ -9,11 +9,11 @@ from applications.postcode_splitter.postcode_splitter_trigger_body import (
    PostcodeSplitterTriggerBody,
 )
 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
-from infrastructure.csv_s3_client import CsvS3Client
+from infrastructure.s3.csv_s3_client import CsvS3Client
 from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
 from orchestration.task_orchestrator import TaskOrchestrator
-from repositories.user_address.user_address_csv_s3_repository import (
-    UserAddressCsvS3Repository,
+from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
+    UnstandardisedAddressListCsvS3Repository,
 )
 from utilities.aws_lambda.subtask_handler import subtask_handler

@ -29,17 +29,19 @@ def handler(

    # boto3.client is overloaded per-service in the installed stubs; cast
    # to Any so the strict-mode checker treats it as opaque.
-    boto3_client: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    boto3_client: Any = (
+        boto3.client
+    )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    boto_s3: Any = boto3_client("s3")
    boto_sqs: Any = boto3_client("sqs")

    csv_client = CsvS3Client(boto_s3, bucket)
-    user_address_repo = UserAddressCsvS3Repository(csv_client, bucket)
+    unstandardised_address_repo = UnstandardisedAddressListCsvS3Repository(csv_client, bucket)
    queue_client = Address2UprnQueueClient(boto_sqs, queue_url)

    splitter = PostcodeSplitterOrchestrator(
        task_orchestrator=task_orchestrator,
-        user_address_repo=user_address_repo,
+        unstandardised_address_repo=unstandardised_address_repo,
        queue_client=queue_client,
    )

--- a/backend/address2UPRN/handler/Dockerfile
+++ b/backend/address2UPRN/handler/Dockerfile
@ -32,6 +32,12 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY utils/ utils/
 COPY backend/ backend/
 COPY datatypes/ datatypes/
+# main.py imports infrastructure.epc_client.epc_client_service (EpcClientService);
+# without this the lambda fails at init with "No module named 'infrastructure'".
+COPY infrastructure/ infrastructure/
+# EpcClientService -> datatypes.epc.domain.mapper -> domain.sap10_calculator;
+# without this the lambda fails at init with "No module named 'domain'".
+COPY domain/ domain/

 # Copy the handler
 COPY backend/address2UPRN/main.py .
--- a/backend/app/bulk_uploads/router.py
+++ b/backend/app/bulk_uploads/router.py
@ -13,6 +13,7 @@ from backend.app.bulk_uploads.schema import (
    CombinedResultsResponse,
    CombinerTriggerRequest,
    FlagsSummary,
+    LandlordOverridesTriggerRequest,
    PostcodeSplitterTriggerRequest,
 )
 from backend.app.bulk_uploads.scoring import score_bucket
@ -92,6 +93,26 @@ async def trigger_combiner(req: CombinerTriggerRequest):
    }


+@router.post("/trigger-landlord-overrides", status_code=202)
+async def trigger_landlord_overrides(req: LandlordOverridesTriggerRequest):
+    settings = get_settings()
+
+    try:
+        sqs = boto3.client("sqs", settings.AWS_DEFAULT_REGION)
+        response = sqs.send_message(
+            QueueUrl=settings.LANDLORD_OVERRIDES_SQS_URL,
+            MessageBody=req.model_dump_json(),
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"SQS error: {e}")
+
+    return {
+        "task_id": req.task_id,
+        "sub_task_id": req.sub_task_id,
+        "sqs_message_id": response.get("MessageId"),
+    }
+
+
@router.get("/{task_id}/combined-results", response_model=CombinedResultsResponse)
 async def get_combined_results(
    task_id: UUID,
--- a/backend/app/bulk_uploads/schema.py
+++ b/backend/app/bulk_uploads/schema.py
@ -14,6 +14,15 @@ class CombinerTriggerRequest(BaseModel):
    sub_task_id: str


+class LandlordOverridesTriggerRequest(BaseModel):
+    task_id: str
+    sub_task_id: str
+    s3_uri: str
+    portfolio_id: int
+    # category -> source CSV header (the classifier subset of the upload mapping)
+    column_mapping: dict[str, str]
+
+
 class FlagsSummary(BaseModel):
    duplicates: int
    missing: int
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -42,6 +42,7 @@ class Settings(BaseSettings):
    MAGICPLAN_SQS_URL: str = "changeme"
    POSTCODE_SPLITTER_SQS_URL: str = "changeme"
    COMBINER_SQS_URL: str = "changeme"
+    LANDLORD_OVERRIDES_SQS_URL: str = "changeme"

    # Third parties
    EPC_AUTH_TOKEN: str = "changeme"
--- a/backend/docker/engine.Dockerfile
+++ b/backend/docker/engine.Dockerfile
@ -40,6 +40,9 @@ COPY --from=build-image /usr/local/lib/python3.11/site-packages/ /usr/local/lib/
 COPY ./backend/ ./backend
 COPY ./recommendations/ ./recommendations
 COPY ./utils/ ./utils/
+# engine.py -> backend.apis.GoogleSolarApi -> infrastructure.solar; without this
+# the lambda fails at init with "No module named 'infrastructure'".
+COPY ./infrastructure/ ./infrastructure/
 COPY ./etl/epc/ ./etl/epc/
 COPY ./etl/epc_clean/ ./etl/epc_clean/
 COPY ./etl/bill_savings/ ./etl/bill_savings/
--- a/backend/ecmk_fetcher/handler/Dockerfile
+++ b/backend/ecmk_fetcher/handler/Dockerfile
@ -13,6 +13,12 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY utils/ utils/
 COPY backend/ backend/
 COPY datatypes/ datatypes/
+# handler -> ecmk_service -> documents_parser.parser -> datatypes.epc.domain.mapper
+# -> domain.sap10_calculator, and -> db_writer -> epc_property model
+# -> infrastructure.postgres. Without these the lambda fails at init with
+# "No module named 'domain'" / "'infrastructure'".
+COPY domain/ domain/
+COPY infrastructure/ infrastructure/

 # Local lambda entrypoint 
 # ENTRYPOINT ["/usr/local/bin/aws-lambda-rie", "python", "-m", "awslambdaric"]
--- a/backend/pashub_fetcher/handler/Dockerfile
+++ b/backend/pashub_fetcher/handler/Dockerfile
@ -10,6 +10,12 @@ WORKDIR /var/task
 COPY utils/ utils/
 COPY backend/ backend/
 COPY datatypes/ datatypes/
+# handler -> pashub_service -> documents_parser.parser -> datatypes.epc.domain.mapper
+# -> domain.sap10_calculator, and -> db_writer -> epc_property model
+# -> infrastructure.postgres. Without these the lambda fails at init with
+# "No module named 'domain'" / "'infrastructure'".
+COPY domain/ domain/
+COPY infrastructure/ infrastructure/

 COPY backend/pashub_fetcher/handler/requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
--- a/backend/tests/test_search_epc.py
+++ b/backend/tests/test_search_epc.py
@ -14,55 +14,11 @@ class TestSearchEpcIntegration:
    def epc_auth_token(self):
        return os.getenv("EPC_AUTH_TOKEN")

-    @pytest.mark.parametrize(
-        "address, postcode, uprn, skip_os, lmk_key, n_old_epcs",
-        [
-            # Test case 1: Valid address and postcode, skipping OS
-            # In this case, the property is an individual flat but the uprn associated to the
-            # EPC is for the building as a whole, possibly because there was a conversion of sorts
-            ("Garden Flat, 48 Bedminster Parade", "BS3 4HS", 308249, True,
-             "260907a5431fa073d193cc6bbec51fbf1ba9a61845ab2503f85aa19ce3ed6afd", 1),
-
-            # Test case 2: Another valid address and postcode
-            # In this case, the newest EPC, does not have a uprn associated to it. If we did a search by
-            # uprn, we would get an old EPC
-            ("Flat 8, Hainton House", "DN32 9AQ", "", True,
-             "bd1149a20a73397184f07a9955f872424826e70f4870c058d71be887766ee1f8", 2),
-            # Test case 3: When we make a request to the API for this property, we get back results for
-            # flats 1, 2 and 3. We have some logic to handle the response so that we get back flat 1
-            ("Flat 1, 1 Tottenham Street, London", "W1T 2AE", 5167411, True,
-             "3e6414d7f15f4cf7a69dc20c469bcf043d31a49239b183f1bd0c0e1aafa23c93", 0),
-
-        ],
-    )
-    def test_find_property(self, epc_auth_token, address, postcode, uprn, skip_os, lmk_key, n_old_epcs):
-        """
-        Integration test for `find_property`, making actual API calls.
-        """
-        # Provide your actual API keys or tokens here
-        os_api_key = ""
-
-        # Initialize the SearchEpc instance
-        epc_searcher = SearchEpc(
-            address1=address,
-            postcode=postcode,
-            uprn=uprn,
-            auth_token=epc_auth_token,
-            os_api_key=os_api_key,
-        )
-
-        # Execute the method
-        epc_searcher.find_property(skip_os=skip_os)
-
-        # We check that we have the correct epc
-        assert epc_searcher.newest_epc["lmk-key"] == lmk_key
-        assert len(epc_searcher.older_epcs) == n_old_epcs
-
    def test_search_housenumber(self):
-        eg1 = 'Flat A11, Mortimer House, Grendon Road, Exeter'
+        eg1 = "Flat A11, Mortimer House, Grendon Road, Exeter"
        res1 = SearchEpc.get_house_number(eg1, None)
        assert res1 == "A11"

-        eg2 = 'Flat A9, Mortimer House, Grendon Road, Exeter, EX1 2NL'
+        eg2 = "Flat A9, Mortimer House, Grendon Road, Exeter, EX1 2NL"
        res2 = SearchEpc.get_house_number(eg2, None)
        assert res2 == "A9"
--- a/datatypes/epc/domain/epc_property_data.py
+++ b/datatypes/epc/domain/epc_property_data.py
@ -98,7 +98,9 @@ class MainHeatingDetail:
    boiler_flue_type: Optional[int] = None  # TODO: make enum?
    boiler_ignition_type: Optional[int] = None  # TODO: make enum?
    central_heating_pump_age: Optional[int] = None
-    central_heating_pump_age_str: Optional[str] = None  # str from site notes e.g. "Unknown", "Pre 2013"
+    central_heating_pump_age_str: Optional[str] = (
+        None  # str from site notes e.g. "Unknown", "Pre 2013"
+    )
    main_heating_index_number: Optional[int] = None
    sap_main_heating_code: Optional[int] = None  # TODO: make enum?
    main_heating_number: Optional[int] = None
@ -132,7 +134,7 @@ class ShowerOutlets:

@dataclass
 class SapHeating:
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
    main_heating_details: List[MainHeatingDetail]
    has_fixed_air_conditioning: bool
    cylinder_size: Optional[Union[int, str]] = (
@ -145,7 +147,9 @@ class SapHeating:
    cylinder_insulation_type: Optional[Union[int, str]] = None
    cylinder_thermostat: Optional[str] = None
    secondary_fuel_type: Optional[int] = None
-    secondary_heating_type: Optional[Union[int, str]] = None  # int from API; str from site notes
+    secondary_heating_type: Optional[Union[int, str]] = (
+        None  # int from API; str from site notes
+    )
    cylinder_insulation_thickness_mm: Optional[int] = None
    # SAP10 hot-water demand inputs from sap_heating.
    number_baths: Optional[int] = None
@ -168,7 +172,9 @@ class SapHeating:
 class SapVentilation:
    ventilation_type: Optional[str] = None
    draught_lobby: Optional[bool] = None
-    pressure_test: Optional[str] = None  # str from site notes e.g. "No test"; int in API via mechanical_ventilation
+    pressure_test: Optional[str] = (
+        None  # str from site notes e.g. "No test"; int in API via mechanical_ventilation
+    )
    open_flues_count: Optional[int] = None
    closed_flues_count: Optional[int] = None
    boiler_flues_count: Optional[int] = None
@ -476,8 +482,12 @@ class SapBuildingPart:
        None  # TODO: make enum/mapping?
    )
    floor_type: Optional[str] = None  # str from site notes e.g. "Ground Floor"
-    floor_construction_type: Optional[str] = None  # str from site notes; distinct from floor_construction: int in SapFloorDimension
-    floor_insulation_type_str: Optional[str] = None  # str from site notes e.g. "As Built"
+    floor_construction_type: Optional[str] = (
+        None  # str from site notes; distinct from floor_construction: int in SapFloorDimension
+    )
+    floor_insulation_type_str: Optional[str] = (
+        None  # str from site notes e.g. "As Built"
+    )
    floor_u_value_known: Optional[bool] = None

    roof_construction: Optional[int] = None
--- a/datatypes/epc/schema/rdsap_schema_17_0.py
+++ b/datatypes/epc/schema/rdsap_schema_17_0.py
@ -37,7 +37,7 @@ class SapHeating:
    cylinder_size: int
    water_heating_code: int
    water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
    main_heating_details: List[MainHeatingDetail]
    immersion_heating_type: Union[int, str]
    cylinder_insulation_type: int
--- a/datatypes/epc/schema/rdsap_schema_17_1.py
+++ b/datatypes/epc/schema/rdsap_schema_17_1.py
@ -41,7 +41,7 @@ class SapHeating:
    cylinder_size: int
    water_heating_code: int
    water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
    main_heating_details: List[MainHeatingDetail]
    immersion_heating_type: Union[int, str]
    cylinder_insulation_type: int
--- a/datatypes/epc/schema/rdsap_schema_18_0.py
+++ b/datatypes/epc/schema/rdsap_schema_18_0.py
@ -41,7 +41,7 @@ class SapHeating:
    cylinder_size: int
    water_heating_code: int
    water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
    main_heating_details: List[MainHeatingDetail]
    immersion_heating_type: Union[int, str]
    has_fixed_air_conditioning: str
@ -86,6 +86,7 @@ class SapFloorDimension:
@dataclass
 class SapRoomInRoof:
    """Room-in-roof details. floor_area is a Measurement object in schema 18.0."""
+
    floor_area: Measurement
    insulation: str
    roof_room_connected: str
--- a/datatypes/epc/schema/rdsap_schema_19_0.py
+++ b/datatypes/epc/schema/rdsap_schema_19_0.py
@ -41,7 +41,7 @@ class SapHeating:
    cylinder_size: int
    water_heating_code: int
    water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
    main_heating_details: List[MainHeatingDetail]
    immersion_heating_type: Union[int, str]
    has_fixed_air_conditioning: str
--- a/datatypes/epc/schema/rdsap_schema_20_0_0.py
+++ b/datatypes/epc/schema/rdsap_schema_20_0_0.py
@ -49,7 +49,7 @@ class SapHeating:
    cylinder_size: int
    water_heating_code: int
    water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs]
    main_heating_details: List[MainHeatingDetail]
    immersion_heating_type: Union[int, str]
    has_fixed_air_conditioning: str
@ -103,6 +103,7 @@ class SapFloorDimension:
@dataclass
 class SapRoomInRoof:
    """Room-in-roof details. floor_area is a plain number in schema 20.0.0 (not a Measurement object)."""
+
    floor_area: Union[int, float]
    insulation: str
    roof_room_connected: str
--- a/deployment/terraform/lambda/landlordDescriptionOverrides/main.tf
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/main.tf
@ -0,0 +1,50 @@
+data "terraform_remote_state" "shared" {
+  backend = "s3"
+  config = {
+    bucket = "assessment-model-terraform-state"
+    key    = "env:/${var.stage}/terraform.tfstate"
+    region = "eu-west-2"
+  }
+}
+
+data "aws_secretsmanager_secret_version" "db_credentials" {
+  secret_id = "${var.stage}/assessment_model/db_credentials"
+}
+
+locals {
+  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
+}
+
+module "lambda" {
+  source = "../../modules/lambda_with_sqs"
+
+  name  = "landlord-description-overrides"
+  stage = var.stage
+
+  image_uri = local.image_uri
+
+  # The classifier calls OpenAI once per distinct description per column, so it
+  # is latency-bound. 300s leaves headroom under the queue's 1000s visibility
+  # timeout. batch_size = 1 keeps one upload per invocation, so a single bad
+  # record cannot redrive its siblings. maximum_concurrency caps fan-out to
+  # respect OpenAI rate limits.
+  timeout             = 300
+  batch_size          = 1
+  maximum_concurrency = 5
+
+  environment = merge(
+    {
+      STAGE             = var.stage
+      LOG_LEVEL         = "info"
+      POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username
+      POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password
+      OPENAI_API_KEY    = var.openai_api_key
+    },
+  )
+}
+
+# Attach S3 read policy so the handler can read the original upload CSV.
+resource "aws_iam_role_policy_attachment" "landlord_overrides_s3_read" {
+  role       = module.lambda.role_name
+  policy_arn = data.terraform_remote_state.shared.outputs.landlord_overrides_s3_read_arn
+}
--- a/deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/outputs.tf
@ -0,0 +1,9 @@
+output "landlord_description_overrides_queue_url" {
+  value       = module.lambda.queue_url
+  description = "URL of the Landlord Description Overrides SQS queue (wire into the FastAPI LANDLORD_OVERRIDES_SQS_URL)"
+}
+
+output "landlord_description_overrides_queue_arn" {
+  value       = module.lambda.queue_arn
+  description = "ARN of the Landlord Description Overrides SQS queue"
+}
--- a/deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/provider.tf
@ -0,0 +1,16 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 5.0"
+    }
+  }
+
+  backend "s3" {
+    bucket = "landlord-description-overrides-terraform-state"
+    key    = "terraform.tfstate"
+    region = "eu-west-2"
+  }
+
+  required_version = ">= 1.2.0"
+}
--- a/deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf
+++ b/deployment/terraform/lambda/landlordDescriptionOverrides/variables.tf
@ -0,0 +1,33 @@
+variable "lambda_name" {
+  type        = string
+  description = "Logical name of the lambda (e.g. landlordDescriptionOverrides)"
+}
+
+variable "stage" {
+  description = "Deployment stage (e.g. dev, prod)"
+  type        = string
+}
+
+variable "ecr_repo_url" {
+  type        = string
+  description = "ECR repository URL (no tag, no digest)"
+}
+
+variable "image_digest" {
+  type        = string
+  description = "Image digest (sha256:...)"
+}
+
+variable "openai_api_key" {
+  type        = string
+  description = "OpenAI API key used by the ChatGPT column classifier"
+  sensitive   = true
+}
+
+locals {
+  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
+}
+
+output "resolved_image_uri" {
+  value = local.image_uri
+}
--- a/deployment/terraform/shared/main.tf
+++ b/deployment/terraform/shared/main.tf
@ -268,11 +268,11 @@ output "retrofit_heat_baseline_predictions_bucket_name" {

 // We make this bucket presignable, because we want to generate download links for the frontend
 module "retrofit_energy_assessments" {
-  source             = "../modules/s3_presignable_bucket"
-  bucketname         = "retrofit-energy-assessments-${var.stage}"
-  allowed_origins    = var.allowed_origins
-  environment        = var.stage
-  enable_versioning  = true
+  source            = "../modules/s3_presignable_bucket"
+  bucketname        = "retrofit-energy-assessments-${var.stage}"
+  allowed_origins   = var.allowed_origins
+  environment       = var.stage
+  enable_versioning = true
 }

 output "retrofit_energy_assessments_bucket_name" {
@ -494,6 +494,42 @@ output "postcode_splitter_s3_read_arn" {
  value = module.postcode_splitter_s3_read.policy_arn
 }

+################################################
+# Landlord Description Overrides – Lambda
+################################################
+module "landlord_description_overrides_state_bucket" {
+  source      = "../modules/tf_state_bucket"
+  bucket_name = "landlord-description-overrides-terraform-state"
+}
+
+module "landlord_description_overrides_registry" {
+  source = "../modules/container_registry"
+  name   = "landlord_description_overrides"
+  stage  = var.stage
+}
+
+# S3 policy for the landlord classifier to read the original upload CSV.
+module "landlord_overrides_s3_read" {
+  source = "../modules/s3_iam_policy"
+
+  policy_name = "LandlordOverridesReadS3"
+  # NOTE: aws_iam_policy.description is ForceNew — changing it destroys+recreates the
+  # policy, which deadlocks because the policy is attached to the lambda role in the
+  # separate landlordDescriptionOverrides stack (DeleteConflict). Keep this string
+  # byte-for-byte identical to what's in state so the bucket change applies in-place.
+  policy_description = "Allow landlord description overrides Lambda to read from retrofit-data bucket"
+  bucket_arns = [
+    "arn:aws:s3:::retrofit-plan-inputs-${var.stage}",
+    "arn:aws:s3:::retrofit-data-${var.stage}",
+  ]
+  actions        = ["s3:GetObject", "s3:ListBucket"]
+  resource_paths = ["/*"]
+}
+
+output "landlord_overrides_s3_read_arn" {
+  value = module.landlord_overrides_s3_read.policy_arn
+}
+
 ################################################
 # Bulk Address2UPRN Combiner – Lambda ECR
 ################################################
@ -729,7 +765,7 @@ module "hubspot_etl_bucket" {
 module "hubspot_etl_registry" {
  source = "../modules/container_registry"
  name   = "hubspot-etl"
-  stage = var.stage
+  stage  = var.stage

 }

--- a/docs/adr/0003-python-writes-landlord-overrides-directly.md
+++ b/docs/adr/0003-python-writes-landlord-overrides-directly.md
@ -0,0 +1,79 @@
+# ADR-0003: Python writes landlord overrides directly to Postgres
+
+**Status:** Accepted
+**Date:** 2026-05-26
+**Supersedes (in part):** [assessment-model/docs/adr/0002-landlord-override-vocabulary.md](https://github.com/.../assessment-model/blob/main/docs/adr/0002-landlord-override-vocabulary.md) — specifically the clause beginning *"Writes happen from Next.js …"*.
+
+## Context
+
+ADR-0002 (in the `assessment-model` TS repo) defined the `landlord_property_type_overrides` and `landlord_wall_type_overrides` tables and noted that the Model service would POST classification results to a Next.js route handler, with Next.js performing the upsert. Drizzle remained the schema source of truth.
+
+That extra hop has not been built and is now judged unnecessary for the present scope:
+
+- The classification result is internal — a Lambda computes it, the same Lambda persists it. No third party needs to participate in the write.
+- Drizzle remains the schema's source of truth either way: the Python adapter mirrors the schema in a SQLModel row, but the migrations stay with Drizzle. Adding a Next.js route would not change which side owns schema definition.
+- The Python lambda already lives next to a Postgres connection in the existing pipeline (`subtask`/`tasks` tables are written from Python today). Adding two more tables to that adapter surface is a small, well-understood change. Routing the same writes through Next.js would mean: lambda → JSON-over-HTTP → Next.js route → Drizzle → Postgres, instead of lambda → SQLAlchemy → Postgres. Three extra moving parts to ship, deploy, monitor, and authenticate for no behavioural gain.
+
+## Decision
+
+The Model service (specifically `applications/landlord_description_overrides/handler.py`) writes directly to `landlord_property_type_overrides` and `landlord_wall_type_overrides` via a SQLAlchemy-backed `LandlordOverrideRepository[E]` adapter. No Next.js route handler is required.
+
+Transaction boundaries live in `infrastructure/postgres/engine.transactional_session` — a context manager that commits on clean exit and rolls back on exception. The application layer (`handler.py`) never calls `.commit()` or `.rollback()` itself; it only opens the context. Orchestration and repository code likewise never commits — keeping transaction semantics confined to one infrastructure helper.
+
+The conflict policy lives in SQL and is identical for every override category. A single generic adapter, `LandlordOverridesRepository[E]`, implements it once; the target table is selected by the SQLModel `…Row` class passed at construction. Each category (property / built-form / wall / roof type) is that same adapter parameterised by its row class:
+
+```sql
+INSERT INTO landlord_property_type_overrides (portfolio_id, description, value, source)
+VALUES …
+ON CONFLICT (portfolio_id, description)
+DO UPDATE SET value = EXCLUDED.value,
+              source = EXCLUDED.source,
+              updated_at = now()
+WHERE landlord_property_type_overrides.source = 'classifier';
+```
+
+The `WHERE existing.source = 'classifier'` guard is load-bearing: it lets the classifier refresh its own past output while leaving `source = 'user'` rows untouched. This is the contract ADR-0002's `source` column was added for.
+
+`UNKNOWN` values are persisted, not skipped — consistent with ADR-0002 §5. A future user override can upgrade them.
+
+## Consequences
+
+**Positive.**
+
+- One fewer service to deploy, monitor, and authenticate.
+- The classifier and persistence live in the same process — failures surface against a single `sub_task` row, not split across two systems.
+- The Postgres adapter mirrors the existing `subtask`/`tasks` repositories, so reviewers have a precedent to compare against.
+
+**Negative.**
+
+- The Python repo now holds two schemas — the schema-source-of-truth Drizzle definition lives in the TS repo, and the Python `SQLModel` row class shadows it. They must stay in lockstep. Mitigations: the TS schema header comment (`landlord_overrides.ts:12`) already names the Python source-of-truth file; a future ADR may add a CI check that diffs the two.
+- The boundary that ADR-0002 anticipated for pgEnum validation (a Next.js route validating incoming values before insert) is gone. Pydantic + the Python `Enum` type catch invalid values on the producing side, and Postgres's pgEnum will reject anything that slips through.
+
+## File layout
+
+This ADR also fixes a placement convention for Postgres adapters going forward. The codebase currently has the ChatGPT classifier split cleanly along DDD lines — port in `domain/`, adapter in `infrastructure/chatgpt/` — but the `tasks` Postgres adapter does not follow the same shape: its concrete class lives in `repositories/tasks/`, not `infrastructure/postgres/`.
+
+The convention going forward separates the persistence *behaviour* (grouped by aggregate) from the schema *mirrors* (grouped by technology, since they share pgEnums and engine metadata):
+
+- **Port (protocol / abstract base):** `repositories/<aggregate>/<thing>_repository.py`
+- **Postgres repository adapter (concrete):** `infrastructure/<aggregate>/<aggregate>_postgres_repository.py`
+- **SQLModel row class (`table=True` schema mirror):** `infrastructure/postgres/<thing>_table.py`
+
+The `LandlordOverridesRepository` adapter follows this convention: the concrete class — the aggregate's "talker" to Postgres — lives at `infrastructure/landlord_overrides/landlord_overrides_postgres_repository.py`, while the per-category `…Row` classes stay in `infrastructure/postgres/`. The `…Row` classes are one-per-table — each mirrors a genuinely distinct Drizzle table and `value` pgEnum, and they share the single `override_source` pgEnum instance, so they belong together in the Postgres technology bucket as schema mirrors, not duplicated logic.
+
+(This refines the placement first sketched in this ADR, which put the adapter in `infrastructure/postgres/` alongside the row classes. The adapter holds no schema — only the write path — so it groups by aggregate; only the `table=True` mirrors stay tech-bucketed.)
+
+**Existing outliers to relocate in a follow-up:**
+
+- `repositories/tasks/task_postgres_repository.py` → `infrastructure/tasks/task_postgres_repository.py`
+- `repositories/tasks/subtask_postgres_repository.py` → `infrastructure/tasks/subtask_postgres_repository.py`
+
+(Their `task_table.py` / `subtask_table.py` schema mirrors already sit correctly in `infrastructure/postgres/`.) Both moves are mechanical (import-path updates only). They are intentionally out of scope for the present PR.
+
+## Out of scope (deferred to follow-up work)
+
+- Relocating `task_postgres_repository.py` and `subtask_postgres_repository.py` into `infrastructure/tasks/` per the convention above.
+- ~~Extracting a shared upsert helper / base class once a third `landlord_*_overrides` column lands — until then the per-category adapters' 95%-identical bodies are kept side-by-side for direct comparison.~~ **Done.** The per-category adapter bodies were byte-identical (varying only in their row class), so they were consolidated into one generic `LandlordOverridesRepository[E]` parameterised by row class rather than waiting for a third column.
+- Switching `applications/landlord_description_overrides/handler.py` to acquire its `Session` via a `@subtask_handler()`-style decorator instead of building its own engine.
+- A cross-repo PR amending ADR-0002 to point at this ADR.
+- A CI check (or codegen) that diffs the Drizzle pgEnum literals against the Python `Enum.value` strings.
--- a/domain/addresses/postcode_batching.py
+++ b/domain/addresses/postcode_batching.py
@ -2,21 +2,21 @@ from __future__ import annotations

 from collections.abc import Iterable, Iterator

-from domain.addresses.user_address import UserAddress
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode


 def iter_postcode_grouped_batches(
-    addresses: Iterable[UserAddress],
+    addresses: Iterable[UnstandardisedAddress],
    *,
    max_batch_size: int = 500,
-) -> Iterator[list[UserAddress]]:
+) -> Iterator[AddressList]:
    if max_batch_size < 1:
        raise ValueError("max_batch_size must be >= 1")

    groups = _group_by_postcode_in_order(addresses)

-    buffer: list[UserAddress] = []
+    buffer: AddressList = AddressList([])
    for group in groups.values():
        group_len = len(group)

@ -26,14 +26,14 @@ def iter_postcode_grouped_batches(
        if group_len >= max_batch_size:
            if buffer:
                yield buffer
-                buffer = []
+                buffer = AddressList([])
            yield group
            continue

        # Adding this group would overflow: flush buffer before appending.
        if len(buffer) + group_len > max_batch_size:
            yield buffer
-            buffer = []
+            buffer = AddressList([])

        buffer.extend(group)

@ -43,9 +43,9 @@ def iter_postcode_grouped_batches(


 def _group_by_postcode_in_order(
-    addresses: Iterable[UserAddress],
-) -> dict[Postcode, list[UserAddress]]:
-    groups: dict[Postcode, list[UserAddress]] = {}
+    addresses: Iterable[UnstandardisedAddress],
+) -> dict[Postcode, AddressList]:
+    groups: dict[Postcode, AddressList] = {}
    for address in addresses:
-        groups.setdefault(address.postcode, []).append(address)
+        groups.setdefault(address.postcode, AddressList([])).append(address)
    return groups
--- a/domain/addresses/standardised_address_list.py
+++ b/domain/addresses/standardised_address_list.py
@ -0,0 +1,21 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import NewType, Optional
+
+from domain.postcode import Postcode
+
+
+def _empty_source_row() -> dict[str, str]:
+    return {}
+
+
+@dataclass(frozen=True)
+class StandardisedAddress:
+    address: str
+    postcode: Postcode
+    org_reference: Optional[str] = None
+
+
+# Standardised Asset List -- the cleaned output counterpart to AddressList.
+SAL = NewType("SAL", list[StandardisedAddress])
--- a/domain/addresses/unstandardised_address.py
+++ b/domain/addresses/unstandardised_address.py
@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Optional, NewType
+
+from domain.postcode import Postcode
+
+
+def _empty_source_row() -> dict[str, str]:
+    return {}
+
+
+@dataclass(frozen=True)
+class UnstandardisedAddress:
+    address: str
+    postcode: Postcode
+    org_reference: Optional[str] = None
+    additional_info: dict[str, str] = field(
+        default_factory=_empty_source_row, compare=False
+    )
+
+
+# A batch of raw, pre-standardisation addresses as supplied by a landlord.
+AddressList = NewType("AddressList", list[UnstandardisedAddress])
--- a/domain/addresses/user_address.py
+++ b/domain/addresses/user_address.py
@ -1,18 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass, field
-from typing import Optional
-
-from domain.postcode import Postcode
-
-
-def _empty_source_row() -> dict[str, str]:
-    return {}
-
-
-@dataclass(frozen=True)
-class UserAddress:
-    user_address: str
-    postcode: Postcode
-    internal_reference: Optional[str] = None
-    source_row: dict[str, str] = field(default_factory=_empty_source_row, compare=False)
--- a/domain/data_transformation/init.py
+++ b/domain/data_transformation/init.py
--- a/domain/data_transformation/column_classifier.py
+++ b/domain/data_transformation/column_classifier.py
@ -0,0 +1,39 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Generic, TypeVar
+
+E = TypeVar("E", bound=Enum)
+
+
+class ClassificationError(Exception):
+    """Raised when classifying a column's descriptions fails wholesale.
+
+    A whole-batch failure (the AI backend is unreachable, or returns a reply
+    that cannot be parsed) raises this. A single description that merely
+    cannot be resolved is not an error -- it maps to the enum's UNKNOWN member.
+    """
+
+
+class ColumnClassifier(ABC, Generic[E]):
+    """Port: resolves free-text descriptions into a category enum ``E``.
+
+    One classifier handles one landlord-CSV column. Implementations decide
+    *how* the mapping is performed (an LLM, a lookup table, a rules engine);
+    ``LandlordDescriptionOverridesOrchestrator`` depends only on this interface.
+    """
+
+    @abstractmethod
+    def classify(self, descriptions: set[str]) -> dict[str, E]:
+        """Classify each description into a category enum member.
+
+        Every input description appears as a key in the result. A description
+        that cannot be resolved maps to the enum's UNKNOWN member.
+
+        Raises:
+            ClassificationError: If the classification call fails wholesale
+                (e.g. the backend is unreachable or returns an unparseable
+                response).
+        """
+        ...
--- a/tests/repositories/user_address/init.py
+++ b/tests/repositories/user_address/init.py
--- a/domain/epc/built_form_type.py
+++ b/domain/epc/built_form_type.py
@ -0,0 +1,20 @@
+from enum import Enum
+
+
+class BuiltFormType(Enum):
+    """A landlord-supplied built form, as resolved by the landlord-description-overrides context.
+
+    Mirrors the EPC built-form values. ``NOT_RECORDED`` is the legitimate
+    EPC value for properties whose built form the surveyor did not capture;
+    ``UNKNOWN`` is the classifier fallback for landlord values that cannot be
+    resolved at all.
+    """
+
+    DETACHED = "Detached"
+    SEMI_DETACHED = "Semi-Detached"
+    MID_TERRACE = "Mid-Terrace"
+    END_TERRACE = "End-Terrace"
+    ENCLOSED_MID_TERRACE = "Enclosed Mid-Terrace"
+    ENCLOSED_END_TERRACE = "Enclosed End-Terrace"
+    NOT_RECORDED = "Not Recorded"
+    UNKNOWN = "Unknown"
--- a/domain/epc/property_type.py
+++ b/domain/epc/property_type.py
@ -0,0 +1,16 @@
+from enum import Enum
+
+
+class PropertyType(Enum):
+    """A landlord-supplied property type, as resolved by the landlord-description-overrides context.
+
+    Distinct from the EPC context's ``PropertyType``: a landlord CSV value
+    may be unresolvable, so this enum carries an explicit ``UNKNOWN`` member.
+    """
+
+    HOUSE = "House"
+    BUNGALOW = "Bungalow"
+    FLAT = "Flat"
+    MAISONETTE = "Maisonette"
+    PARK_HOME = "Park home"
+    UNKNOWN = "Unknown"
--- a/domain/epc/roof_type.py
+++ b/domain/epc/roof_type.py
@ -0,0 +1,70 @@
+from enum import Enum
+
+
+class RoofType(Enum):
+    """A landlord-supplied roof description, as resolved by the landlord-description-overrides context.
+
+    Each member is one full EPC roof-description string, combining shape
+    (flat, pitched, roof room(s), thatched) with insulation state and, for
+    pitched roofs, the loft-insulation depth in millimetres. Adjacency
+    markers like ``(another dwelling above)`` represent a unit whose top
+    boundary is another dwelling rather than a roof of its own; they are
+    kept as members because they appear in the same EPC column.
+    ``UNKNOWN`` covers values the classifier cannot resolve -- most
+    commonly raw ``Average thermal transmittance`` U-value strings that
+    carry no shape/insulation information.
+    """
+
+    FLAT_INSULATED = "Flat, insulated"
+    FLAT_INSULATED_ASSUMED = "Flat, insulated (assumed)"
+    FLAT_LIMITED_INSULATION = "Flat, limited insulation"
+    FLAT_LIMITED_INSULATION_ASSUMED = "Flat, limited insulation (assumed)"
+    FLAT_NO_INSULATION = "Flat, no insulation"
+    FLAT_NO_INSULATION_ASSUMED = "Flat, no insulation (assumed)"
+
+    PITCHED_INSULATED = "Pitched, insulated"
+    PITCHED_INSULATED_ASSUMED = "Pitched, insulated (assumed)"
+    PITCHED_INSULATED_AT_RAFTERS = "Pitched, insulated at rafters"
+    PITCHED_LIMITED_INSULATION = "Pitched, limited insulation"
+    PITCHED_LIMITED_INSULATION_ASSUMED = "Pitched, limited insulation (assumed)"
+    PITCHED_NO_INSULATION = "Pitched, no insulation"
+    PITCHED_NO_INSULATION_ASSUMED = "Pitched, no insulation (assumed)"
+    PITCHED_UNKNOWN_LOFT_INSULATION = "Pitched, Unknown loft insulation"
+    PITCHED_LOFT_0MM = "Pitched, 0 mm loft insulation"
+    PITCHED_LOFT_12MM = "Pitched, 12 mm loft insulation"
+    PITCHED_LOFT_25MM = "Pitched, 25 mm loft insulation"
+    PITCHED_LOFT_50MM = "Pitched, 50 mm loft insulation"
+    PITCHED_LOFT_75MM = "Pitched, 75 mm loft insulation"
+    PITCHED_LOFT_100MM = "Pitched, 100 mm loft insulation"
+    PITCHED_LOFT_125MM = "Pitched, 125 mm loft insulation"
+    PITCHED_LOFT_150MM = "Pitched, 150 mm loft insulation"
+    PITCHED_LOFT_175MM = "Pitched, 175 mm loft insulation"
+    PITCHED_LOFT_200MM = "Pitched, 200 mm loft insulation"
+    PITCHED_LOFT_225MM = "Pitched, 225 mm loft insulation"
+    PITCHED_LOFT_250MM = "Pitched, 250 mm loft insulation"
+    PITCHED_LOFT_270MM = "Pitched, 270 mm loft insulation"
+    PITCHED_LOFT_300MM = "Pitched, 300 mm loft insulation"
+    PITCHED_LOFT_350MM = "Pitched, 350 mm loft insulation"
+    PITCHED_LOFT_400MM = "Pitched, 400 mm loft insulation"
+    PITCHED_LOFT_400_PLUS_MM = "Pitched, 400+ mm loft insulation"
+
+    ROOF_ROOM_INSULATED = "Roof room(s), insulated"
+    ROOF_ROOM_INSULATED_ASSUMED = "Roof room(s), insulated (assumed)"
+    ROOF_ROOM_LIMITED_INSULATION = "Roof room(s), limited insulation"
+    ROOF_ROOM_LIMITED_INSULATION_ASSUMED = "Roof room(s), limited insulation (assumed)"
+    ROOF_ROOM_NO_INSULATION = "Roof room(s), no insulation"
+    ROOF_ROOM_NO_INSULATION_ASSUMED = "Roof room(s), no insulation (assumed)"
+    ROOF_ROOM_CEILING_INSULATED = "Roof room(s), ceiling insulated"
+    ROOF_ROOM_THATCHED = "Roof room(s), thatched"
+    ROOF_ROOM_THATCHED_WITH_ADDITIONAL_INSULATION = "Roof room(s), thatched with additional insulation"
+
+    THATCHED = "Thatched"
+    THATCHED_WITH_ADDITIONAL_INSULATION = "Thatched, with additional insulation"
+
+    ADJACENT_ANOTHER_DWELLING_ABOVE = "(another dwelling above)"
+    ADJACENT_SAME_DWELLING_ABOVE = "(same dwelling above)"
+    ADJACENT_OTHER_PREMISES_ABOVE = "(other premises above)"
+    ADJACENT_ANOTHER_PREMISES_ABOVE = "(another premises above)"
+    ANOTHER_PREMISES_ABOVE = "Another Premises Above"
+
+    UNKNOWN = "Unknown"
--- a/domain/epc/wall_type.py
+++ b/domain/epc/wall_type.py
@ -0,0 +1,117 @@
+from enum import Enum
+
+
+class WallType(Enum):
+    """A landlord-supplied wall description, as resolved by the landlord-description-overrides context.
+
+    Each member is one full EPC wall-description string, combining material
+    (cavity, solid brick, sandstone, …) with construction/insulation state
+    (as built, filled cavity, with internal insulation, …). ``UNKNOWN`` covers
+    values the classifier cannot resolve — most commonly raw
+    ``Average thermal transmittance`` U-value strings that carry no material
+    information.
+    """
+
+    CAVITY_FILLED = "Cavity wall, filled cavity"
+    CAVITY_AS_BUILT_INSULATED_ASSUMED = (
+        "Cavity wall, as built, insulated (assumed)"  # 1983 - 1990
+    )
+    CAVITY_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Cavity wall, as built, no insulation (assumed)"  # Pre-1975
+    )
+
+    CAVITY_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Cavity wall, as built, partial insulation (assumed)"  # 1976 - 1982
+    )
+    CAVITY_WITH_INTERNAL_INSULATION = "Cavity wall, with internal insulation"
+    CAVITY_WITH_EXTERNAL_INSULATION = "Cavity wall, with external insulation"
+    CAVITY_FILLED_AND_INTERNAL_INSULATION = (
+        "Cavity wall, filled cavity and internal insulation"
+    )
+    CAVITY_FILLED_AND_EXTERNAL_INSULATION = (
+        "Cavity wall, filled cavity and external insulation"
+    )
+
+    SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Solid brick, as built, no insulation (assumed)"
+    )
+    SOLID_BRICK_AS_BUILT_INSULATED_ASSUMED = (
+        "Solid brick, as built, insulated (assumed)"
+    )
+    SOLID_BRICK_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Solid brick, as built, partial insulation (assumed)"
+    )
+    SOLID_BRICK_WITH_INTERNAL_INSULATION = "Solid brick, with internal insulation"
+    SOLID_BRICK_WITH_EXTERNAL_INSULATION = "Solid brick, with external insulation"
+
+    TIMBER_FRAME_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Timber frame, as built, no insulation (assumed)"
+    )
+    TIMBER_FRAME_AS_BUILT_INSULATED_ASSUMED = (
+        "Timber frame, as built, insulated (assumed)"
+    )
+    TIMBER_FRAME_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Timber frame, as built, partial insulation (assumed)"
+    )
+    TIMBER_FRAME_WITH_ADDITIONAL_INSULATION = "Timber frame, with additional insulation"
+
+    SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Sandstone, as built, no insulation (assumed)"
+    )
+    SANDSTONE_AS_BUILT_INSULATED_ASSUMED = "Sandstone, as built, insulated (assumed)"
+    SANDSTONE_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Sandstone, as built, partial insulation (assumed)"
+    )
+    SANDSTONE_WITH_INTERNAL_INSULATION = "Sandstone, with internal insulation"
+    SANDSTONE_WITH_EXTERNAL_INSULATION = "Sandstone, with external insulation"
+
+    GRANITE_OR_WHIN_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Granite or whin, as built, no insulation (assumed)"
+    )
+    GRANITE_OR_WHIN_AS_BUILT_INSULATED_ASSUMED = (
+        "Granite or whin, as built, insulated (assumed)"
+    )
+    GRANITE_OR_WHIN_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "Granite or whin, as built, partial insulation (assumed)"
+    )
+    GRANITE_OR_WHIN_WITH_INTERNAL_INSULATION = (
+        "Granite or whin, with internal insulation"
+    )
+    GRANITE_OR_WHIN_WITH_EXTERNAL_INSULATION = (
+        "Granite or whin, with external insulation"
+    )
+
+    SYSTEM_BUILT_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "System built, as built, no insulation (assumed)"
+    )
+    SYSTEM_BUILT_AS_BUILT_INSULATED_ASSUMED = (
+        "System built, as built, insulated (assumed)"
+    )
+    SYSTEM_BUILT_AS_BUILT_PARTIAL_INSULATION_ASSUMED = (
+        "System built, as built, partial insulation (assumed)"
+    )
+    SYSTEM_BUILT_WITH_INTERNAL_INSULATION = "System built, with internal insulation"
+    SYSTEM_BUILT_WITH_EXTERNAL_INSULATION = "System built, with external insulation"
+
+    PARK_HOME_AS_BUILT = "Park home wall, as built"
+    PARK_HOME_WITH_INTERNAL_INSULATION = "Park home wall, with internal insulation"
+    PARK_HOME_WITH_EXTERNAL_INSULATION = "Park home wall, with external insulation"
+
+    COB_AS_BUILT = "Cob, as built"
+    COB_WITH_INTERNAL_INSULATION = "Cob, with internal insulation"
+    COB_WITH_EXTERNAL_INSULATION = "Cob, with external insulation"
+
+    CURTAIN_WALL = "Curtain wall"
+    CURTAIN_WALL_AS_BUILT_NO_INSULATION_ASSUMED = (
+        "Curtain Wall, as built, no insulation (assumed)"
+    )
+    CURTAIN_WALL_AS_BUILT_INSULATED_ASSUMED = (
+        "Curtain Wall, as built, insulated (assumed)"
+    )
+    CURTAIN_WALL_FILLED = "Curtain Wall, filled cavity"
+    CURTAIN_WALL_WITH_INTERNAL_INSULATION = "Curtain Wall, with internal insulation"
+
+    BASEMENT_WALL = "Basement wall"
+    BASEMENT_WALL_AS_BUILT = "Basement wall, as built"
+
+    UNKNOWN = "Unknown"
--- a/domain/epc/wall_type_construction_dates.py
+++ b/domain/epc/wall_type_construction_dates.py
@ -0,0 +1,72 @@
+"""Construction-date metadata for the "assumed" ``WallType`` variants.
+
+The ``(assumed)`` variants of ``WallType`` are what RdSAP picks when a
+surveyor has no direct observation and falls back to the typical wall make-up
+for a property's build era. The era boundaries reflect UK Building
+Regulations milestones for cavity-wall insulation:
+
+* up to 1975 -- no cavity insulation requirement
+* 1976-1982 -- partial-fill cavity (early insulation requirement)
+* 1983-1990 -- full-fill cavity (insulation required)
+
+Captured here as a structured lookup so:
+
+* the LLM prompt builder can render the ranges as a hint, helping the
+  classifier resolve era-implying landlord descriptions to the right
+  ``(assumed)`` variant;
+* future date-aware paths (a deterministic year-to-variant shortcut, a
+  date-keyed repo) can read from the same source instead of duplicating
+  the knowledge.
+
+Only the variants where we have a defensible era boundary appear here; the
+remaining ``(assumed)`` members are left out rather than guessed.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Mapping, Optional
+
+from domain.epc.wall_type import WallType
+
+
+@dataclass(frozen=True)
+class YearRange:
+    """An inclusive year range. ``None`` on either end means "no bound"."""
+
+    start: Optional[int] = None
+    end: Optional[int] = None
+
+    def __str__(self) -> str:
+        if self.start is None and self.end is not None:
+            return f"pre-{self.end + 1}"
+        if self.start is not None and self.end is None:
+            return f"{self.start}+"
+        return f"{self.start}-{self.end}"
+
+
+WALL_TYPE_CONSTRUCTION_YEARS: Mapping[WallType, YearRange] = {
+    WallType.CAVITY_AS_BUILT_NO_INSULATION_ASSUMED: YearRange(end=1975),
+    WallType.CAVITY_AS_BUILT_PARTIAL_INSULATION_ASSUMED: YearRange(
+        start=1976, end=1982
+    ),
+    WallType.CAVITY_AS_BUILT_INSULATED_ASSUMED: YearRange(start=1983, end=1990),
+}
+
+
+def wall_type_construction_date_prompt_hint() -> str:
+    """Render the date metadata as a prompt fragment for the LLM classifier.
+
+    The fragment lists each (variant, year range) pair so the model can
+    prefer the era-matching ``(assumed)`` variant when a landlord
+    description carries era information (e.g. "1970s semi", "built before
+    the war").
+    """
+    lines = [
+        f"- {wall_type.value!r}: typically built {year_range}"
+        for wall_type, year_range in WALL_TYPE_CONSTRUCTION_YEARS.items()
+    ]
+    return (
+        "When the description carries construction-era information, prefer "
+        "the category whose typical build year matches:\n" + "\n".join(lines)
+    )
--- a/infrastructure/chatgpt/init.py
+++ b/infrastructure/chatgpt/init.py
--- a/infrastructure/chatgpt/chatgpt.py
+++ b/infrastructure/chatgpt/chatgpt.py
@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import os
+from typing import Optional
+
+from openai import OpenAI
+from openai.types.chat import ChatCompletionMessageParam
+
+from infrastructure.chatgpt.exceptions import ChatGPTClientError
+
+
+class ChatGPT:
+    """Thin wrapper over the OpenAI Chat Completions API.
+
+    Sends a single prompt and returns the assistant's reply as plain text.
+    """
+
+    DEFAULT_MODEL = "gpt-4o-mini"
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+    ) -> None:
+        key = api_key or os.environ.get("OPENAI_API_KEY")
+        if not key:
+            raise ChatGPTClientError(
+                "No OpenAI API key provided. "
+                "Pass api_key or set the OPENAI_API_KEY environment variable."
+            )
+        self._client = OpenAI(api_key=key)
+        self._model = model or self.DEFAULT_MODEL
+
+    def generate(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+    ) -> str:
+        """Send a prompt to the model and return its reply text.
+
+        Args:
+            prompt: The user message to send.
+            system_prompt: Optional instruction that sets the model's behaviour.
+
+        Raises:
+            OpenAiClientError: If the model returns an empty response.
+        """
+        messages: list[ChatCompletionMessageParam] = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
+
+        response = self._client.chat.completions.create(
+            model=self._model,
+            messages=messages,
+        )
+        content = response.choices[0].message.content
+        if content is None:
+            raise ChatGPTClientError("ChatGPT returned an empty response.")
+        return content
--- a/infrastructure/chatgpt/chatgpt_column_classifier.py
+++ b/infrastructure/chatgpt/chatgpt_column_classifier.py
@ -0,0 +1,99 @@
+from __future__ import annotations
+
+import json
+from enum import Enum
+from typing import Any, Optional, TypeVar
+
+from domain.data_transformation.column_classifier import (
+    ClassificationError,
+    ColumnClassifier,
+)
+from infrastructure.chatgpt.chatgpt import ChatGPT
+from infrastructure.chatgpt.exceptions import ChatGPTClientError
+
+E = TypeVar("E", bound=Enum)
+
+
+class ChatGptColumnClassifier(ColumnClassifier[E]):
+    """ColumnClassifier backed by ChatGPT, parametrised by a category enum.
+
+    The same classification path -- prompt, JSON parsing, UNKNOWN fallback --
+    serves any category enum; only ``category_enum`` and its ``unknown``
+    member differ between columns.
+    """
+
+    def __init__(
+        self,
+        chat_gpt: ChatGPT,
+        category_enum: type[E],
+        unknown: E,
+        extra_instructions: Optional[str] = None,
+    ) -> None:
+        self._chat_gpt = chat_gpt
+        self._category_enum = category_enum
+        self._unknown = unknown
+        # Free-form column-specific guidance appended to the system prompt
+        # ahead of the JSON-output instruction. Lets each column ship its
+        # own hints (e.g. wall-type construction-era ranges) without the
+        # generic classifier knowing what they are.
+        self._extra_instructions = extra_instructions
+
+    def classify(self, descriptions: set[str]) -> dict[str, E]:
+        if not descriptions:
+            return {}
+        try:
+            reply = self._chat_gpt.generate(
+                prompt=json.dumps(sorted(descriptions)),
+                system_prompt=self._system_prompt(),
+            )
+        except ChatGPTClientError as error:
+            raise ClassificationError(
+                f"ChatGPT classification failed for "
+                f"{self._category_enum.__name__}."
+            ) from error
+        try:
+            raw: dict[str, Any] = json.loads(self._strip_code_fence(reply))
+        except json.JSONDecodeError as error:
+            raise ClassificationError(
+                f"ChatGPT returned a reply that is not valid JSON: {reply!r}"
+            ) from error
+        return {
+            description: self._to_category(raw.get(description))
+            for description in descriptions
+        }
+
+    def _system_prompt(self) -> str:
+        categories = ", ".join(
+            member.value
+            for member in self._category_enum
+            if member is not self._unknown
+        )
+        parts = [
+            "Classify each free-text description into exactly one category. ",
+            f"Categories: {categories}. ",
+        ]
+        if self._extra_instructions:
+            parts.append(self._extra_instructions + " ")
+        parts.append(
+            "Reply with only a JSON object mapping each original description "
+            "to its category, and nothing else."
+        )
+        return "".join(parts)
+
+    def _to_category(self, value: Any) -> E:
+        """Map a reply value to a category member, defaulting to UNKNOWN."""
+        try:
+            return self._category_enum(value)
+        except ValueError:
+            return self._unknown
+
+    @staticmethod
+    def _strip_code_fence(reply: str) -> str:
+        """Remove a surrounding markdown code fence, if ChatGPT added one."""
+        text = reply.strip()
+        if not text.startswith("```"):
+            return text
+        lines = text.splitlines()[1:]
+        if lines and lines[-1].strip() == "```":
+            lines = lines[:-1]
+        return "\n".join(lines)
--- a/infrastructure/chatgpt/exceptions.py
+++ b/infrastructure/chatgpt/exceptions.py
@ -0,0 +1,2 @@
+class ChatGPTClientError(Exception):
+    """Base for all OpenAI client errors."""
--- a/infrastructure/csv_s3_client.py
+++ b/infrastructure/csv_s3_client.py
@ -1,33 +0,0 @@
-import csv
-from io import StringIO
-
-from infrastructure.s3_client import S3Client
-from infrastructure.s3_uri import parse_s3_uri
-
-
-class CsvS3Client(S3Client):
-    def read_rows(self, s3_uri: str) -> list[dict[str, str]]:
-        bucket, key = parse_s3_uri(s3_uri)
-        if bucket != self.bucket:
-            raise ValueError(
-                f"s3_uri bucket {bucket!r} does not match client bucket {self.bucket!r}"
-            )
-        raw = self.get_object(key)
-        try:
-            text = raw.decode("utf-8-sig")
-        except UnicodeDecodeError:
-            # Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8.
-            text = raw.decode("cp1252")
-
-        reader = csv.DictReader(StringIO(text))
-        return [dict(row) for row in reader]
-
-    def save_rows(self, rows: list[dict[str, str]], key: str) -> str:
-        if not rows:
-            raise ValueError("Cannot save an empty rows list: header is unknown")
-        buffer = StringIO()
-        fieldnames = list(rows[0].keys())
-        writer = csv.DictWriter(buffer, fieldnames=fieldnames)
-        writer.writeheader()
-        writer.writerows(rows)
-        return self.put_object(key, buffer.getvalue().encode("utf-8"))
--- a/infrastructure/landlord_overrides/init.py
+++ b/infrastructure/landlord_overrides/init.py
--- a/infrastructure/landlord_overrides/landlord_overrides_postgres_repository.py
+++ b/infrastructure/landlord_overrides/landlord_overrides_postgres_repository.py
@ -0,0 +1,97 @@
+"""One Postgres adapter for every ``landlord_<category>_overrides`` table.
+
+The four override categories (property / built-form / wall / roof type) share
+an identical write path: the same ``(portfolio_id, description) -> value`` row
+shape and the same source-aware conflict policy (ADR-0003 §Decision fixes one
+policy for all of them). The only thing that varies per category is the target
+table, which is selected by the SQLModel row class handed to the constructor.
+
+So a single generic adapter serves all four -- callers parameterise it by the
+category enum ``E`` and pass the matching ``…Row`` class:
+
+    LandlordOverridesRepository[PropertyType](session, LandlordPropertyTypeOverrideRow)
+
+This adapter -- the aggregate's "talker" to Postgres -- lives under
+``infrastructure/landlord_overrides/``, grouped by aggregate. The ``table=True``
+SQLModel row classes it writes through stay in ``infrastructure/postgres/`` as
+schema mirrors (they share the ``override_source`` pgEnum and register against
+the same engine metadata). See ADR-0003 §File layout.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from enum import Enum
+from typing import TypeVar, cast
+
+from sqlalchemy import Table
+from sqlalchemy.dialects.postgresql import insert as pg_insert
+from sqlmodel import Session, SQLModel
+
+from infrastructure.postgres.landlord_override_enums import OverrideSource
+from repositories.landlord_overrides.landlord_override_repository import (
+    LandlordOverrideRepository,
+)
+
+E = TypeVar("E", bound=Enum)
+
+
+class LandlordOverridesRepository(LandlordOverrideRepository[E]):
+    """Writes classifier overrides to the ``landlord_<category>_overrides`` table
+    backing ``row_type``.
+
+    ``row_type`` is the SQLModel mirror of the target table (e.g.
+    ``LandlordPropertyTypeOverrideRow``); its category enum must match the type
+    argument ``E`` the caller binds. The pairing is a convention, not a type
+    constraint -- the row classes are not generic over their value enum -- so
+    keep the two in step at the call site.
+    """
+
+    def __init__(self, session: Session, row_type: type[SQLModel]) -> None:
+        self._session = session
+        # SQLModel's class-level ``__table__`` is injected at runtime on
+        # ``table=True`` classes but isn't exposed by the stubs; pin it to
+        # ``Table`` via ``getattr`` so the dialect insert helper carries
+        # through with strict types.
+        self._table: Table = cast(Table, getattr(row_type, "__table__"))
+
+    def upsert_all(
+        self,
+        portfolio_id: int,
+        descriptions_to_values: dict[str, E],
+    ) -> None:
+        if not descriptions_to_values:
+            return
+
+        now = datetime.now(timezone.utc)
+        rows = [
+            {
+                "portfolio_id": portfolio_id,
+                "description": description,
+                "value": value.value,
+                "source": OverrideSource.CLASSIFIER,
+                "created_at": now,
+                "updated_at": now,
+            }
+            for description, value in descriptions_to_values.items()
+        ]
+
+        stmt = pg_insert(self._table).values(rows)
+
+        # The classifier may refresh its own past output, but must never
+        # overwrite a user correction -- the ``WHERE existing.source =
+        # 'classifier'`` guard enforces that. See ADR-0003 §Decision.
+        stmt = stmt.on_conflict_do_update(
+            index_elements=["portfolio_id", "description"],
+            set_={
+                "value": stmt.excluded.value,
+                "source": stmt.excluded.source,
+                "updated_at": stmt.excluded.updated_at,
+            },
+            where=self._table.c.source == OverrideSource.CLASSIFIER,
+        )
+
+        # SQLModel re-exports SQLAlchemy's ``Session.execute``; one of the
+        # overload signatures is marked deprecated in stubs, which fires
+        # here even though our INSERT path is the supported one.
+        self._session.execute(stmt)  # pyright: ignore[reportDeprecated]
--- a/infrastructure/openai/init.py
+++ b/infrastructure/openai/init.py
--- a/infrastructure/openai/exceptions.py
+++ b/infrastructure/openai/exceptions.py
@ -0,0 +1,2 @@
+class OpenAiClientError(Exception):
+    """Base for all OpenAI client errors."""
--- a/infrastructure/openai/openai_client.py
+++ b/infrastructure/openai/openai_client.py
@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import os
+from typing import Optional
+
+from openai import OpenAI
+from openai.types.chat import ChatCompletionMessageParam
+
+from infrastructure.openai.exceptions import OpenAiClientError
+
+
+class OpenAiChatClient:
+    """Thin wrapper over the OpenAI Chat Completions API.
+
+    Sends a single prompt and returns the assistant's reply as plain text.
+    """
+
+    DEFAULT_MODEL = "gpt-4o-mini"
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        model: Optional[str] = None,
+    ) -> None:
+        key = api_key or os.environ.get("OPENAI_API_KEY")
+        if not key:
+            raise OpenAiClientError(
+                "No OpenAI API key provided. "
+                "Pass api_key or set the OPENAI_API_KEY environment variable."
+            )
+        self._client = OpenAI(api_key=key)
+        self._model = model or self.DEFAULT_MODEL
+
+    def generate(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+    ) -> str:
+        """Send a prompt to the model and return its reply text.
+
+        Args:
+            prompt: The user message to send.
+            system_prompt: Optional instruction that sets the model's behaviour.
+
+        Raises:
+            OpenAiClientError: If the model returns an empty response.
+        """
+        messages: list[ChatCompletionMessageParam] = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        messages.append({"role": "user", "content": prompt})
+
+        response = self._client.chat.completions.create(
+            model=self._model,
+            messages=messages,
+        )
+        content = response.choices[0].message.content
+        if content is None:
+            raise OpenAiClientError("OpenAI returned an empty response.")
+        return content
--- a/infrastructure/postgres/engine.py
+++ b/infrastructure/postgres/engine.py
@ -1,3 +1,6 @@
+from collections.abc import Iterator
+from contextlib import contextmanager
+
 from sqlalchemy.engine import Engine
 from sqlmodel import Session, create_engine

@ -16,3 +19,42 @@ def make_engine(config: PostgresConfig) -> Engine:

 def make_session(engine: Engine) -> Session:
    return Session(engine)
+
+
+@contextmanager  # pyright: ignore[reportDeprecated]
+def transactional_session(engine: Engine) -> Iterator[Session]:
+    """Yield a session whose lifecycle owns the transaction.
+
+    On clean exit the session commits; on any exception it rolls back and
+    re-raises. Either way the session is closed. Callers in the application
+    layer can do their work inside the ``with`` block without ever invoking
+    ``.commit()`` / ``.rollback()`` themselves -- transaction semantics stay
+    in the infrastructure layer.
+    """
+    session = Session(engine)
+    try:
+        yield session
+        session.commit()
+    except Exception:
+        session.rollback()
+        raise
+    finally:
+        session.close()
+
+
+@contextmanager  # pyright: ignore[reportDeprecated]
+def commit_scope(session: Session) -> Iterator[Session]:
+    """Commit a caller-owned session on clean exit; roll back on error.
+
+    Like ``transactional_session`` but for a session the caller already holds
+    and will close itself. Use it to keep slow, non-DB work *outside* the
+    transaction: build the session, run the slow work, then enter
+    ``commit_scope`` only for the persistence -- so a connection is checked out
+    (SQLModel sessions are lazy) for the shortest possible window.
+    """
+    try:
+        yield session
+        session.commit()
+    except Exception:
+        session.rollback()
+        raise
--- a/infrastructure/postgres/landlord_built_form_type_override_table.py
+++ b/infrastructure/postgres/landlord_built_form_type_override_table.py
@ -0,0 +1,69 @@
+"""SQLModel mirror of the ``landlord_built_form_type_overrides`` Drizzle table.
+
+The schema source of truth lives in the ``assessment-model`` TS repo
+(`src/app/db/schema/landlord_overrides.ts`). The migrations are owned there;
+this row class only mirrors the columns so the Python lambda can read/write.
+See ADR-0003. Shape mirrors ``LandlordPropertyTypeOverrideRow`` -- the only
+differences are the table name, the ``built_form_type`` pgEnum on ``value``,
+and the unique-constraint name.
+"""
+
+from datetime import datetime, timezone
+from typing import ClassVar
+from uuid import UUID, uuid4
+
+from sqlalchemy import BigInteger, Column, UniqueConstraint
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.epc.built_form_type import BuiltFormType
+from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
+
+
+class LandlordBuiltFormTypeOverrideRow(SQLModel, table=True):
+    __tablename__: ClassVar[str] = "landlord_built_form_type_overrides"  # pyright: ignore[reportIncompatibleVariableOverride]
+    __table_args__: ClassVar[tuple[UniqueConstraint, ...]] = (  # pyright: ignore[reportIncompatibleVariableOverride]
+        UniqueConstraint(
+            "portfolio_id",
+            "description",
+            name="landlord_built_form_type_overrides_portfolio_description_unique",
+        ),
+    )
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True)
+
+    # bigint to match the Drizzle ``portfolio_id`` FK; SQLModel's default int
+    # mapping is 32-bit Integer and would overflow once portfolio IDs exceed
+    # 2^31. The FK to ``portfolio.id`` is enforced by the Drizzle migration,
+    # not declared here -- the ``portfolio`` table is not modelled in Python.
+    portfolio_id: int = Field(
+        sa_column=Column(BigInteger, nullable=False, index=True),
+    )
+
+    description: str = Field(nullable=False)
+
+    value: BuiltFormType = Field(
+        sa_column=Column(
+            SAEnum(
+                BuiltFormType,
+                name="built_form_type",
+                values_callable=lambda cls: [m.value for m in cls],  # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+            ),
+            nullable=False,
+        ),
+    )
+
+    # Shared SAEnum -- see ``landlord_override_enums`` for why this single
+    # instance is reused by every ``landlord_*_overrides`` row class.
+    source: str = Field(
+        sa_column=Column(override_source_sa_enum, nullable=False),
+    )
+
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
--- a/infrastructure/postgres/landlord_override_enums.py
+++ b/infrastructure/postgres/landlord_override_enums.py
@ -0,0 +1,35 @@
+"""Shared pgEnum definitions used by every ``landlord_*_overrides`` row class.
+
+The ``override_source`` pgEnum is referenced by both
+``landlord_property_type_overrides`` and ``landlord_wall_type_overrides``
+(per the Drizzle schema -- see ``landlord_overrides.ts``). Defining it once
+here and reusing the same SQLAlchemy ``Enum`` instance across both row
+classes keeps SQLModel's metadata coherent: ``create_all`` emits exactly one
+``CREATE TYPE override_source`` statement, not two parallel ones colliding
+on the same pgEnum name.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import Enum as SAEnum
+
+
+class OverrideSource:
+    """Mirror of the ``override_source`` pgEnum.
+
+    Drizzle defines this as ``('classifier', 'user')`` in
+    ``landlord_overrides.ts``. Modelled here as string constants so callers
+    don't sprinkle magic strings; the column is constrained by Postgres,
+    and the only Python-side producer (the classifier path) writes the
+    literal ``OverrideSource.CLASSIFIER``.
+    """
+
+    CLASSIFIER = "classifier"
+    USER = "user"
+
+
+override_source_sa_enum = SAEnum(
+    OverrideSource.CLASSIFIER,
+    OverrideSource.USER,
+    name="override_source",
+)
--- a/infrastructure/postgres/landlord_property_type_override_table.py
+++ b/infrastructure/postgres/landlord_property_type_override_table.py
@ -0,0 +1,67 @@
+"""SQLModel mirror of the ``landlord_property_type_overrides`` Drizzle table.
+
+The schema source of truth lives in the ``assessment-model`` TS repo
+(`src/app/db/schema/landlord_overrides.ts`). The migrations are owned there;
+this row class only mirrors the columns so the Python lambda can read/write.
+See ADR-0003.
+"""
+
+from datetime import datetime, timezone
+from typing import ClassVar
+from uuid import UUID, uuid4
+
+from sqlalchemy import BigInteger, Column, UniqueConstraint
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.epc.property_type import PropertyType
+from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
+
+
+class LandlordPropertyTypeOverrideRow(SQLModel, table=True):
+    __tablename__: ClassVar[str] = "landlord_property_type_overrides"  # pyright: ignore[reportIncompatibleVariableOverride]
+    __table_args__: ClassVar[tuple[UniqueConstraint, ...]] = (  # pyright: ignore[reportIncompatibleVariableOverride]
+        UniqueConstraint(
+            "portfolio_id",
+            "description",
+            name="landlord_property_type_overrides_portfolio_description_unique",
+        ),
+    )
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True)
+
+    # bigint to match the Drizzle ``portfolio_id`` FK; SQLModel's default int
+    # mapping is 32-bit Integer and would overflow once portfolio IDs exceed
+    # 2^31. The FK to ``portfolio.id`` is enforced by the Drizzle migration,
+    # not declared here -- the ``portfolio`` table is not modelled in Python.
+    portfolio_id: int = Field(
+        sa_column=Column(BigInteger, nullable=False, index=True),
+    )
+
+    description: str = Field(nullable=False)
+
+    value: PropertyType = Field(
+        sa_column=Column(
+            SAEnum(
+                PropertyType,
+                name="property_type",
+                values_callable=lambda cls: [m.value for m in cls],  # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+            ),
+            nullable=False,
+        ),
+    )
+
+    # Shared SAEnum -- see ``landlord_override_enums`` for why this single
+    # instance is reused by every ``landlord_*_overrides`` row class.
+    source: str = Field(
+        sa_column=Column(override_source_sa_enum, nullable=False),
+    )
+
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
--- a/infrastructure/postgres/landlord_roof_type_override_table.py
+++ b/infrastructure/postgres/landlord_roof_type_override_table.py
@ -0,0 +1,69 @@
+"""SQLModel mirror of the ``landlord_roof_type_overrides`` Drizzle table.
+
+The schema source of truth lives in the ``assessment-model`` TS repo
+(`src/app/db/schema/landlord_overrides.ts`). The migrations are owned there;
+this row class only mirrors the columns so the Python lambda can read/write.
+See ADR-0003. Shape mirrors ``LandlordPropertyTypeOverrideRow`` -- the only
+differences are the table name, the ``roof_type`` pgEnum on ``value``, and
+the unique-constraint name.
+"""
+
+from datetime import datetime, timezone
+from typing import ClassVar
+from uuid import UUID, uuid4
+
+from sqlalchemy import BigInteger, Column, UniqueConstraint
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.epc.roof_type import RoofType
+from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
+
+
+class LandlordRoofTypeOverrideRow(SQLModel, table=True):
+    __tablename__: ClassVar[str] = "landlord_roof_type_overrides"  # pyright: ignore[reportIncompatibleVariableOverride]
+    __table_args__: ClassVar[tuple[UniqueConstraint, ...]] = (  # pyright: ignore[reportIncompatibleVariableOverride]
+        UniqueConstraint(
+            "portfolio_id",
+            "description",
+            name="landlord_roof_type_overrides_portfolio_description_unique",
+        ),
+    )
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True)
+
+    # bigint to match the Drizzle ``portfolio_id`` FK; SQLModel's default int
+    # mapping is 32-bit Integer and would overflow once portfolio IDs exceed
+    # 2^31. The FK to ``portfolio.id`` is enforced by the Drizzle migration,
+    # not declared here -- the ``portfolio`` table is not modelled in Python.
+    portfolio_id: int = Field(
+        sa_column=Column(BigInteger, nullable=False, index=True),
+    )
+
+    description: str = Field(nullable=False)
+
+    value: RoofType = Field(
+        sa_column=Column(
+            SAEnum(
+                RoofType,
+                name="roof_type",
+                values_callable=lambda cls: [m.value for m in cls],  # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+            ),
+            nullable=False,
+        ),
+    )
+
+    # Shared SAEnum -- see ``landlord_override_enums`` for why this single
+    # instance is reused by every ``landlord_*_overrides`` row class.
+    source: str = Field(
+        sa_column=Column(override_source_sa_enum, nullable=False),
+    )
+
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
--- a/infrastructure/postgres/landlord_wall_type_override_table.py
+++ b/infrastructure/postgres/landlord_wall_type_override_table.py
@ -0,0 +1,69 @@
+"""SQLModel mirror of the ``landlord_wall_type_overrides`` Drizzle table.
+
+The schema source of truth lives in the ``assessment-model`` TS repo
+(`src/app/db/schema/landlord_overrides.ts`). The migrations are owned there;
+this row class only mirrors the columns so the Python lambda can read/write.
+See ADR-0003. Shape mirrors ``LandlordPropertyTypeOverrideRow`` -- the only
+differences are the table name, the ``wall_type`` pgEnum on ``value``, and
+the unique-constraint name.
+"""
+
+from datetime import datetime, timezone
+from typing import ClassVar
+from uuid import UUID, uuid4
+
+from sqlalchemy import BigInteger, Column, UniqueConstraint
+from sqlalchemy import Enum as SAEnum
+from sqlmodel import Field, SQLModel
+
+from domain.epc.wall_type import WallType
+from infrastructure.postgres.landlord_override_enums import override_source_sa_enum
+
+
+class LandlordWallTypeOverrideRow(SQLModel, table=True):
+    __tablename__: ClassVar[str] = "landlord_wall_type_overrides"  # pyright: ignore[reportIncompatibleVariableOverride]
+    __table_args__: ClassVar[tuple[UniqueConstraint, ...]] = (  # pyright: ignore[reportIncompatibleVariableOverride]
+        UniqueConstraint(
+            "portfolio_id",
+            "description",
+            name="landlord_wall_type_overrides_portfolio_description_unique",
+        ),
+    )
+
+    id: UUID = Field(default_factory=uuid4, primary_key=True)
+
+    # bigint to match the Drizzle ``portfolio_id`` FK; SQLModel's default int
+    # mapping is 32-bit Integer and would overflow once portfolio IDs exceed
+    # 2^31. The FK to ``portfolio.id`` is enforced by the Drizzle migration,
+    # not declared here -- the ``portfolio`` table is not modelled in Python.
+    portfolio_id: int = Field(
+        sa_column=Column(BigInteger, nullable=False, index=True),
+    )
+
+    description: str = Field(nullable=False)
+
+    value: WallType = Field(
+        sa_column=Column(
+            SAEnum(
+                WallType,
+                name="wall_type",
+                values_callable=lambda cls: [m.value for m in cls],  # pyright: ignore[reportUnknownLambdaType, reportUnknownMemberType, reportUnknownVariableType]
+            ),
+            nullable=False,
+        ),
+    )
+
+    # Shared SAEnum -- see ``landlord_override_enums`` for why this single
+    # instance is reused by every ``landlord_*_overrides`` row class.
+    source: str = Field(
+        sa_column=Column(override_source_sa_enum, nullable=False),
+    )
+
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
+    updated_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc),
+        nullable=False,
+    )
--- a/infrastructure/s3/init.py
+++ b/infrastructure/s3/init.py
--- a/infrastructure/s3/csv_s3_client.py
+++ b/infrastructure/s3/csv_s3_client.py
@ -0,0 +1,62 @@
+import csv
+from io import StringIO
+
+from infrastructure.s3.s3_client import S3Client
+from infrastructure.s3.s3_uri import parse_s3_uri
+
+
+def _dedupe_fieldnames(fieldnames: list[str]) -> list[str]:
+    """Disambiguate repeated CSV headers by appending an index.
+
+    The first occurrence keeps its name; each later one becomes
+    ``name_1``, ``name_2`` … so duplicate columns survive as distinct
+    keys instead of collapsing onto one (last-wins) dict entry.
+    """
+    deduped: list[str] = []
+    counts: dict[str, int] = {}
+    for name in fieldnames:
+        if name not in counts:
+            counts[name] = 0
+            deduped.append(name)
+            continue
+        counts[name] += 1
+        candidate = f"{name}_{counts[name]}"
+        while candidate in counts:
+            counts[name] += 1
+            candidate = f"{name}_{counts[name]}"
+        counts[candidate] = 0
+        deduped.append(candidate)
+    return deduped
+
+
+class CsvS3Client(S3Client):
+    def read_rows(self, s3_uri: str) -> list[dict[str, str]]:
+        bucket, key = parse_s3_uri(s3_uri)
+        if bucket != self.bucket:
+            raise ValueError(
+                f"s3_uri bucket {bucket!r} does not match client bucket {self.bucket!r}"
+            )
+        raw = self.get_object(key)
+        try:
+            text = raw.decode("utf-8-sig")
+        except UnicodeDecodeError:
+            # Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8.
+            text = raw.decode("cp1252")
+
+        buffer = StringIO(text)
+        header = next(csv.reader(buffer), None)
+        if header is None:
+            return []
+        fieldnames = _dedupe_fieldnames(header)
+        reader = csv.DictReader(buffer, fieldnames=fieldnames)
+        return [dict(row) for row in reader]
+
+    def save_rows(self, rows: list[dict[str, str]], key: str) -> str:
+        if not rows:
+            raise ValueError("Cannot save an empty rows list: header is unknown")
+        buffer = StringIO()
+        fieldnames = list(rows[0].keys())
+        writer = csv.DictWriter(buffer, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(rows)
+        return self.put_object(key, buffer.getvalue().encode("utf-8"))
--- a/infrastructure/s3/s3_client.py
+++ b/infrastructure/s3/s3_client.py
--- a/infrastructure/s3/s3_uri.py
+++ b/infrastructure/s3/s3_uri.py
--- a/orchestration/classifiable_column.py
+++ b/orchestration/classifiable_column.py
@ -0,0 +1,37 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import Generic, TypeVar
+
+from domain.data_transformation.column_classifier import ColumnClassifier
+from repositories.landlord_overrides.landlord_override_repository import (
+    LandlordOverrideRepository,
+)
+
+E = TypeVar("E", bound=Enum)
+
+
+@dataclass(frozen=True)
+class ClassifiableColumn(Generic[E]):
+    """Pairs a column's classifier with the repository that persists its results.
+
+    The orchestrator registers one ``ClassifiableColumn`` per
+    (source column, target enum) pair. Bundling the classifier and the
+    repository together makes the "this enum lands in this table" invariant
+    structural -- the handler can no longer wire ``PropertyType``
+    classifications to a ``WallType`` repo by keying two dicts with the same
+    string.
+
+    ``source_column`` is the landlord-CSV header to read from; ``name`` is the
+    unique key the orchestrator uses to report this classification's results
+    (and the key the handler logs). Two ``ClassifiableColumn``s may share a
+    ``source_column`` -- e.g. the ``"Property Type"`` CSV column feeds both
+    ``PropertyType`` and ``BuiltFormType`` classifiers off the same free-text
+    description -- but each must have a unique ``name``.
+    """
+
+    name: str
+    source_column: str
+    classifier: ColumnClassifier[E]
+    repo: LandlordOverrideRepository[E]
--- a/orchestration/landlord_description_overrides_orchestrator.py
+++ b/orchestration/landlord_description_overrides_orchestrator.py
@ -0,0 +1,142 @@
+from enum import Enum
+from typing import Any
+
+from domain.addresses.unstandardised_address import AddressList
+from orchestration.classifiable_column import ClassifiableColumn
+from repositories.unstandardised_address.unstandardised_address_list_repository import (
+    UnstandardisedAddressListRepository,
+)
+
+
+class LandlordDescriptionOverridesOrchestrator:
+    def __init__(
+        self,
+        unstandardised_address_repo: UnstandardisedAddressListRepository,
+        columns: list[ClassifiableColumn[Any]],
+    ) -> None:
+        self._unstandardised_address_repo = unstandardised_address_repo
+        # Each entry is one (source CSV column, target enum) classification.
+        # Two entries may share ``source_column`` -- e.g. ``"Property Type"``
+        # feeds both PropertyType and BuiltFormType classifiers -- so the
+        # registry is a list rather than a dict keyed by header.
+        self._columns = columns
+
+    def get_unstandardised_addresses(
+        self,
+        input_s3_uri: str,
+    ) -> AddressList:
+        return self._unstandardised_address_repo.load_batch(input_s3_uri)
+
+    def get_col_to_description_mappings(
+        self, list_of_unstandardised_address: AddressList
+    ) -> dict[str, set[str]]:
+        mappings: dict[str, set[str]] = {}
+        for unstandardised_address in list_of_unstandardised_address:
+            for key, value in unstandardised_address.additional_info.items():
+                bucket = mappings.setdefault(key, set())
+                # A comma-separated value is several descriptions in one cell;
+                # split it so each is its own entry. Lower-case so case-only
+                # typos collapse to one variant.
+                for variant in value.split(","):
+                    variant = variant.strip().lower()
+                    if variant:
+                        bucket.add(variant)
+        return mappings
+
+    def classify_columns(
+        self, addresses: AddressList
+    ) -> dict[str, dict[str, Enum]]:
+        """Classify every registered column's descriptions.
+
+        Returns a mapping of ``ClassifiableColumn.name`` to
+        ``{description: category}``. A registered column whose ``source_column``
+        is absent from the addresses contributes an empty inner mapping.
+        """
+        col_to_desc = self.get_col_to_description_mappings(addresses)
+        return {
+            column.name: column.classifier.classify(
+                col_to_desc.get(column.source_column, set())
+            )
+            for column in self._columns
+        }
+
+    def persist(
+        self, classified: dict[str, dict[str, Enum]], portfolio_id: int
+    ) -> None:
+        """Persist already-classified results via each column's repository.
+
+        ``classified`` is keyed by ``ClassifiableColumn.name`` -- the shape
+        ``classify_columns`` and ``classify_from_rows`` return. Each non-empty
+        mapping is written through the column's own repo under
+        ``source = 'classifier'``; an empty mapping (a registered column absent
+        from this batch) skips the DB round-trip.
+
+        The orchestrator does not commit -- the caller owns the transaction
+        boundary, and is expected to open it only around this call so the
+        slow classification never holds a connection.
+        """
+        for column in self._columns:
+            mapping = classified.get(column.name)
+            if not mapping:
+                continue
+            column.repo.upsert_all(portfolio_id, mapping)
+
+    def classify_and_persist(
+        self, addresses: AddressList, portfolio_id: int
+    ) -> dict[str, dict[str, Enum]]:
+        """Classify every registered column and persist the results.
+
+        Returns the same shape as ``classify_columns`` so callers can log
+        per-column counts.
+        """
+        classified = self.classify_columns(addresses)
+        self.persist(classified, portfolio_id)
+        return classified
+
+    def classify_from_rows(
+        self, rows: list[dict[str, str]]
+    ) -> dict[str, dict[str, Enum]]:
+        """Classify raw CSV rows without touching the database.
+
+        The classification half of ``classify_and_persist_from_rows``, split
+        out so a caller can run the slow ChatGPT work *before* opening a
+        transaction and then write the finished results with ``persist`` inside
+        one short-lived connection.
+
+        Unlike the ``AddressList`` path this builds no ``AddressList``, so it
+        has no canonical address/postcode requirement -- the classifier only
+        needs the raw description cells. Used when reading the original
+        landlord upload (raw headers) rather than the address-matching CSV.
+        """
+        col_to_desc = self._descriptions_from_rows(rows)
+        return {
+            column.name: column.classifier.classify(
+                col_to_desc.get(column.source_column, set())
+            )
+            for column in self._columns
+        }
+
+    def classify_and_persist_from_rows(
+        self, rows: list[dict[str, str]], portfolio_id: int
+    ) -> dict[str, dict[str, Enum]]:
+        """Classify + persist straight from raw CSV rows in one call.
+
+        A convenience composition of ``classify_from_rows`` + ``persist``.
+        Prefer calling the two separately when classification is slow, so the
+        transaction opens only around ``persist`` (see the Lambda handler).
+        """
+        classified = self.classify_from_rows(rows)
+        self.persist(classified, portfolio_id)
+        return classified
+
+    @staticmethod
+    def _descriptions_from_rows(rows: list[dict[str, str]]) -> dict[str, set[str]]:
+        mappings: dict[str, set[str]] = {}
+        for row in rows:
+            for key, value in row.items():
+                bucket = mappings.setdefault(key, set())
+                for variant in (value or "").split(","):
+                    variant = variant.strip().lower()
+                    if variant:
+                        bucket.add(variant)
+        return mappings
--- a/orchestration/postcode_splitter_orchestrator.py
+++ b/orchestration/postcode_splitter_orchestrator.py
@ -5,19 +5,21 @@ from uuid import UUID
 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
 from orchestration.task_orchestrator import TaskOrchestrator
 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from repositories.user_address.user_address_repository import UserAddressRepository
+from repositories.unstandardised_address.unstandardised_address_list_repository import (
+    UnstandardisedAddressListRepository,
+)


 class PostcodeSplitterOrchestrator:
    def __init__(
        self,
        task_orchestrator: TaskOrchestrator,
-        user_address_repo: UserAddressRepository,
+        unstandardised_address_repo: UnstandardisedAddressListRepository,
        queue_client: Address2UprnQueueClient,
        max_batch_size: int = 500,
    ) -> None:
        self._task_orchestrator = task_orchestrator
-        self._user_address_repo = user_address_repo
+        self._unstandardised_address_repo = unstandardised_address_repo
        self._queue_client = queue_client
        self._max_batch_size = max_batch_size

@ -28,7 +30,7 @@ class PostcodeSplitterOrchestrator:
        parent_subtask_id: UUID,
        input_s3_uri: str,
    ) -> list[UUID]:
-        addresses = self._user_address_repo.load_batch(input_s3_uri)
+        addresses = self._unstandardised_address_repo.load_batch(input_s3_uri)
        path_prefix = (
            f"ara_postcode_splitter_batches/{parent_task_id}/{parent_subtask_id}"
        )
@ -37,7 +39,7 @@ class PostcodeSplitterOrchestrator:
        for batch in iter_postcode_grouped_batches(
            addresses, max_batch_size=self._max_batch_size
        ):
-            batch_uri = self._user_address_repo.save_batch(batch, path_prefix)
+            batch_uri = self._unstandardised_address_repo.save_batch(batch, path_prefix)
            child = self._task_orchestrator.create_child_subtask(
                parent_task_id,
                inputs={
--- a/pytest.ini
+++ b/pytest.ini
@ -26,5 +26,6 @@ testpaths =
    etl/hubspot/tests
    etl/spatial/tests
    domain/sap10_ml/tests
+    tests/
 markers =
    integration: mark a test as an integration test
--- a/repositories/landlord_overrides/init.py
+++ b/repositories/landlord_overrides/init.py
--- a/repositories/landlord_overrides/landlord_override_repository.py
+++ b/repositories/landlord_overrides/landlord_override_repository.py
@ -0,0 +1,40 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from enum import Enum
+from typing import Generic, TypeVar
+
+E = TypeVar("E", bound=Enum)
+
+
+class LandlordOverrideRepository(ABC, Generic[E]):
+    """Port: persists landlord (description -> category) overrides for a portfolio.
+
+    One repository implementation targets one ``landlord_<category>_overrides``
+    table. The category enum ``E`` (e.g. ``PropertyType``, ``WallType``) determines
+    which table the adapter writes to; the orchestrator depends only on this
+    interface and never names a concrete table.
+
+    A single concrete adapter,
+    ``infrastructure/landlord_overrides/landlord_overrides_postgres_repository.LandlordOverridesRepository``,
+    serves every category (see ADR-0003) -- it is parameterised by the
+    SQLModel row class for the target table.
+    """
+
+    @abstractmethod
+    def upsert_all(
+        self,
+        portfolio_id: int,
+        descriptions_to_values: dict[str, E],
+    ) -> None:
+        """Upsert each ``(portfolio_id, description) -> value`` row with ``source='classifier'``.
+
+        On conflict with an existing row whose ``source = 'classifier'``, the row
+        is updated (value, source, updated_at). On conflict with a row whose
+        ``source = 'user'``, the existing row is preserved -- the classifier
+        never overwrites a user correction. See ADR-0003 §Decision.
+
+        An empty ``descriptions_to_values`` mapping is a no-op; callers may
+        skip this call entirely when they have nothing to write.
+        """
+        ...
--- a/repositories/unstandardised_address/init.py
+++ b/repositories/unstandardised_address/init.py
--- a/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py
+++ b/repositories/unstandardised_address/unstandardised_address_list_csv_s3_repository.py
@ -4,10 +4,12 @@ import uuid
 from datetime import datetime, timezone
 from typing import Optional

-from domain.addresses.user_address import UserAddress
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
-from infrastructure.csv_s3_client import CsvS3Client
-from repositories.user_address.user_address_repository import UserAddressRepository
+from infrastructure.s3.csv_s3_client import CsvS3Client
+from repositories.unstandardised_address.unstandardised_address_list_repository import (
+    UnstandardisedAddressListRepository,
+)

 _ADDRESS_COLUMNS: tuple[str, str, str] = ("Address 1", "Address 2", "Address 3")
 _POSTCODE_COLUMN: str = "postcode"
@ -15,42 +17,45 @@ _INTERNAL_REFERENCE_COLUMN: str = "Internal Reference"
 _POSTCODE_CLEAN_COLUMN: str = "postcode_clean"


-class UserAddressCsvS3Repository(UserAddressRepository):
+class UnstandardisedAddressListCsvS3Repository(UnstandardisedAddressListRepository):
    def __init__(self, csv_client: CsvS3Client, bucket: str) -> None:
        self._csv_client = csv_client
        self._bucket = bucket

-    def load_batch(self, s3_uri: str) -> list[UserAddress]:
+    def load_batch(self, s3_uri: str) -> AddressList:
        rows = self._csv_client.read_rows(s3_uri)
        if rows and _POSTCODE_COLUMN not in rows[0]:
            raise ValueError(
                f"Input CSV {s3_uri} has no {_POSTCODE_COLUMN!r} column; "
                f"columns present: {sorted(rows[0])}"
            )
-        addresses: list[UserAddress] = []
+        addresses: AddressList = AddressList([])
        for row in rows:
            parts = [
                row[col].strip()
                for col in _ADDRESS_COLUMNS
                if col in row and row[col].strip()
            ]
-            user_address = ", ".join(parts)
+            unstandardised_address = ", ".join(parts)
            postcode = row.get(_POSTCODE_COLUMN, "")
            raw_ref = row.get(_INTERNAL_REFERENCE_COLUMN, "").strip()
            internal_reference: Optional[str] = raw_ref or None
            addresses.append(
-                UserAddress(
-                    user_address=user_address,
+                UnstandardisedAddress(
+                    address=unstandardised_address,
                    postcode=Postcode(postcode),
-                    internal_reference=internal_reference,
-                    source_row=row,
+                    org_reference=internal_reference,
+                    additional_info=row,
                )
            )
        return addresses

-    def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str:
+    def save_batch(self, addresses: AddressList, path_prefix: str) -> str:
        rows: list[dict[str, str]] = [
-            {**addr.source_row, _POSTCODE_CLEAN_COLUMN: str(addr.postcode)}
+            {
+                **addr.additional_info,
+                _POSTCODE_CLEAN_COLUMN: str(addr.postcode),
+            }
            for addr in addresses
        ]

--- a/repositories/unstandardised_address/unstandardised_address_list_repository.py
+++ b/repositories/unstandardised_address/unstandardised_address_list_repository.py
@ -0,0 +1,13 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+
+from domain.addresses.unstandardised_address import AddressList
+
+
+class UnstandardisedAddressListRepository(ABC):
+    @abstractmethod
+    def load_batch(self, s3_uri: str) -> AddressList: ...
+
+    @abstractmethod
+    def save_batch(self, addresses: AddressList, path_prefix: str) -> str: ...
--- a/repositories/user_address/user_address_repository.py
+++ b/repositories/user_address/user_address_repository.py
@ -1,13 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-
-from domain.addresses.user_address import UserAddress
-
-
-class UserAddressRepository(ABC):
-    @abstractmethod
-    def load_batch(self, s3_uri: str) -> list[UserAddress]: ...
-
-    @abstractmethod
-    def save_batch(self, addresses: list[UserAddress], path_prefix: str) -> str: ...
--- a/test.requirements.txt
+++ b/test.requirements.txt
@ -10,4 +10,5 @@ fuzzywuzzy
 pymupdf
 playwright==1.58.0
 msal
-moto[s3,sqs]
+moto[s3,sqs]
+openai==1.93.0
--- a/tests/domain/addresses/test_postcode_batching.py
+++ b/tests/domain/addresses/test_postcode_batching.py
@ -1,17 +1,17 @@
 import pytest

 from domain.addresses.postcode_batching import iter_postcode_grouped_batches
-from domain.addresses.user_address import UserAddress
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode


-def _addrs(postcode: str, n: int) -> list[UserAddress]:
-    return [
-        UserAddress(
-            user_address=f"{i} {postcode} Street", postcode=Postcode(postcode)
-        )
-        for i in range(n)
-    ]
+def _addrs(postcode: str, n: int) -> AddressList:
+    return AddressList(
+        [
+            UnstandardisedAddress(address=f"{i} {postcode} Street", postcode=Postcode(postcode))
+            for i in range(n)
+        ]
+    )


 def test_empty_input_yields_no_batches() -> None:
@ -74,9 +74,7 @@ def test_oversize_group_flushes_existing_buffer_first() -> None:
    big = _addrs("BB2 2BB", 7)
    tail = _addrs("CC3 3CC", 1)
    # act
-    batches = list(
-        iter_postcode_grouped_batches(small + big + tail, max_batch_size=5)
-    )
+    batches = list(iter_postcode_grouped_batches(small + big + tail, max_batch_size=5))
    # assert
    assert len(batches) == 3
    assert [str(a.postcode) for a in batches[0]] == ["AA11AA", "AA11AA"]
--- a/tests/domain/addresses/test_unstandardised_address.py
+++ b/tests/domain/addresses/test_unstandardised_address.py
@ -0,0 +1,96 @@
+import dataclasses
+
+import pytest
+
+from domain.addresses.unstandardised_address import UnstandardisedAddress
+from domain.postcode import Postcode
+
+
+def test_unstandardised_address_holds_postcode_value_object() -> None:
+    # act
+    addr = UnstandardisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    # assert
+    assert addr.postcode == Postcode("SW1A1AA")
+
+
+def test_unstandardised_address_preserves_unstandardised_address_verbatim() -> None:
+    # The free-text unstandardised_address string is intentionally NOT normalised --
+    # only the postcode is canonicalised, and that happens inside Postcode.
+    # act
+    addr = UnstandardisedAddress(address="  1 The   Street  ", postcode=Postcode("SW1A1AA"))
+    # assert
+    assert addr.address == "  1 The   Street  "
+
+
+def test_unstandardised_address_internal_reference_defaults_to_none() -> None:
+    # act
+    addr = UnstandardisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    # assert
+    assert addr.org_reference is None
+
+
+def test_unstandardised_address_internal_reference_accepted() -> None:
+    # act
+    addr = UnstandardisedAddress(
+        address="1 The Street",
+        postcode=Postcode("SW1A1AA"),
+        org_reference="cust-42",
+    )
+    # assert
+    assert addr.org_reference == "cust-42"
+
+
+def test_unstandardised_address_is_frozen() -> None:
+    # arrange
+    addr = UnstandardisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    # act / assert
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        addr.postcode = Postcode("OTHER")  # type: ignore[misc]
+
+
+def test_unstandardised_address_equality_uses_canonical_postcode() -> None:
+    # Postcode sanitises eagerly, so addresses built from different surface
+    # forms of the same postcode compare equal.
+    # arrange
+    a = UnstandardisedAddress(address="1 The Street", postcode=Postcode("sw1a 1aa"))
+    b = UnstandardisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    # act / assert
+    assert a == b
+
+
+def test_unstandardised_address_source_row_defaults_to_empty_dict() -> None:
+    # act
+    addr = UnstandardisedAddress(address="1 The Street", postcode=Postcode("SW1A1AA"))
+    # assert
+    assert addr.additional_info == {}
+
+
+def test_unstandardised_address_carries_source_row() -> None:
+    # arrange
+    row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
+    # act
+    addr = UnstandardisedAddress(
+        address="1 The Street",
+        postcode=Postcode("SW1A 1AA"),
+        additional_info=row,
+    )
+    # assert
+    assert addr.additional_info == row
+
+
+def test_unstandardised_address_equality_ignores_source_row() -> None:
+    # source_row is excluded from equality (and hashing): identity stays
+    # defined by the parsed fields.
+    # arrange
+    a = UnstandardisedAddress(
+        address="1 The Street",
+        postcode=Postcode("SW1A1AA"),
+        additional_info={"x": "1"},
+    )
+    b = UnstandardisedAddress(
+        address="1 The Street",
+        postcode=Postcode("SW1A1AA"),
+        additional_info={"y": "2"},
+    )
+    # act / assert
+    assert a == b
--- a/tests/domain/addresses/test_user_address.py
+++ b/tests/domain/addresses/test_user_address.py
@ -1,98 +0,0 @@
-import dataclasses
-
-import pytest
-
-from domain.addresses.user_address import UserAddress
-from domain.postcode import Postcode
-
-
-def test_user_address_holds_postcode_value_object() -> None:
-    # act
-    addr = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
-    # assert
-    assert addr.postcode == Postcode("SW1A1AA")
-
-
-def test_user_address_preserves_user_address_verbatim() -> None:
-    # The free-text user_address string is intentionally NOT normalised --
-    # only the postcode is canonicalised, and that happens inside Postcode.
-    # act
-    addr = UserAddress(
-        user_address="  1 The   Street  ", postcode=Postcode("SW1A1AA")
-    )
-    # assert
-    assert addr.user_address == "  1 The   Street  "
-
-
-def test_user_address_internal_reference_defaults_to_none() -> None:
-    # act
-    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
-    # assert
-    assert addr.internal_reference is None
-
-
-def test_user_address_internal_reference_accepted() -> None:
-    # act
-    addr = UserAddress(
-        user_address="1 The Street",
-        postcode=Postcode("SW1A1AA"),
-        internal_reference="cust-42",
-    )
-    # assert
-    assert addr.internal_reference == "cust-42"
-
-
-def test_user_address_is_frozen() -> None:
-    # arrange
-    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
-    # act / assert
-    with pytest.raises(dataclasses.FrozenInstanceError):
-        addr.postcode = Postcode("OTHER")  # type: ignore[misc]
-
-
-def test_user_address_equality_uses_canonical_postcode() -> None:
-    # Postcode sanitises eagerly, so addresses built from different surface
-    # forms of the same postcode compare equal.
-    # arrange
-    a = UserAddress(user_address="1 The Street", postcode=Postcode("sw1a 1aa"))
-    b = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
-    # act / assert
-    assert a == b
-
-
-def test_user_address_source_row_defaults_to_empty_dict() -> None:
-    # act
-    addr = UserAddress(user_address="1 The Street", postcode=Postcode("SW1A1AA"))
-    # assert
-    assert addr.source_row == {}
-
-
-def test_user_address_carries_source_row() -> None:
-    # arrange
-    row = {"Address 1": "1 The Street", "postcode": "SW1A 1AA", "SAP Score": "72"}
-    # act
-    addr = UserAddress(
-        user_address="1 The Street",
-        postcode=Postcode("SW1A 1AA"),
-        source_row=row,
-    )
-    # assert
-    assert addr.source_row == row
-
-
-def test_user_address_equality_ignores_source_row() -> None:
-    # source_row is excluded from equality (and hashing): identity stays
-    # defined by the parsed fields.
-    # arrange
-    a = UserAddress(
-        user_address="1 The Street",
-        postcode=Postcode("SW1A1AA"),
-        source_row={"x": "1"},
-    )
-    b = UserAddress(
-        user_address="1 The Street",
-        postcode=Postcode("SW1A1AA"),
-        source_row={"y": "2"},
-    )
-    # act / assert
-    assert a == b
--- a/tests/infrastructure/chatgpt/init.py
+++ b/tests/infrastructure/chatgpt/init.py
--- a/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
+++ b/tests/infrastructure/chatgpt/test_chatgpt_column_classifier.py
@ -0,0 +1,185 @@
+from __future__ import annotations
+
+from typing import Optional
+
+import pytest
+
+from domain.data_transformation.column_classifier import ClassificationError
+from domain.epc.property_type import PropertyType
+from domain.epc.wall_type import WallType
+from infrastructure.chatgpt.chatgpt import ChatGPT
+from infrastructure.chatgpt.chatgpt_column_classifier import (
+    ChatGptColumnClassifier,
+)
+from infrastructure.chatgpt.exceptions import ChatGPTClientError
+
+
+class _FakeChatGPT(ChatGPT):
+    """Hand-written ChatGPT stand-in: returns a canned reply, records prompts."""
+
+    def __init__(
+        self,
+        reply: str = "{}",
+        error: Optional[Exception] = None,
+    ) -> None:
+        self.prompts: list[str] = []
+        self.system_prompts: list[Optional[str]] = []
+        self._reply = reply
+        self._error = error
+
+    def generate(self, prompt: str, system_prompt: Optional[str] = None) -> str:
+        self.prompts.append(prompt)
+        self.system_prompts.append(system_prompt)
+        if self._error is not None:
+            raise self._error
+        return self._reply
+
+
+def _property_type_classifier(
+    chat_gpt: ChatGPT,
+) -> ChatGptColumnClassifier[PropertyType]:
+    return ChatGptColumnClassifier(chat_gpt, PropertyType, PropertyType.UNKNOWN)
+
+
+def test_classifies_description_into_its_category() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply='{"semi-detached": "House"}')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"semi-detached"})
+
+    # Assert
+    assert result == {"semi-detached": PropertyType.HOUSE}
+
+
+def test_classifies_when_reply_is_wrapped_in_a_markdown_fence() -> None:
+    # Arrange: ChatGPT wraps the JSON in a ```json ... ``` code fence.
+    chat_gpt = _FakeChatGPT(reply='```json\n{"semi-detached": "House"}\n```')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"semi-detached"})
+
+    # Assert
+    assert result == {"semi-detached": PropertyType.HOUSE}
+
+
+def test_unrecognised_category_maps_to_unknown() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply='{"garden shed": "Shed"}')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"garden shed"})
+
+    # Assert
+    assert result == {"garden shed": PropertyType.UNKNOWN}
+
+
+def test_description_omitted_from_reply_maps_to_unknown() -> None:
+    # Arrange: the reply classifies one description but not the other.
+    chat_gpt = _FakeChatGPT(reply='{"semi-detached": "House"}')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify({"semi-detached", "TBC"})
+
+    # Assert
+    assert result == {
+        "semi-detached": PropertyType.HOUSE,
+        "TBC": PropertyType.UNKNOWN,
+    }
+
+
+def test_chatgpt_failure_raises_classification_error() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(error=ChatGPTClientError("backend unreachable"))
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act / Assert
+    with pytest.raises(ClassificationError):
+        classifier.classify({"semi-detached"})
+
+
+def test_non_json_reply_raises_classification_error_with_the_raw_reply() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply="sorry, I can't do that")
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act / Assert: the error surfaces the offending reply for diagnosis.
+    with pytest.raises(ClassificationError, match="sorry, I can't do that"):
+        classifier.classify({"semi-detached"})
+
+
+def test_empty_description_set_returns_empty_without_calling_chatgpt() -> None:
+    # Arrange
+    chat_gpt = _FakeChatGPT(reply='{"unused": "House"}')
+    classifier = _property_type_classifier(chat_gpt)
+
+    # Act
+    result = classifier.classify(set())
+
+    # Assert
+    assert result == {}
+    assert chat_gpt.prompts == []
+
+
+def test_classifies_with_a_different_category_enum() -> None:
+    # Arrange: the same adapter classifies a WallType column.
+    chat_gpt = _FakeChatGPT(
+        reply='{"solid brick wall": "Solid brick, as built, no insulation (assumed)"}'
+    )
+    classifier = ChatGptColumnClassifier(chat_gpt, WallType, WallType.UNKNOWN)
+
+    # Act
+    result = classifier.classify({"solid brick wall"})
+
+    # Assert
+    assert result == {
+        "solid brick wall": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED
+    }
+
+
+def test_extra_instructions_are_appended_to_the_system_prompt() -> None:
+    # Arrange: column-specific guidance (e.g. wall-type build-era hints)
+    # should reach the model verbatim, in the system prompt ahead of the
+    # JSON-output instruction.
+    chat_gpt = _FakeChatGPT(reply='{"1970s semi": "House"}')
+    classifier = ChatGptColumnClassifier(
+        chat_gpt,
+        PropertyType,
+        PropertyType.UNKNOWN,
+        extra_instructions="If the description carries a build decade, prefer X.",
+    )
+
+    # Act
+    classifier.classify({"1970s semi"})
+
+    # Assert: the hint sits in the system prompt, before the JSON instruction.
+    system_prompt = chat_gpt.system_prompts[0]
+    assert system_prompt is not None
+    assert "If the description carries a build decade, prefer X." in system_prompt
+    hint_index = system_prompt.index("If the description carries a build decade")
+    json_index = system_prompt.index("Reply with only a JSON object")
+    assert hint_index < json_index
+
+
+def test_omitting_extra_instructions_leaves_the_system_prompt_unchanged() -> None:
+    # Arrange: a classifier without per-column guidance must still produce
+    # the original system prompt -- no trailing whitespace, no orphan hint.
+    chat_gpt = _FakeChatGPT(reply='{"semi-detached": "House"}')
+    classifier = ChatGptColumnClassifier(chat_gpt, PropertyType, PropertyType.UNKNOWN)
+
+    # Act
+    classifier.classify({"semi-detached"})
+
+    # Assert
+    system_prompt = chat_gpt.system_prompts[0]
+    assert system_prompt is not None
+    assert system_prompt == (
+        "Classify each free-text description into exactly one category. "
+        "Categories: House, Bungalow, Flat, Maisonette, Park home. "
+        "Reply with only a JSON object mapping each original description "
+        "to its category, and nothing else."
+    )
--- a/tests/infrastructure/test_csv_s3_client.py
+++ b/tests/infrastructure/test_csv_s3_client.py
@ -3,7 +3,7 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws

-from infrastructure.csv_s3_client import CsvS3Client
+from infrastructure.s3.csv_s3_client import CsvS3Client
 from tests.infrastructure import make_boto_client

 BUCKET = "csv-bucket"
@ -49,3 +49,36 @@ def test_read_rows_rejects_wrong_bucket(csv_client: CsvS3Client) -> None:
    # act / assert
    with pytest.raises(ValueError, match="does not match client bucket"):
        csv_client.read_rows("s3://other-bucket/uploads/addresses.csv")
+
+
+def test_read_rows_indexes_duplicate_column_names(csv_client: CsvS3Client) -> None:
+    # arrange: the Hyde export has two columns both headed "Walls" — a
+    # description and a score. Without disambiguation csv.DictReader would
+    # collapse them onto one key and the description would be lost.
+    raw = "Address 1,Walls,Roofs,Walls\n1 High St,Cavity: Filled,Pitched 300mm,9.6\n"
+    uri = csv_client.put_object("uploads/dup.csv", raw.encode("utf-8"))
+
+    # act
+    rows = csv_client.read_rows(uri)
+
+    # assert: the first occurrence keeps its name, the second gets an index.
+    assert rows == [
+        {
+            "Address 1": "1 High St",
+            "Walls": "Cavity: Filled",
+            "Roofs": "Pitched 300mm",
+            "Walls_1": "9.6",
+        }
+    ]
+
+
+def test_read_rows_indexes_each_repeat_of_a_column(csv_client: CsvS3Client) -> None:
+    # arrange: three columns sharing one header.
+    raw = "Walls,Walls,Walls\nfirst,second,third\n"
+    uri = csv_client.put_object("uploads/triple.csv", raw.encode("utf-8"))
+
+    # act
+    rows = csv_client.read_rows(uri)
+
+    # assert
+    assert rows == [{"Walls": "first", "Walls_1": "second", "Walls_2": "third"}]
--- a/tests/infrastructure/test_s3_client.py
+++ b/tests/infrastructure/test_s3_client.py
@ -3,7 +3,7 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws

-from infrastructure.s3_client import S3Client
+from infrastructure.s3.s3_client import S3Client
 from tests.infrastructure import make_boto_client

 BUCKET = "test-bucket"
--- a/tests/infrastructure/test_s3_uri.py
+++ b/tests/infrastructure/test_s3_uri.py
@ -1,6 +1,6 @@
 import pytest

-from infrastructure.s3_uri import parse_s3_uri
+from infrastructure.s3.s3_uri import parse_s3_uri


 def test_parses_simple_s3_uri() -> None:
--- a/tests/orchestration/test_landlord_description_overrides_orchestrator.py
+++ b/tests/orchestration/test_landlord_description_overrides_orchestrator.py
@ -0,0 +1,469 @@
+from __future__ import annotations
+
+from enum import Enum
+from typing import Any, Optional
+
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
+from domain.epc.built_form_type import BuiltFormType
+from domain.epc.property_type import PropertyType
+from domain.epc.wall_type import WallType
+from domain.postcode import Postcode
+from domain.data_transformation.column_classifier import ColumnClassifier
+from orchestration.classifiable_column import ClassifiableColumn
+from orchestration.landlord_description_overrides_orchestrator import (
+    LandlordDescriptionOverridesOrchestrator,
+)
+from repositories.landlord_overrides.landlord_override_repository import (
+    LandlordOverrideRepository,
+)
+from repositories.unstandardised_address.unstandardised_address_list_repository import (
+    UnstandardisedAddressListRepository,
+)
+
+
+class _StubUnstandardisedAddressRepository(UnstandardisedAddressListRepository):
+    """``get_col_to_description_mappings`` never touches the repo."""
+
+    def load_batch(self, s3_uri: str) -> AddressList:
+        raise NotImplementedError()
+
+    def save_batch(self, addresses: AddressList, path_prefix: str) -> str:
+        raise NotImplementedError()
+
+
+class _StubColumnClassifier(ColumnClassifier[Enum]):
+    """Records the descriptions it received; returns a canned mapping."""
+
+    def __init__(self, result: dict[str, Enum]) -> None:
+        self.received: Optional[set[str]] = None
+        self._result = result
+
+    def classify(self, descriptions: set[str]) -> dict[str, Enum]:
+        self.received = descriptions
+        return self._result
+
+
+class _StubLandlordOverrideRepository(LandlordOverrideRepository[Enum]):
+    """Records every ``upsert_all`` call so tests can assert routing."""
+
+    def __init__(self) -> None:
+        self.calls: list[tuple[int, dict[str, Enum]]] = []
+
+    def upsert_all(
+        self, portfolio_id: int, descriptions_to_values: dict[str, Enum]
+    ) -> None:
+        self.calls.append((portfolio_id, dict(descriptions_to_values)))
+
+
+def _make_unstandardised_address(
+    landlord_additional_info: dict[str, str],
+) -> UnstandardisedAddress:
+    return UnstandardisedAddress(
+        address="1 High St",
+        postcode=Postcode("AA1 1AA"),
+        additional_info=landlord_additional_info,
+    )
+
+
+def _orchestrator(
+    columns: Optional[list[ClassifiableColumn[Any]]] = None,
+) -> LandlordDescriptionOverridesOrchestrator:
+    return LandlordDescriptionOverridesOrchestrator(
+        unstandardised_address_repo=_StubUnstandardisedAddressRepository(),
+        columns=columns or [],
+    )
+
+
+def _column(
+    name: str,
+    source_column: str,
+    classifier: ColumnClassifier[Any],
+    repo: Optional[LandlordOverrideRepository[Any]] = None,
+) -> ClassifiableColumn[Any]:
+    return ClassifiableColumn(
+        name=name,
+        source_column=source_column,
+        classifier=classifier,
+        repo=repo or _StubLandlordOverrideRepository(),
+    )
+
+
+def test_collects_every_value_per_shared_key() -> None:
+    # arrange: every address carries the same keys, all values distinct.
+    addresses = AddressList(
+        [
+            _make_unstandardised_address({"description": "cosy", "condition": "new"}),
+            _make_unstandardised_address({"description": "spacious", "condition": "worn"}),
+            _make_unstandardised_address({"description": "bright", "condition": "fair"}),
+        ]
+    )
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert
+    assert mappings == {
+        "description": {"cosy", "spacious", "bright"},
+        "condition": {"new", "worn", "fair"},
+    }
+
+
+def test_repeated_values_collapse_to_one_variant() -> None:
+    # arrange: two addresses share the same wall description.
+    addresses = AddressList(
+        [
+            _make_unstandardised_address({"description": "cosy"}),
+            _make_unstandardised_address({"description": "cosy"}),
+            _make_unstandardised_address({"description": "bright"}),
+        ]
+    )
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert: a set keeps one entry per distinct variant.
+    assert mappings == {"description": {"cosy", "bright"}}
+
+
+def test_case_only_variants_collapse_to_one() -> None:
+    # arrange: the same description typed with inconsistent casing.
+    addresses = AddressList(
+        [
+            _make_unstandardised_address({"description": "Cosy"}),
+            _make_unstandardised_address({"description": "cosy"}),
+            _make_unstandardised_address({"description": "COSY"}),
+        ]
+    )
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert: lower-casing folds the casing typos into one variant.
+    assert mappings == {"description": {"cosy"}}
+
+
+def test_comma_separated_value_splits_into_individual_entries() -> None:
+    # arrange: a single cell packs several descriptions, comma-separated.
+    addresses = AddressList(
+        [_make_unstandardised_address({"description": "cosy, bright, COSY"})]
+    )
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert: each comma-separated part is its own trimmed, lower-cased entry.
+    assert mappings == {"description": {"cosy", "bright"}}
+
+
+def test_empty_address_list_yields_empty_mapping() -> None:
+    # arrange / act
+    mappings = _orchestrator().get_col_to_description_mappings(AddressList([]))
+
+    # assert
+    assert mappings == {}
+
+
+def test_single_address_yields_single_value_per_key() -> None:
+    # arrange
+    addresses = AddressList([_make_unstandardised_address({"description": "cosy"})])
+
+    # act
+    mappings = _orchestrator().get_col_to_description_mappings(addresses)
+
+    # assert
+    assert mappings == {"description": {"cosy"}}
+
+
+def test_classify_columns_classifies_each_registered_column() -> None:
+    # arrange: addresses carry two classifiable columns.
+    addresses = AddressList(
+        [
+            _make_unstandardised_address(
+                {"Property Type": "semi-detached", "Walls": "solid brick"}
+            ),
+        ]
+    )
+    property_types = _StubColumnClassifier(
+        result={"semi-detached": PropertyType.HOUSE}
+    )
+    wall_types = _StubColumnClassifier(result={"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED})
+
+    # act
+    result = _orchestrator(
+        [
+            _column("property_type", "Property Type", property_types),
+            _column("wall_type", "Walls", wall_types),
+        ]
+    ).classify_columns(addresses)
+
+    # assert: each registered column was classified independently, keyed by name.
+    assert result == {
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED},
+    }
+
+
+def test_classify_columns_yields_empty_mapping_for_an_absent_column() -> None:
+    # arrange: a classifier is registered for a source column the addresses lack.
+    addresses = AddressList([_make_unstandardised_address({"Walls": "cavity"})])
+    property_types = _StubColumnClassifier(result={})
+
+    # act
+    result = _orchestrator(
+        [_column("property_type", "Property Type", property_types)]
+    ).classify_columns(addresses)
+
+    # assert: the absent column classified an empty description set.
+    assert result == {"property_type": {}}
+    assert property_types.received == set()
+
+
+def test_classify_columns_runs_two_classifiers_against_a_shared_source_column() -> None:
+    # arrange: the "Property Type" landlord column feeds two classifiers --
+    # PropertyType (what kind of dwelling) and BuiltFormType (how it joins
+    # to neighbours). Both must run against the same description set; each
+    # result is keyed by its column's ``name``.
+    addresses = AddressList(
+        [_make_unstandardised_address({"Property Type": "semi-detached house"})]
+    )
+    property_types = _StubColumnClassifier(
+        result={"semi-detached house": PropertyType.HOUSE}
+    )
+    built_form_types = _StubColumnClassifier(
+        result={"semi-detached house": BuiltFormType.SEMI_DETACHED}
+    )
+
+    # act
+    result = _orchestrator(
+        [
+            _column("property_type", "Property Type", property_types),
+            _column("built_form_type", "Property Type", built_form_types),
+        ]
+    ).classify_columns(addresses)
+
+    # assert: both classifiers saw the same description set, and the two
+    # results live under their own ``name`` keys without colliding.
+    assert property_types.received == {"semi-detached house"}
+    assert built_form_types.received == {"semi-detached house"}
+    assert result == {
+        "property_type": {"semi-detached house": PropertyType.HOUSE},
+        "built_form_type": {"semi-detached house": BuiltFormType.SEMI_DETACHED},
+    }
+
+
+def test_classify_and_persist_writes_each_columns_mapping_to_its_own_repo() -> None:
+    # arrange: two columns with distinct repos -- the orchestrator must
+    # route each column's classifications to its own repo, not mix them.
+    addresses = AddressList(
+        [
+            _make_unstandardised_address(
+                {"Property Type": "semi-detached", "Walls": "solid brick"}
+            ),
+        ]
+    )
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+    columns: list[ClassifiableColumn[Any]] = [
+        _column(
+            "property_type",
+            "Property Type",
+            _StubColumnClassifier({"semi-detached": PropertyType.HOUSE}),
+            property_type_repo,
+        ),
+        _column(
+            "wall_type",
+            "Walls",
+            _StubColumnClassifier({"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}),
+            wall_type_repo,
+        ),
+    ]
+
+    # act
+    result = _orchestrator(columns).classify_and_persist(addresses, portfolio_id=42)
+
+    # assert: each repo received exactly its own column's mapping, under the
+    # given portfolio_id, and the return value mirrors classify_columns.
+    assert property_type_repo.calls == [(42, {"semi-detached": PropertyType.HOUSE})]
+    assert wall_type_repo.calls == [
+        (42, {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED})
+    ]
+    assert result == {
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED},
+    }
+
+
+def test_classify_and_persist_skips_upsert_for_a_column_absent_from_the_batch() -> None:
+    # arrange: ``Walls`` is registered but the address has no ``Walls`` column.
+    # The orchestrator should still classify (yielding an empty mapping) but
+    # must NOT call ``upsert_all`` -- an empty bulk insert is a noisy no-op.
+    addresses = AddressList(
+        [_make_unstandardised_address({"Property Type": "semi-detached"})]
+    )
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+    columns: list[ClassifiableColumn[Any]] = [
+        _column(
+            "property_type",
+            "Property Type",
+            _StubColumnClassifier({"semi-detached": PropertyType.HOUSE}),
+            property_type_repo,
+        ),
+        _column(
+            "wall_type",
+            "Walls",
+            _StubColumnClassifier({}),
+            wall_type_repo,
+        ),
+    ]
+
+    # act
+    _orchestrator(columns).classify_and_persist(addresses, portfolio_id=7)
+
+    # assert: Property Type wrote; Walls did not.
+    assert property_type_repo.calls == [(7, {"semi-detached": PropertyType.HOUSE})]
+    assert wall_type_repo.calls == []
+
+
+def test_classify_from_rows_classifies_each_column_without_persisting() -> None:
+    # arrange: raw CSV rows (not an AddressList) carry two classifiable columns.
+    rows = [{"Property Type": "semi-detached", "Walls": "solid brick"}]
+    property_types = _StubColumnClassifier({"semi-detached": PropertyType.HOUSE})
+    wall_types = _StubColumnClassifier(
+        {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
+    )
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+
+    # act
+    result = _orchestrator(
+        [
+            _column("property_type", "Property Type", property_types, property_type_repo),
+            _column("wall_type", "Walls", wall_types, wall_type_repo),
+        ]
+    ).classify_from_rows(rows)
+
+    # assert: each classifier ran against its column's descriptions, keyed by
+    # name -- and NOT a single repo was touched (classification is DB-free, so
+    # the slow ChatGPT work can run before any transaction opens).
+    assert result == {
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED},
+    }
+    assert property_type_repo.calls == []
+    assert wall_type_repo.calls == []
+
+
+def test_classify_from_rows_splits_and_normalises_descriptions() -> None:
+    # arrange: one cell packs several descriptions with inconsistent casing,
+    # spread across rows. The rows path must fold them exactly like the
+    # AddressList path: comma-split, trimmed, lower-cased, de-duped.
+    rows = [
+        {"Walls": "Solid Brick, cavity"},
+        {"Walls": "SOLID BRICK"},
+    ]
+    wall_types = _StubColumnClassifier({})
+
+    # act
+    _orchestrator(
+        [_column("wall_type", "Walls", wall_types)]
+    ).classify_from_rows(rows)
+
+    # assert: the classifier saw one normalised entry per distinct variant.
+    assert wall_types.received == {"solid brick", "cavity"}
+
+
+def test_classify_from_rows_yields_empty_mapping_for_an_absent_column() -> None:
+    # arrange: a column is registered for a header the rows lack.
+    rows = [{"Walls": "cavity"}]
+    property_types = _StubColumnClassifier({})
+
+    # act
+    result = _orchestrator(
+        [_column("property_type", "Property Type", property_types)]
+    ).classify_from_rows(rows)
+
+    # assert: the absent column classified an empty description set.
+    assert result == {"property_type": {}}
+    assert property_types.received == set()
+
+
+def test_persist_routes_each_columns_mapping_to_its_own_repo() -> None:
+    # arrange: a finished ``classified`` mapping (as classify_* would return)
+    # and two columns with distinct repos.
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+    columns: list[ClassifiableColumn[Any]] = [
+        _column("property_type", "Property Type", _StubColumnClassifier({}), property_type_repo),
+        _column("wall_type", "Walls", _StubColumnClassifier({}), wall_type_repo),
+    ]
+    classified: dict[str, dict[str, Enum]] = {
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED},
+    }
+
+    # act
+    _orchestrator(columns).persist(classified, portfolio_id=42)
+
+    # assert: each repo received exactly its own column's mapping.
+    assert property_type_repo.calls == [(42, {"semi-detached": PropertyType.HOUSE})]
+    assert wall_type_repo.calls == [
+        (42, {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED})
+    ]
+
+
+def test_persist_skips_empty_and_missing_mappings() -> None:
+    # arrange: ``property_type`` has an empty mapping; ``wall_type`` is absent
+    # from ``classified`` entirely. Neither should hit the DB -- and the
+    # missing key must not raise (``persist`` reads with ``.get``).
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+    columns: list[ClassifiableColumn[Any]] = [
+        _column("property_type", "Property Type", _StubColumnClassifier({}), property_type_repo),
+        _column("wall_type", "Walls", _StubColumnClassifier({}), wall_type_repo),
+    ]
+    classified: dict[str, dict[str, Enum]] = {"property_type": {}}
+
+    # act
+    _orchestrator(columns).persist(classified, portfolio_id=7)
+
+    # assert: no upserts at all.
+    assert property_type_repo.calls == []
+    assert wall_type_repo.calls == []
+
+
+def test_classify_and_persist_from_rows_composes_classify_then_persist() -> None:
+    # arrange: the one-shot rows path must classify AND route to repos, so the
+    # convenience composition stays equivalent to calling the two in sequence.
+    rows = [{"Property Type": "semi-detached", "Walls": "solid brick"}]
+    property_type_repo = _StubLandlordOverrideRepository()
+    wall_type_repo = _StubLandlordOverrideRepository()
+    columns: list[ClassifiableColumn[Any]] = [
+        _column(
+            "property_type",
+            "Property Type",
+            _StubColumnClassifier({"semi-detached": PropertyType.HOUSE}),
+            property_type_repo,
+        ),
+        _column(
+            "wall_type",
+            "Walls",
+            _StubColumnClassifier(
+                {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED}
+            ),
+            wall_type_repo,
+        ),
+    ]
+
+    # act
+    result = _orchestrator(columns).classify_and_persist_from_rows(rows, portfolio_id=99)
+
+    # assert: same return shape as classify_from_rows, and each repo wrote once.
+    assert result == {
+        "property_type": {"semi-detached": PropertyType.HOUSE},
+        "wall_type": {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED},
+    }
+    assert property_type_repo.calls == [(99, {"semi-detached": PropertyType.HOUSE})]
+    assert wall_type_repo.calls == [
+        (99, {"solid brick": WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED})
+    ]
--- a/tests/orchestration/test_postcode_splitter_orchestrator.py
+++ b/tests/orchestration/test_postcode_splitter_orchestrator.py
@ -13,13 +13,13 @@ from sqlalchemy import Engine
 from sqlmodel import Session

 from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
-from infrastructure.csv_s3_client import CsvS3Client
+from infrastructure.s3.csv_s3_client import CsvS3Client
 from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
 from orchestration.task_orchestrator import TaskOrchestrator
 from repositories.tasks.subtask_postgres_repository import SubTaskPostgresRepository
 from repositories.tasks.task_postgres_repository import TaskPostgresRepository
-from repositories.user_address.user_address_csv_s3_repository import (
-    UserAddressCsvS3Repository,
+from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
+    UnstandardisedAddressListCsvS3Repository,
 )

 BUCKET = "splitter-bucket"
@ -27,7 +27,9 @@ REGION = "us-east-1"


 def _make_boto_client(service_name: str) -> Any:
-    factory: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    factory: Any = (
+        boto3.client
+    )  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    return factory(service_name, region_name=REGION)


@ -62,7 +64,7 @@ class Harness:
    csv_client: CsvS3Client
    boto_sqs: Any
    queue_url: str
-    repo: UserAddressCsvS3Repository
+    repo: UnstandardisedAddressListCsvS3Repository


@pytest.fixture
@ -76,7 +78,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:
        queue_url = cast(str, queue["QueueUrl"])

        csv_client = CsvS3Client(boto_s3, BUCKET)
-        repo = UserAddressCsvS3Repository(csv_client, BUCKET)
+        repo = UnstandardisedAddressListCsvS3Repository(csv_client, BUCKET)
        queue_client = Address2UprnQueueClient(boto_sqs, queue_url)

        # DB: ephemeral PostgreSQL TaskOrchestrator
@ -89,7 +91,7 @@ def harness(db_engine: Engine) -> Iterator[Harness]:

            splitter = PostcodeSplitterOrchestrator(
                task_orchestrator=task_orchestrator,
-                user_address_repo=repo,
+                unstandardised_address_repo=repo,
                queue_client=queue_client,
                max_batch_size=3,
            )
@ -169,10 +171,8 @@ def test_split_and_dispatch_creates_three_children_for_fixture(
    harness: Harness,
 ) -> None:
    # arrange
-    parent_task, parent_subtask = (
-        harness.task_orchestrator.create_task_with_subtask(
-            task_source="manual:postcode-splitter-int"
-        )
+    parent_task, parent_subtask = harness.task_orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
    )
    input_uri = _upload_fixture_csv(harness.csv_client)

@ -197,10 +197,8 @@ def test_split_and_dispatch_persists_child_inputs_with_task_id_and_s3_uri(
    harness: Harness,
 ) -> None:
    # arrange
-    parent_task, parent_subtask = (
-        harness.task_orchestrator.create_task_with_subtask(
-            task_source="manual:postcode-splitter-int"
-        )
+    parent_task, parent_subtask = harness.task_orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
    )
    input_uri = _upload_fixture_csv(harness.csv_client)

@ -230,10 +228,8 @@ def test_split_and_dispatch_publishes_one_message_per_child_with_matching_ids(
    harness: Harness,
 ) -> None:
    # arrange
-    parent_task, parent_subtask = (
-        harness.task_orchestrator.create_task_with_subtask(
-            task_source="manual:postcode-splitter-int"
-        )
+    parent_task, parent_subtask = harness.task_orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
    )
    input_uri = _upload_fixture_csv(harness.csv_client)

@ -267,10 +263,8 @@ def test_split_and_dispatch_returns_child_ids_in_dispatch_order(
    harness: Harness,
 ) -> None:
    # arrange
-    parent_task, parent_subtask = (
-        harness.task_orchestrator.create_task_with_subtask(
-            task_source="manual:postcode-splitter-int"
-        )
+    parent_task, parent_subtask = harness.task_orchestrator.create_task_with_subtask(
+        task_source="manual:postcode-splitter-int"
    )
    input_uri = _upload_fixture_csv(harness.csv_client)

--- a/tests/repositories/landlord_overrides/init.py
+++ b/tests/repositories/landlord_overrides/init.py
--- a/tests/repositories/landlord_overrides/postgres/init.py
+++ b/tests/repositories/landlord_overrides/postgres/init.py
--- a/tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py
+++ b/tests/repositories/landlord_overrides/postgres/test_landlord_overrides_postgres_repository.py
@ -0,0 +1,220 @@
+"""Integration tests for the consolidated landlord-overrides upsert adapter.
+
+``LandlordOverridesRepository`` serves every ``landlord_<category>_overrides``
+table; the table is selected by the ``…Row`` class passed at construction. The
+source-aware conflict policy lives entirely in SQL (``INSERT ... ON CONFLICT
+... DO UPDATE ... WHERE existing.source = 'classifier'``), so the only way to
+verify it correctly distinguishes ``EXCLUDED.source`` from the qualified
+``<table>.source`` is against a real Postgres -- the ``db_engine`` fixture in
+``tests/conftest.py`` spins one up per test.
+
+Each scenario is parametrised across two distinct categories (property type and
+wall type). Running the shared body against two different ``…Row`` classes
+proves the adapter writes to whichever table it was handed -- not just that one
+table happens to work.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+from dataclasses import dataclass
+from enum import Enum
+from typing import cast
+
+import pytest
+from sqlalchemy import Engine, Table
+from sqlmodel import Session, SQLModel, select
+
+from domain.epc.property_type import PropertyType
+from domain.epc.wall_type import WallType
+from infrastructure.landlord_overrides.landlord_overrides_postgres_repository import (
+    LandlordOverridesRepository,
+)
+from infrastructure.postgres.landlord_override_enums import OverrideSource
+from infrastructure.postgres.landlord_property_type_override_table import (
+    LandlordPropertyTypeOverrideRow,
+)
+from infrastructure.postgres.landlord_wall_type_override_table import (
+    LandlordWallTypeOverrideRow,
+)
+
+
+@dataclass(frozen=True)
+class Category:
+    """One ``landlord_<category>_overrides`` table plus a few of its enum values.
+
+    ``first``/``second`` are two distinct classifier values for re-upsert
+    tests; ``user_correction`` stands in for a value only a user would pick;
+    ``unknown`` is the category's ``UNKNOWN`` member.
+    """
+
+    id: str
+    row_type: type[SQLModel]
+    first: Enum
+    second: Enum
+    user_correction: Enum
+    unknown: Enum
+
+
+CATEGORIES = [
+    Category(
+        id="property_type",
+        row_type=LandlordPropertyTypeOverrideRow,
+        first=PropertyType.FLAT,
+        second=PropertyType.HOUSE,
+        user_correction=PropertyType.BUNGALOW,
+        unknown=PropertyType.UNKNOWN,
+    ),
+    Category(
+        id="wall_type",
+        row_type=LandlordWallTypeOverrideRow,
+        first=WallType.CAVITY_FILLED,
+        second=WallType.SOLID_BRICK_AS_BUILT_NO_INSULATION_ASSUMED,
+        user_correction=WallType.SANDSTONE_AS_BUILT_NO_INSULATION_ASSUMED,
+        unknown=WallType.UNKNOWN,
+    ),
+]
+
+
+@pytest.fixture
+def session(db_engine: Engine) -> Iterator[Session]:
+    with Session(db_engine) as s:
+        yield s
+
+
+@pytest.fixture(params=CATEGORIES, ids=lambda c: c.id)
+def category(request: pytest.FixtureRequest) -> Category:
+    return request.param
+
+
+def _select_row(
+    session: Session, row_type: type[SQLModel], portfolio_id: int, description: str
+) -> SQLModel:
+    # The row class is only known as ``type[SQLModel]`` here, so its column
+    # descriptors aren't typed; filter via the Core ``Table`` columns instead.
+    table = cast(Table, getattr(row_type, "__table__"))
+    rows = session.exec(
+        select(row_type).where(
+            table.c.portfolio_id == portfolio_id,
+            table.c.description == description,
+        )
+    ).all()
+    assert len(rows) == 1, f"expected exactly one row, got {len(rows)}"
+    return rows[0]
+
+
+def test_inserts_a_fresh_row_with_source_classifier(
+    session: Session, category: Category
+) -> None:
+    # arrange
+    repo = LandlordOverridesRepository[Enum](session, category.row_type)
+
+    # act
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.first})
+    session.commit()
+
+    # assert
+    row = _select_row(session, category.row_type, portfolio_id=1, description="cosy")
+    assert getattr(row, "value") is category.first
+    assert getattr(row, "source") == OverrideSource.CLASSIFIER
+
+
+def test_reupsert_overwrites_a_classifier_row(
+    session: Session, category: Category
+) -> None:
+    # arrange: a stale classifier row exists.
+    repo = LandlordOverridesRepository[Enum](session, category.row_type)
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.first})
+    session.commit()
+
+    # act: re-classify with a different value.
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.second})
+    session.commit()
+
+    # assert: the new classification wins.
+    row = _select_row(session, category.row_type, portfolio_id=1, description="cosy")
+    assert getattr(row, "value") is category.second
+    assert getattr(row, "source") == OverrideSource.CLASSIFIER
+
+
+def test_reupsert_does_not_overwrite_a_user_row(
+    session: Session, category: Category
+) -> None:
+    # arrange: a user has corrected the row. The classifier path never produces
+    # ``source = 'user'``; we install the row directly to mimic the override
+    # frontend.
+    user_row = category.row_type(
+        portfolio_id=1,
+        description="cosy",
+        value=category.user_correction,
+        source=OverrideSource.USER,
+    )
+    session.add(user_row)
+    session.commit()
+
+    # act: the classifier re-runs and tries to reclassify the same description.
+    # Under the source-aware conflict policy, this must be silently skipped --
+    # user edits beat classifier reruns.
+    repo = LandlordOverridesRepository[Enum](session, category.row_type)
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.second})
+    session.commit()
+
+    # assert: the user row is unchanged.
+    row = _select_row(session, category.row_type, portfolio_id=1, description="cosy")
+    assert getattr(row, "value") is category.user_correction
+    assert getattr(row, "source") == OverrideSource.USER
+
+
+def test_upsert_keeps_other_portfolios_descriptions_independent(
+    session: Session, category: Category
+) -> None:
+    # arrange / act: the unique key is ``(portfolio_id, description)``, so the
+    # same description for two different portfolios must coexist as two rows.
+    repo = LandlordOverridesRepository[Enum](session, category.row_type)
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={"cosy": category.first})
+    repo.upsert_all(portfolio_id=2, descriptions_to_values={"cosy": category.second})
+    session.commit()
+
+    # assert: both rows survive with their own values.
+    assert (
+        getattr(_select_row(session, category.row_type, 1, "cosy"), "value")
+        is category.first
+    )
+    assert (
+        getattr(_select_row(session, category.row_type, 2, "cosy"), "value")
+        is category.second
+    )
+
+
+def test_upsert_persists_unknown_so_a_user_can_resolve_it_later(
+    session: Session, category: Category
+) -> None:
+    # arrange / act: a description the classifier couldn't resolve still lands
+    # -- per ADR-0002 §5 / ADR-0003 §Decision, so a future user override can
+    # upgrade it to a real value.
+    repo = LandlordOverridesRepository[Enum](session, category.row_type)
+    repo.upsert_all(
+        portfolio_id=1,
+        descriptions_to_values={"unparseable nonsense": category.unknown},
+    )
+    session.commit()
+
+    # assert: the row exists with value=UNKNOWN, source=classifier.
+    row = _select_row(
+        session, category.row_type, portfolio_id=1, description="unparseable nonsense"
+    )
+    assert getattr(row, "value") is category.unknown
+    assert getattr(row, "source") == OverrideSource.CLASSIFIER
+
+
+def test_upsert_all_with_empty_mapping_is_a_no_op(
+    session: Session, category: Category
+) -> None:
+    # arrange / act
+    repo = LandlordOverridesRepository[Enum](session, category.row_type)
+    repo.upsert_all(portfolio_id=1, descriptions_to_values={})
+    session.commit()
+
+    # assert: nothing was inserted.
+    rows = session.exec(select(category.row_type)).all()
+    assert rows == []
--- a/tests/repositories/unstandardised_address/init.py
+++ b/tests/repositories/unstandardised_address/init.py
--- a/tests/repositories/unstandardised_address/conftest.py
+++ b/tests/repositories/unstandardised_address/conftest.py
--- a/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py
+++ b/tests/repositories/unstandardised_address/test_unstandardised_address_list_csv_s3_repository.py
@ -3,11 +3,11 @@ from collections.abc import Iterator
 import pytest
 from moto import mock_aws

-from domain.addresses.user_address import UserAddress
+from domain.addresses.unstandardised_address import AddressList, UnstandardisedAddress
 from domain.postcode import Postcode
-from infrastructure.csv_s3_client import CsvS3Client
-from repositories.user_address.user_address_csv_s3_repository import (
-    UserAddressCsvS3Repository,
+from infrastructure.s3.csv_s3_client import CsvS3Client
+from repositories.unstandardised_address.unstandardised_address_list_csv_s3_repository import (
+    UnstandardisedAddressListCsvS3Repository,
 )
 from tests.infrastructure import make_boto_client

@ -15,22 +15,22 @@ BUCKET = "user-address-bucket"


@pytest.fixture
-def repo() -> Iterator[UserAddressCsvS3Repository]:
+def repo() -> Iterator[UnstandardisedAddressListCsvS3Repository]:
    with mock_aws():
        boto_client = make_boto_client("s3")
        boto_client.create_bucket(Bucket=BUCKET)
        csv_client = CsvS3Client(boto_client, BUCKET)
-        yield UserAddressCsvS3Repository(csv_client, BUCKET)
+        yield UnstandardisedAddressListCsvS3Repository(csv_client, BUCKET)


 def _upload_csv(
-    repo: UserAddressCsvS3Repository, rows: list[dict[str, str]], key: str
+    repo: UnstandardisedAddressListCsvS3Repository, rows: list[dict[str, str]], key: str
 ) -> str:
    return repo._csv_client.save_rows(rows, key)  # pyright: ignore[reportPrivateUsage]


 def test_load_batch_parses_address_postcode_and_reference(
-    repo: UserAddressCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
    # arrange
    rows = [
@ -50,13 +50,13 @@ def test_load_batch_parses_address_postcode_and_reference(
    # assert
    assert len(addresses) == 1
    address = addresses[0]
-    assert address.user_address == "1 High Street, Flat 2, Townville"
+    assert address.address == "1 High Street, Flat 2, Townville"
    assert address.postcode == Postcode("SW1A1AA")
-    assert address.internal_reference == "REF-001"
+    assert address.org_reference == "REF-001"


 def test_load_batch_uses_only_address_1_when_others_missing(
-    repo: UserAddressCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
    # arrange
    rows = [
@ -75,13 +75,13 @@ def test_load_batch_uses_only_address_1_when_others_missing(

    # assert
    assert len(addresses) == 1
-    assert addresses[0].user_address == "10 Cardiff Road"
+    assert addresses[0].address == "10 Cardiff Road"
    assert addresses[0].postcode == Postcode("CF101AA")
-    assert addresses[0].internal_reference == "REF-002"
+    assert addresses[0].org_reference == "REF-002"


 def test_load_batch_handles_missing_internal_reference(
-    repo: UserAddressCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
    # arrange
    rows = [
@ -100,16 +100,16 @@ def test_load_batch_handles_missing_internal_reference(

    # assert
    assert len(addresses) == 1
-    assert addresses[0].user_address == "5 Park Lane"
+    assert addresses[0].address == "5 Park Lane"
    assert addresses[0].postcode == Postcode("M11AA")
-    assert addresses[0].internal_reference is None
+    assert addresses[0].org_reference is None


 def test_load_batch_captures_full_source_row(
-    repo: UserAddressCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
    # A raw EPC-export-shaped row: the splitter must preserve every column,
-    # not just the ones it parses into UserAddress fields.
+    # not just the ones it parses into UnstandardisedAddress fields.
    # arrange
    row = {
        "Asset Reference": "511",
@ -124,11 +124,11 @@ def test_load_batch_captures_full_source_row(
    addresses = repo.load_batch(uri)

    # assert
-    assert addresses[0].source_row == row
+    assert addresses[0].additional_info == row


 def test_load_batch_raises_when_postcode_column_absent(
-    repo: UserAddressCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
    # arrange
    rows = [{"Address 1": "1 High Street", "Property Type": "Flat"}]
@ -140,7 +140,7 @@ def test_load_batch_raises_when_postcode_column_absent(


 def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(
-    repo: UserAddressCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
    # arrange
    row = {
@ -154,7 +154,9 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(

    # act
    saved_uri = repo.save_batch(addresses, "tasks/passthrough")
-    saved_rows = repo._csv_client.read_rows(saved_uri)  # pyright: ignore[reportPrivateUsage]
+    saved_rows = repo._csv_client.read_rows(
+        saved_uri
+    )  # pyright: ignore[reportPrivateUsage]

    # assert
    assert len(saved_rows) == 1
@ -167,16 +169,21 @@ def test_save_batch_passes_through_all_columns_and_appends_postcode_clean(


 def test_save_batch_returns_uri_under_path_prefix(
-    repo: UserAddressCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
    # arrange
-    addresses = [
-        UserAddress(
-            user_address="1 High Street",
-            postcode=Postcode("SW1A 1AA"),
-            source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
-        ),
-    ]
+    addresses = AddressList(
+        [
+            UnstandardisedAddress(
+                address="1 High Street",
+                postcode=Postcode("SW1A 1AA"),
+                additional_info={
+                    "Address 1": "1 High Street",
+                    "postcode": "SW1A 1AA",
+                },
+            ),
+        ]
+    )

    # act
    uri = repo.save_batch(addresses, "tasks/abc/batches")
@ -187,7 +194,7 @@ def test_save_batch_returns_uri_under_path_prefix(


 def test_save_then_reload_round_trip_preserves_columns(
-    repo: UserAddressCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
    # arrange
    rows = [
@ -207,7 +214,9 @@ def test_save_then_reload_round_trip_preserves_columns(

    # act
    saved_uri = repo.save_batch(addresses, "tasks/round-trip")
-    saved_rows = repo._csv_client.read_rows(saved_uri)  # pyright: ignore[reportPrivateUsage]
+    saved_rows = repo._csv_client.read_rows(
+        saved_uri
+    )  # pyright: ignore[reportPrivateUsage]

    # assert
    # Original columns come back verbatim; postcode_clean is the only addition.
@ -218,16 +227,21 @@ def test_save_then_reload_round_trip_preserves_columns(


 def test_save_batch_uses_unique_filename_per_call(
-    repo: UserAddressCsvS3Repository,
+    repo: UnstandardisedAddressListCsvS3Repository,
 ) -> None:
    # arrange
-    addresses = [
-        UserAddress(
-            user_address="1 High Street",
-            postcode=Postcode("SW1A 1AA"),
-            source_row={"Address 1": "1 High Street", "postcode": "SW1A 1AA"},
-        ),
-    ]
+    addresses = AddressList(
+        [
+            UnstandardisedAddress(
+                address="1 High Street",
+                postcode=Postcode("SW1A 1AA"),
+                additional_info={
+                    "Address 1": "1 High Street",
+                    "postcode": "SW1A 1AA",
+                },
+            ),
+        ]
+    )

    # act
    uri_1 = repo.save_batch(addresses, "tasks/uniqueness")
--- a/tests/test_lambda_packaging.py
+++ b/tests/test_lambda_packaging.py
@ -0,0 +1,267 @@
+"""Static packaging linter for Lambda container images.
+
+Every Lambda here ships as a Docker image that copies a *subset* of the repo
+(``COPY utils/ utils/``, ``COPY backend/ backend/``, ...) and then runs a
+handler via ``CMD ["<module>.handler"]``. If the handler's import graph reaches
+a top-level package the Dockerfile forgot to ``COPY``, the function dies at cold
+start with ``Runtime.ImportModuleError: No module named '<pkg>'`` — but only
+*inside the image*. In the dev/test tree every package is present, so a plain
+``import`` test can't see the gap. This is exactly how ``No module named
+'domain'`` reached a deployed address2UPRN.
+
+The RIE smoke tests (.github/workflows/_smoke_test_lambda.yml) catch this too,
+but only by building the full image (minutes) and only for hand-listed services.
+This test catches the same class of bug in milliseconds, locally, for *every*
+handler Dockerfile — by statically computing each handler's import-time module
+graph and asserting every repo file it reaches is copied into the image.
+
+Scope: import-time (module-level) imports only — the ones that run at Lambda
+init, which is what ImportModuleError is about. Imports inside function bodies
+and under ``if TYPE_CHECKING:`` are deliberately ignored. Third-party / stdlib
+imports are out of scope (that's requirements.txt's job, covered by the RIE
+smoke test actually installing and importing).
+"""
+
+from __future__ import annotations
+
+import ast
+import json
+import re
+from pathlib import Path
+from typing import Optional
+
+import pytest
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+
+# Dockerfiles that are not Lambda handlers (test harness, dev containers).
+_SKIP_DOCKERFILES = {"Dockerfile.test", "Dockerfile.test.dockerignore"}
+_SKIP_PARTS = {".git", "node_modules", ".devcontainer"}
+
+
+def _toplevel_names() -> set[str]:
+    """Top-level repo packages/modules — the namespace handler imports resolve
+    against (imports are absolute: ``domain.x``, ``backend.y``)."""
+    names: set[str] = set()
+    for p in REPO_ROOT.iterdir():
+        if p.name.startswith(".") or p.name == "__pycache__":
+            continue
+        if p.is_dir():
+            names.add(p.name)
+        elif p.suffix == ".py":
+            names.add(p.stem)
+    return names
+
+
+_TOP = _toplevel_names()
+
+
+def _is_type_checking(test: ast.expr) -> bool:
+    if isinstance(test, ast.Name):
+        return test.id == "TYPE_CHECKING"
+    if isinstance(test, ast.Attribute):
+        return test.attr == "TYPE_CHECKING"
+    return False
+
+
+def _import_time_imports(path: Path) -> list[str]:
+    """Absolute module names imported when ``path`` is imported (i.e. at Lambda
+    init). Descends into module-level if/try/with and class bodies, but not into
+    function bodies (lazy) or ``if TYPE_CHECKING:`` blocks (never executed)."""
+    try:
+        tree = ast.parse(path.read_text(encoding="utf-8"), str(path))
+    except (SyntaxError, UnicodeDecodeError):
+        return []
+    out: list[str] = []
+
+    def visit(stmts: list[ast.stmt]) -> None:
+        for node in stmts:
+            if isinstance(node, ast.Import):
+                out.extend(alias.name for alias in node.names)
+            elif isinstance(node, ast.ImportFrom):
+                if not node.level and node.module:  # absolute imports only
+                    out.append(node.module)
+            elif isinstance(node, ast.If):
+                if _is_type_checking(node.test):
+                    continue
+                visit(node.body)
+                visit(node.orelse)
+            elif isinstance(node, ast.Try):
+                visit(node.body)
+                visit(node.orelse)
+                visit(node.finalbody)
+                for handler in node.handlers:
+                    visit(handler.body)
+            elif isinstance(node, ast.With):
+                visit(node.body)
+            elif isinstance(node, ast.ClassDef):
+                visit(node.body)
+            # FunctionDef / AsyncFunctionDef bodies are intentionally skipped.
+
+    visit(tree.body)
+    return out
+
+
+def _module_to_file(module: str) -> Optional[Path]:
+    """Resolve a dotted module to its repo source file (``foo.bar`` ->
+    ``foo/bar.py`` or ``foo/bar/__init__.py``)."""
+    base = REPO_ROOT.joinpath(*module.split("."))
+    py = base.with_suffix(".py")
+    if py.is_file():
+        return py
+    init = base / "__init__.py"
+    if init.is_file():
+        return init
+    return None
+
+
+def _import_closure(start: Path) -> dict[Path, Optional[Path]]:
+    """Repo files reachable from ``start`` via import-time imports, mapped to the
+    first file that imported each (for blame in failure messages)."""
+    reached: dict[Path, Optional[Path]] = {}
+    stack: list[tuple[Path, Optional[Path]]] = [(start, None)]
+    while stack:
+        path, importer = stack.pop()
+        if path in reached:
+            continue
+        reached[path] = importer
+        for module in _import_time_imports(path):
+            if module.split(".")[0] not in _TOP:
+                continue  # stdlib / third-party — not our concern here
+            target = _module_to_file(module)
+            if target is not None and target not in reached:
+                stack.append((target, path))
+    return reached
+
+
+def _norm(path_token: str) -> str:
+    return path_token.lstrip("./").rstrip("/")
+
+
+def _parse_handler_spec(dockerfile_text: str) -> Optional[str]:
+    """The ``<module>.handler`` string from the ``CMD`` line, or None if this
+    isn't a Lambda handler image."""
+    match = re.search(r"^CMD\s+(\[.*\]|.+)$", dockerfile_text, re.MULTILINE)
+    if not match:
+        return None
+    raw = match.group(1).strip()
+    try:
+        parsed = json.loads(raw)
+        spec = parsed[0] if isinstance(parsed, list) and parsed else raw
+    except json.JSONDecodeError:
+        spec = raw.strip('"')
+    return spec if isinstance(spec, str) and spec.endswith(".handler") else None
+
+
+def _parse_copies(dockerfile_text: str) -> list[tuple[list[str], str]]:
+    """``COPY`` instructions as (sources, dest), dropping ``--flag`` tokens."""
+    copies: list[tuple[list[str], str]] = []
+    for match in re.finditer(r"^COPY\s+(.+)$", dockerfile_text, re.MULTILINE):
+        tokens = [t for t in match.group(1).split() if not t.startswith("--")]
+        if len(tokens) < 2:
+            continue
+        *sources, dest = tokens
+        copies.append((sources, dest))
+    return copies
+
+
+def _resolve_handler_file(
+    spec: str, copies: list[tuple[list[str], str]]
+) -> Optional[Path]:
+    """Map a handler spec to its repo source file.
+
+    Handles both in-place layouts (``backend.foo.handler`` -> ``backend/foo/
+    handler.py``, present via ``COPY backend/ backend/``) and root-placed
+    handlers (``main.handler`` where a ``COPY <src> /var/task/main.py`` or
+    ``COPY <src>/main.py .`` puts the file at the image root)."""
+    module_path, _func = spec.rsplit(".", 1)
+
+    direct = REPO_ROOT / (module_path.replace(".", "/") + ".py")
+    if direct.is_file():
+        return direct
+
+    # Root-placed module: find the COPY whose destination basename matches.
+    wanted = module_path.split("/")[-1] + ".py"
+    for sources, dest in copies:
+        dest_norm = _norm(dest)
+        dest_is_named_file = Path(dest_norm).name == wanted
+        dest_is_dir = dest_norm in ("", module_path.split("/")[-1]) or dest.endswith("/")
+        for src in sources:
+            src_path = REPO_ROOT / _norm(src)
+            if not src_path.is_file():
+                continue
+            if dest_is_named_file or (dest_is_dir and src_path.name == wanted):
+                return src_path
+    return None
+
+
+def _is_copied(rel_path: str, copies: list[tuple[list[str], str]]) -> bool:
+    """Whether a repo-relative file path lands in the image via some COPY."""
+    rel_path = _norm(rel_path)
+    for sources, _dest in copies:
+        for src in sources:
+            src_norm = _norm(src)
+            if src_norm == "" or src_norm == rel_path or rel_path.startswith(src_norm + "/"):
+                return True
+    return False
+
+
+def _discover_handler_dockerfiles() -> list[Path]:
+    found: list[Path] = []
+    for path in REPO_ROOT.rglob("*Dockerfile*"):
+        if path.name in _SKIP_DOCKERFILES:
+            continue
+        if any(part in _SKIP_PARTS for part in path.relative_to(REPO_ROOT).parts):
+            continue
+        try:
+            text = path.read_text(encoding="utf-8")
+        except OSError:
+            continue
+        if _parse_handler_spec(text):
+            found.append(path)
+    return sorted(found)
+
+
+_HANDLER_DOCKERFILES = _discover_handler_dockerfiles()
+
+
+def test_handler_dockerfiles_discovered() -> None:
+    """Guard against the discovery silently finding nothing (e.g. a refactor
+    that renames Dockerfiles), which would make every check below vacuous."""
+    assert _HANDLER_DOCKERFILES, "no Lambda handler Dockerfiles found under repo root"
+
+
+@pytest.mark.parametrize(
+    "dockerfile",
+    _HANDLER_DOCKERFILES,
+    ids=[str(p.relative_to(REPO_ROOT)) for p in _HANDLER_DOCKERFILES],
+)
+def test_lambda_image_copies_full_import_closure(dockerfile: Path) -> None:
+    """Every repo file the handler imports at init must be COPYed into the image."""
+    text = dockerfile.read_text(encoding="utf-8")
+    spec = _parse_handler_spec(text)
+    assert spec is not None  # discovery guaranteed this
+    copies = _parse_copies(text)
+
+    handler_file = _resolve_handler_file(spec, copies)
+    assert handler_file is not None, (
+        f"{dockerfile.relative_to(REPO_ROOT)}: could not locate the source file "
+        f"for CMD handler {spec!r}. Update _resolve_handler_file if this is a new "
+        f"handler layout."
+    )
+
+    missing: list[str] = []
+    for reached, importer in _import_closure(handler_file).items():
+        rel = str(reached.relative_to(REPO_ROOT))
+        if not _is_copied(rel, copies):
+            blame = (
+                str(importer.relative_to(REPO_ROOT)) if importer else "(handler entrypoint)"
+            )
+            missing.append(f"  - {rel}\n      imported by {blame}")
+
+    assert not missing, (
+        f"{dockerfile.relative_to(REPO_ROOT)} runs `{spec}` but does not COPY "
+        f"{len(missing)} file(s) it imports at init. The Lambda will fail at cold "
+        f"start with Runtime.ImportModuleError. Add the missing top-level "
+        f"package(s) as `COPY <pkg>/ <pkg>/`:\n" + "\n".join(sorted(missing))
+    )