lanlord exetension application

2026-06-08 11:37:25 +00:00 · 2026-06-02 12:57:05 +00:00 · 2026-06-02 12:57:05 +00:00 · 38c82ebca3
commit 38c82ebca3
parent 90407799ac
8 changed files with 364 additions and 38 deletions
--- a/.claude/settings.json
+++ b/.claude/settings.json
@ -15,7 +15,33 @@
      "Bash(npx drizzle-kit *)",
      "Bash(echo \"frontend tsc exit: $?\")",
      "Bash(python3 -c ' *)",
-      "Bash(rm -f /workspaces/home/github/Model/backend/address2UPRN/local_handler/.env.local /workspaces/home/github/Model/backend/bulk_address2uprn_combiner/local_handler/.env.local && echo \"removed stub .env.local files\")"
+      "Bash(rm -f /workspaces/home/github/Model/backend/address2UPRN/local_handler/.env.local /workspaces/home/github/Model/backend/bulk_address2uprn_combiner/local_handler/.env.local && echo \"removed stub .env.local files\")",
+      "Bash(cat deployment/terraform/modules/s3_iam_policy/main.tf)",
+      "Bash(cat deployment/terraform/modules/s3_iam_policy/variables.tf)",
+      "Bash(terraform fmt *)",
+      "Bash(echo \"exit: $?\")",
+      "Bash(pip install *)",
+      "Bash(git -C /workspaces/assessment-model remote -v)",
+      "Bash(gh label *)",
+      "Bash(gh issue create --repo Hestia-Homes/assessment-model --label ready-for-agent --title 'Detect multi-entry rows and surface the largest-count sample on awaiting_review' --body ' *)",
+      "Bash(gh issue create --repo Hestia-Homes/assessment-model --label ready-for-agent --title 'Confirm building-part ordering and gate Finalise on it' --body ' *)",
+      "Bash(gh issue create --repo Hestia-Homes/assessment-model --label ready-for-agent --title 'Show our classification next to each multi-entry sample entry \\(read-only\\)' --body ' *)",
+      "Bash(gh issue create --repo Hestia-Homes/assessment-model --label ready-for-agent --title 'Editable classification verification writing source='\\\\''user'\\\\'', gating Finalise' --body ' *)",
+      "Bash(git check-ignore *)",
+      "Bash(git ls-tree *)",
+      "Bash(git worktree *)",
+      "Read(//workspaces/mig-wt/src/app/db/**)",
+      "Read(//workspaces/mig-wt/src/app/db/migrations/meta/**)",
+      "Bash(git branch *)",
+      "Bash(cp /workspaces/assessment-model/.env.local /tmp/mig-wt/.env.local; echo \"env copied\"; cat -n /tmp/mig-wt/src/app/db/schema/bulk_address_uploads.ts)",
+      "Bash(node /workspaces/assessment-model/node_modules/drizzle-kit/bin.cjs generate)",
+      "Bash(ln -s /workspaces/assessment-model/node_modules /tmp/mig-wt/node_modules)",
+      "Bash(node node_modules/drizzle-kit/bin.cjs generate)",
+      "Bash(git push *)",
+      "Bash(npm run *)",
+      "Bash(grep '\\\\.sql$')",
+      "Bash(git status *)",
+      "Bash(git checkout *)"
    ],
    "deny": [
      "Bash(npx drizzle-kit generate)",
@ -25,7 +51,9 @@
      "/workspaces/home/github/Model/backend/app/bulk_uploads",
      "/workspaces/home/github/Model/applications/landlord_description_overrides",
      "/workspaces/home/github/Model/orchestration",
-      "/workspaces/home/github/Model/backend/address2UPRN/local_handler"
+      "/workspaces/home/github/Model/backend/address2UPRN/local_handler",
+      "/workspaces/home/github/Model/deployment/terraform/shared",
+      "/tmp/mig-wt"
    ]
  }
 }
--- a/CONTEXT.md
+++ b/CONTEXT.md
@ -45,6 +45,27 @@ _Avoid_: customer data, manual override, landlord data
 The translation from a Landlord's free-text description in a BulkUpload column (e.g. `"cavity: filledcavity"`) to a canonical domain enum value (e.g. `WallType.CAVITY`). Produced by a `ColumnClassifier` (today an LLM, tomorrow possibly a lookup table or rules engine) in the Model service. Stored per-Portfolio, one row per `(category, description)`. A row carries provenance (`classifier` or `user`) so user overrides survive re-classification.
 _Avoid_: column mapping (that's a separate concept — see `ColumnMapping` above), classification, dictionary

+### Building parts
+
+**Building part**:
+One physically distinct part of a dwelling described by a single entry within a multi-valued cell. A dwelling is one **Main building** plus zero or more **Extensions**. Per-part descriptions appear as comma-separated entries in physical-element columns (e.g. `Walls`, `Roofs`); whole-dwelling columns (e.g. `Property Type`) carry a single entry and are **not** split per part.
+_Avoid_: annexe, unit, section, dwelling part
+
+**Main building**:
+The principal building part of a dwelling — exactly one per address. The others are **Extensions**.
+
+**Extension**:
+A building part that is not the Main building, numbered **Extension 1 … Extension N-1** for an N-entry address.
+_Avoid_: annexe, addition, outbuilding
+
+**Multi-entry**:
+The property of a BulkUpload row whose physical-element cells hold **more than one comma-separated entry**, one per **Building part**. Always intra-cell in our data — never multiple rows sharing one address/UPRN. Within a row, the multi-valued columns agree on entry-count, so **position `i` is the same Building part across every multi-valued column**.
+_Avoid_: multi-row, multi-record, duplicate address
+
+**Building-part ordering** (a.k.a. **ordering**):
+The user's declaration, captured once per file, of which list-position maps to which Building part — because the entry order is a consistent per-file mistake (`"A, B"` could be `[Main, Extension 1]` or `[Extension 1, Main]`). Stored per entry-count as a permutation. See [ADR-0004](./docs/adr/0004-multi-entry-building-part-ordering.md).
+_Avoid_: sort order, sequence, column mapping
+
 ## Lifecycle

 A **BulkUpload** moves through these statuses:
@ -65,6 +86,8 @@ Re-mapping (PATCHing `columnMapping`) is legal only in `ready_for_processing` an

 **Two writers**: Next.js owns transitions out of `mapping_complete`, into `processing`, and the terminal Finalise outcomes. FastAPI owns `combining` and `awaiting_review` — writing them direct to the DB during the combiner run. The BulkUpload aggregate observes both.

+At `awaiting_review`, **Finalise is gated** (not a new status — a precondition on the action): when classifier columns were mapped the user must acknowledge the classification-verification step, and when the file is **Multi-entry** they must confirm the **Building-part ordering**. See [ADR-0004](./docs/adr/0004-multi-entry-building-part-ordering.md).
+
 See [ADR-0001](./docs/adr/0001-bulk-upload-state-machine.md) for the deliberate "not yet" decisions baked into this lifecycle.

 ## Relationships
--- a/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts
+++ b/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts
@ -3,20 +3,24 @@ import { getServerSession } from "next-auth";
 import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
 import { createS3Client, createRetrofitDataS3Client, retrofitDataS3Bucket } from "@/app/utils/s3";
 import * as XLSX from "xlsx";
-import { loadForAddressMatching, triggerAddressMatching, triggerClassifier } from "@/lib/bulkUpload/server";
+import { loadForAddressMatching, saveMultiEntrySummary, triggerAddressMatching, triggerClassifier } from "@/lib/bulkUpload/server";
 import { readSessionToken } from "@/lib/session";
-import { ADDRESS_FIELDS } from "@/lib/bulkUpload/columnFields";
+import { ADDRESS_FIELDS, classifierMapping } from "@/lib/bulkUpload/columnFields";
+import { detectMultiEntry } from "@/lib/bulkUpload/multiEntry";

-function transformFile(
-  buffer: Buffer,
-  columnMapping: Record<string, string> // field → source header
-): { csv: string; error?: never } | { csv?: never; error: string } {
+type SheetRow = Record<string, unknown>;
+
+function readRows(buffer: Buffer): SheetRow[] {
  const wb = XLSX.read(buffer, { type: "buffer" });
  const sheet = wb.Sheets[wb.SheetNames[0]];
-  const rows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: "" });
-
-  if (rows.length === 0) return { error: "Empty file" };
+  return XLSX.utils.sheet_to_json<SheetRow>(sheet, { defval: "" });
+}

+// Address-matching CSV: address fields only, renamed to canonical headers.
+function buildAddressCsv(
+  rows: SheetRow[],
+  columnMapping: Record<string, string> // field → source header
+): { csv: string; error?: never } | { csv?: never; error: string } {
  const outputHeaders: string[] = [];
  const outputToSource: Record<string, string> = {};
  for (const field of ADDRESS_FIELDS) {
@ -32,7 +36,7 @@ function transformFile(
    return { error: 'Mapping must include "postcode"' };

  const outputRows = rows.map((row) => {
-    const out: Record<string, unknown> = {};
+    const out: SheetRow = {};
    for (const [outName, src] of Object.entries(outputToSource)) {
      out[outName] = row[src] ?? "";
    }
@ -43,6 +47,25 @@ function transformFile(
  return { csv: XLSX.utils.sheet_to_csv(outSheet) };
 }

+// Classifier CSV: the mapped classifier source columns only, original headers
+// preserved (the lambda resolves them via column_mapping). Converting here means
+// the classifier always reads a real CSV even when the upload was .xlsx/.xls —
+// see ADR-0003. One source header may feed several categories, so dedupe to
+// distinct headers.
+function buildClassifierCsv(
+  rows: SheetRow[],
+  classifierMap: Record<string, string> // category → source header
+): string {
+  const headers = [...new Set(Object.values(classifierMap))];
+  const outputRows = rows.map((row) => {
+    const out: SheetRow = {};
+    for (const h of headers) out[h] = row[h] ?? "";
+    return out;
+  });
+  const outSheet = XLSX.utils.json_to_sheet(outputRows, { header: headers });
+  return XLSX.utils.sheet_to_csv(outSheet);
+}
+
 export async function POST(
  request: NextRequest,
  { params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
@ -81,7 +104,15 @@ export async function POST(
    return NextResponse.json({ error: "Failed to read source file" }, { status: 500 });
  }

-  const transformed = transformFile(fileBuffer, upload.columnMapping!);
+  const rows = readRows(fileBuffer);
+  if (rows.length === 0)
+    return NextResponse.json({ error: "Empty file" }, { status: 422 });
+
+  // Detect multi-entry building parts now, while the whole file is parsed in
+  // memory, so the awaiting_review surface never re-reads it (ADR-0004).
+  await saveMultiEntrySummary(uploadId, detectMultiEntry(rows, upload.columnMapping!));
+
+  const transformed = buildAddressCsv(rows, upload.columnMapping!);
  if (transformed.error)
    return NextResponse.json({ error: transformed.error }, { status: 422 });

@ -102,13 +133,37 @@ export async function POST(

  const s3Uri = `s3://${outputBucket}/${transformedKey}`;

+  // Convert the mapped classifier columns to their own CSV so the classifier
+  // lambda always parses a real CSV, never the raw upload (which may be
+  // .xlsx/.xls). Only when the user mapped ≥1 classifier column. See ADR-0003.
+  const classifierMap = classifierMapping(upload.columnMapping!);
+  let classifierS3Uri: string | undefined;
+  if (Object.keys(classifierMap).length > 0) {
+    const classifierKey = `bulk_onboarding_inputs/${portfolioId}/${uploadId}-classifier.csv`;
+    try {
+      await outputS3
+        .putObject({
+          Bucket: outputBucket,
+          Key: classifierKey,
+          Body: buildClassifierCsv(rows, classifierMap),
+          ContentType: "text/csv",
+        })
+        .promise();
+      classifierS3Uri = `s3://${outputBucket}/${classifierKey}`;
+    } catch (err) {
+      // Non-blocking: classification is skipped, address matching proceeds.
+      console.error("Failed to upload classifier CSV:", err);
+    }
+  }
+
  const sessionToken = readSessionToken(request);
  const trigger = await triggerAddressMatching({ uploadId, s3Uri, sessionToken });
  if (trigger.kind === "trigger_failed")
    return NextResponse.json({ error: trigger.message }, { status: trigger.status });

  // Co-fire the landlord classifier (non-blocking) under the same task.
-  await triggerClassifier({ taskId: trigger.taskId, uploadId, sessionToken });
+  if (classifierS3Uri)
+    await triggerClassifier({ taskId: trigger.taskId, uploadId, s3Uri: classifierS3Uri, sessionToken });

  return NextResponse.json({ taskId: trigger.taskId }, { status: 200 });
 }
--- a/src/app/db/schema/bulk_address_uploads.ts
+++ b/src/app/db/schema/bulk_address_uploads.ts
@ -1,6 +1,30 @@
 import { pgTable, uuid, text, timestamp, jsonb } from "drizzle-orm/pg-core";
 import { sql } from "drizzle-orm";

+// Shape of the multi_entry_summary jsonb (ADR-0004). Co-located with the column
+// so the schema is self-contained; the detection logic in
+// src/lib/bulkUpload/multiEntry.ts imports these.
+export interface MultiEntryEntry {
+  raw: string;
+  description: string;
+}
+export interface MultiEntryColumn {
+  field: string;
+  header: string;
+  entries: MultiEntryEntry[];
+}
+export interface MultiEntrySample {
+  address: string;
+  count: number;
+  columns: MultiEntryColumn[];
+}
+export interface MultiEntrySummary {
+  multiValuedFields: string[];
+  countDistribution: Record<string, number>;
+  largestCount: number;
+  sample: MultiEntrySample | null;
+}
+
 export const bulkAddressUploads = pgTable("bulk_address_uploads", {
  id: uuid("id").defaultRandom().primaryKey(),
  portfolioId: text("portfolio_id").notNull(),
@ -11,6 +35,9 @@ export const bulkAddressUploads = pgTable("bulk_address_uploads", {
  status: text("status").notNull().default("ready_for_processing"),
  sourceHeaders: text("source_headers").array().notNull().default(sql`'{}'`),
  columnMapping: jsonb("column_mapping").$type<Record<string, string>>(),
+  // Multi-entry building-part detection, computed at start-address-matching
+  // and read by the awaiting_review review surface (ADR-0004).
+  multiEntrySummary: jsonb("multi_entry_summary").$type<MultiEntrySummary>(),
  taskId: uuid("task_id"),
  combinedOutputS3Uri: text("combined_output_s3_uri"),
  createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
--- a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx
+++ b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx
@ -8,6 +8,7 @@ import {
  useFinalize,
  useRequestCombine,
 } from "@/lib/bulkUpload/client";
+import type { MultiEntrySample } from "@/lib/bulkUpload/multiEntry";

 interface Props {
  portfolioSlug: string;
@ -60,6 +61,13 @@ export default function OnboardingProgress({
  const canRunCombiner = taskDone && !taskFailed && upload.status === "processing";
  const canFinalize = upload.status === "awaiting_review";

+  // Multi-entry building-part sample, shown read-only on the review surface
+  // (ADR-0004). Ordering confirmation arrives in a later slice.
+  const multiEntrySample =
+    upload.status === "awaiting_review"
+      ? (upload.multiEntrySummary?.sample ?? null)
+      : null;
+
  return (
    <div className="mt-6 space-y-3">
      <div className="w-full bg-gray-100 rounded-full h-2 overflow-hidden">
@ -70,17 +78,24 @@ export default function OnboardingProgress({
      </div>

      <div className="flex items-center gap-4 text-xs text-gray-500">
-        {total > 0 && (
-          <span>
-            <span className="font-semibold text-gray-700">{completedSubtasks}</span> / {total} batches complete
-          </span>
-        )}
-        {failedSubtasks > 0 && (
-          <span className="flex items-center gap-1 text-red-500 font-semibold">
-            <span className="w-1.5 h-1.5 rounded-full bg-red-400" />
-            {failedSubtasks} failed
-          </span>
-        )}
+        {/* Address matching: standardises addresses against the OS lookup, in batches. */}
+        <span className="flex items-center gap-1">
+          <span className="text-gray-400">Address matching:</span>
+          {failedSubtasks > 0 ? (
+            <span className="flex items-center gap-1 text-red-500 font-semibold">
+              <span className="w-1.5 h-1.5 rounded-full bg-red-400" />
+              {failedSubtasks} of {total} batches failed
+            </span>
+          ) : total > 0 && completedSubtasks >= total ? (
+            <span className="font-semibold text-green-600">complete</span>
+          ) : (
+            <span className="flex items-center gap-1 text-blue-500">
+              <span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
+              running{total > 0 ? ` · ${completedSubtasks} / ${total} batches` : ""}
+            </span>
+          )}
+        </span>
+        {/* Classification: turns the landlord's free-text descriptions into EPC categories. */}
        {classifierTotal > 0 && (
          <span className="flex items-center gap-1">
            <span className="text-gray-400">Classification:</span>
@ -99,12 +114,6 @@ export default function OnboardingProgress({
            )}
          </span>
        )}
-        {!taskDone && (
-          <span className="flex items-center gap-1 text-blue-500">
-            <span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
-            Running
-          </span>
-        )}
        {isCombining && (
          <span className="flex items-center gap-1 text-blue-500">
            <span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
@ -119,6 +128,8 @@ export default function OnboardingProgress({
        )}
      </div>

+      {multiEntrySample && <MultiEntrySamplePanel sample={multiEntrySample} />}
+
      {(canRunCombiner || canFinalize) && (
        <div className="flex flex-col gap-2 pt-2">
          {canRunCombiner && (
@ -164,6 +175,51 @@ export default function OnboardingProgress({
  );
 }

+// Read-only preview of the largest-count multi-entry row (ADR-0004). Each
+// comma-separated entry is a building part; the user will confirm their order
+// in a later slice. Positions are shown 1-based, unlabelled for now.
+function MultiEntrySamplePanel({ sample }: { sample: MultiEntrySample }) {
+  return (
+    <div className="rounded-lg border border-amber-200 bg-amber-50 px-4 py-3">
+      <p className="text-sm font-semibold text-amber-900">
+        Multiple building parts detected
+      </p>
+      <p className="mt-0.5 text-xs text-amber-800">
+        {sample.address ? <span className="font-medium">{sample.address}</span> : "An address"}{" "}
+        has {sample.count} building parts (e.g. a main building and extensions).
+        You&apos;ll be asked to confirm their order before finalising.
+      </p>
+
+      <div className="mt-3 overflow-x-auto">
+        <table className="w-full border-collapse text-xs">
+          <thead>
+            <tr className="text-left text-amber-700">
+              <th className="py-1 pr-3 font-medium">Position</th>
+              {sample.columns.map((column) => (
+                <th key={column.field} className="py-1 pr-3 font-medium">
+                  {column.header}
+                </th>
+              ))}
+            </tr>
+          </thead>
+          <tbody>
+            {Array.from({ length: sample.count }).map((_, position) => (
+              <tr key={position} className="border-t border-amber-100 text-amber-900">
+                <td className="py-1 pr-3 text-amber-600">{position + 1}</td>
+                {sample.columns.map((column) => (
+                  <td key={column.field} className="py-1 pr-3">
+                    {column.entries[position]?.raw ?? "—"}
+                  </td>
+                ))}
+              </tr>
+            ))}
+          </tbody>
+        </table>
+      </div>
+    </div>
+  );
+}
+
 function StageButton({
  label,
  activeLabel,
--- a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx
+++ b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx
@ -104,8 +104,8 @@ export default function MapColumnsClient({
            className="w-full text-sm border border-gray-200 rounded-lg px-3 py-2 bg-white text-gray-800 focus:outline-none focus:ring-2 focus:ring-[#14163d]/20 focus:border-[#14163d]"
          >
            <option value={NOT_PROVIDED}>Not provided</option>
-            {sourceHeaders.map((header) => (
-              <option key={header} value={header}>
+            {sourceHeaders.map((header, index) => (
+              <option key={`${header}-${index}`} value={header}>
                {header}
              </option>
            ))}
--- a/src/lib/bulkUpload/multiEntry.ts
+++ b/src/lib/bulkUpload/multiEntry.ts
@ -0,0 +1,119 @@
+// Multi-entry building-part detection (ADR-0004).
+//
+// A BulkUpload row can carry several comma-separated entries in a physical-
+// element column (e.g. Walls = "Cavity: AsBuilt (1976-1982), Cavity:
+// FilledCavity"). Each entry is a Building part (Main building + Extensions).
+// This module finds that pattern and captures one sample — the row with the
+// MOST building parts — so the user can confirm the ordering downstream.
+//
+// Pure + I/O-free so it's unit-testable; the start-address-matching route runs
+// it over the already-parsed upload rows and persists the result on the upload.
+
+import { ADDRESS_FIELDS, classifierMapping } from "./columnFields";
+import type {
+  MultiEntryEntry,
+  MultiEntryColumn,
+  MultiEntrySummary,
+} from "@/app/db/schema/bulk_address_uploads";
+
+// The jsonb shape lives with the column (schema/bulk_address_uploads.ts) so the
+// migration is self-contained; re-export here for callers of this module.
+export type {
+  MultiEntryEntry,
+  MultiEntryColumn,
+  MultiEntrySample,
+  MultiEntrySummary,
+} from "@/app/db/schema/bulk_address_uploads";
+
+export const EMPTY_MULTI_ENTRY_SUMMARY: MultiEntrySummary = {
+  multiValuedFields: [],
+  countDistribution: {},
+  largestCount: 0,
+  sample: null,
+};
+
+// Split a cell into building-part entries. Mirrors the classifier's
+// split(",") → trim → lower, dropping empty fragments so positions align
+// across raw and normalized forms.
+export function splitEntries(value: unknown): MultiEntryEntry[] {
+  return String(value ?? "")
+    .split(",")
+    .map((s) => s.trim())
+    .filter((s) => s.length > 0)
+    .map((raw) => ({ raw, description: raw.toLowerCase() }));
+}
+
+// Compose a display address from the mapped address fields (reference excluded).
+function buildAddress(
+  row: Record<string, unknown>,
+  columnMapping: Record<string, string>,
+): string {
+  const parts: string[] = [];
+  for (const field of ADDRESS_FIELDS) {
+    if (field.value === "internal_reference") continue;
+    const header = columnMapping[field.value];
+    if (!header) continue;
+    const value = String(row[header] ?? "").trim();
+    if (value) parts.push(value);
+  }
+  return parts.join(", ");
+}
+
+// Scan the mapped classifier columns for multi-entry rows and capture the
+// largest-count sample. Only classifier columns are considered — they're the
+// physical-element descriptions we slice into building parts; address columns
+// are single-valued by nature.
+export function detectMultiEntry(
+  rows: Array<Record<string, unknown>>,
+  columnMapping: Record<string, string>,
+): MultiEntrySummary {
+  const classifierCols = Object.entries(classifierMapping(columnMapping));
+  if (classifierCols.length === 0) return EMPTY_MULTI_ENTRY_SUMMARY;
+
+  const multiValued = new Set<string>();
+  const countDistribution: Record<string, number> = {};
+  let largestCount = 0;
+  let sampleRowIndex = -1;
+
+  rows.forEach((row, index) => {
+    let rowMax = 0;
+    for (const [field, header] of classifierCols) {
+      const n = splitEntries(row[header]).length;
+      if (n > 1) multiValued.add(field);
+      if (n > rowMax) rowMax = n;
+    }
+    if (rowMax >= 2) {
+      const key = String(rowMax);
+      countDistribution[key] = (countDistribution[key] ?? 0) + 1;
+      // First row at a new maximum becomes the sample.
+      if (rowMax > largestCount) {
+        largestCount = rowMax;
+        sampleRowIndex = index;
+      }
+    }
+  });
+
+  if (sampleRowIndex === -1) return EMPTY_MULTI_ENTRY_SUMMARY;
+
+  const sampleRow = rows[sampleRowIndex];
+  // Show only the columns that are actually split in the sample row;
+  // single-value columns are whole-dwelling facts, not building parts.
+  const columns: MultiEntryColumn[] = classifierCols
+    .map(([field, header]) => ({
+      field,
+      header,
+      entries: splitEntries(sampleRow[header]),
+    }))
+    .filter((column) => column.entries.length > 1);
+
+  return {
+    multiValuedFields: [...multiValued],
+    countDistribution,
+    largestCount,
+    sample: {
+      address: buildAddress(sampleRow, columnMapping),
+      count: largestCount,
+      columns,
+    },
+  };
+}
--- a/src/lib/bulkUpload/server.ts
+++ b/src/lib/bulkUpload/server.ts
@ -6,6 +6,7 @@ import { count, desc, eq, sql } from "drizzle-orm";
 import type { BulkUpload, BulkUploadStatus, ProgressView, TaskSummary } from "./types";
 import { validateColumnMapping, classifierMapping } from "./columnFields";
 import { SUBTASK_SERVICE } from "./types";
+import type { MultiEntrySummary } from "./multiEntry";

 const REMAP_ALLOWED: ReadonlySet<BulkUploadStatus> = new Set([
  "ready_for_processing",
@ -102,6 +103,20 @@ export async function getProgressView(uploadId: string): Promise<ProgressView |
  return { upload, task };
 }

+// Persist the multi-entry building-part detection (ADR-0004). Computed once at
+// start-address-matching from the already-parsed rows; read back on the
+// awaiting_review surface. Only this column is touched, so the later
+// status/taskId update leaves it intact.
+export async function saveMultiEntrySummary(
+  uploadId: string,
+  summary: MultiEntrySummary,
+): Promise<void> {
+  await db
+    .update(bulkAddressUploads)
+    .set({ multiEntrySummary: summary })
+    .where(eq(bulkAddressUploads.id, uploadId));
+}
+
 export type SetMappingOutcome =
  | { kind: "ok"; upload: BulkUpload }
  | { kind: "not_found" }
@ -211,13 +226,16 @@ export async function triggerAddressMatching(args: {
  return { kind: "ok", taskId: task.id };
 }

-// Co-fires the landlord classifier as a subtask under the address task. Reads
-// the ORIGINAL upload (the address-matching CSV strips the description columns)
-// and is non-blocking: a trigger failure marks only the classifier subtask, so
-// address matching is unaffected. See ADR-0003.
+// Co-fires the landlord classifier as a subtask under the address task. Reads a
+// dedicated classifier CSV (the classifier columns converted from the upload by
+// the start-address-matching route — the address-matching CSV strips the
+// description columns), so the lambda always parses a real CSV even for
+// .xlsx/.xls uploads. Non-blocking: a trigger failure marks only the classifier
+// subtask, so address matching is unaffected. See ADR-0003.
 export async function triggerClassifier(args: {
  taskId: string;
  uploadId: string;
+  s3Uri: string;
  sessionToken: string | undefined;
 }): Promise<void> {
  const upload = await loadById(args.uploadId);
@ -239,7 +257,7 @@ export async function triggerClassifier(args: {
  const payload = {
    task_id: args.taskId,
    sub_task_id: subTask.id,
-    s3_uri: `s3://${upload.s3Bucket}/${upload.s3Key}`,
+    s3_uri: args.s3Uri,
    portfolio_id: Number(upload.portfolioId),
    column_mapping: columnMapping,
  };