Add Step 1 "Verify classification" to bulk-upload review (ADR-0004)

For uploads with classifier columns, surface the classifier's
description→enum assumptions on the awaiting_review screen so the user can
correct any (written source='user') and acknowledge before Finalise.
Previously only the multi-entry order step existed, so non-multi-entry
uploads got no classification review at all and the assumptions were
applied silently.

- detectMultiEntry: capture a sample whenever classifier columns are
  mapped (largest-count row if multi-entry, else first classified row);
  the sample now carries all classifier columns. "sample != null" means
  "there is something to verify"; largestCount >= 2 stays the multi-entry
  signal.
- setVerifyAck + verify-classification PATCH route + useConfirmVerification.
- VerifyClassificationPanel (Step 1); MultiEntryOrderingPanel slimmed to
  order-only with read-only classification annotation; canFinalize gated
  on both steps where each applies.
- Unit tests for detectMultiEntry + ordering helpers.

The verify_ack column + 0219 migration landed separately via #303.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Jun-te Kim 2026-06-02 18:18:39 +00:00
parent 7f4a0e32d7
commit 88a0ce04d9
6 changed files with 369 additions and 58 deletions

View file

@ -0,0 +1,35 @@
import { setVerifyAck } from "@/lib/bulkUpload/server";
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
// Records the "Verify classification" acknowledgement (ADR-0004 Step 1). No
// body — the per-row corrections go through the classifications PATCH; this just
// marks that the user has checked the sample and unlocks Finalise.
export async function PATCH(
request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
const session = await getServerSession(AuthOptions);
if (!session) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
const { uploadId } = await params;
try {
const result = await setVerifyAck(uploadId);
switch (result.kind) {
case "ok":
return NextResponse.json(result.upload, { status: 200 });
case "not_found":
return NextResponse.json({ error: "Not found" }, { status: 404 });
case "wrong_state":
return NextResponse.json(
{ error: `Cannot verify classification in state '${result.current}'` },
{ status: 409 }
);
}
} catch (error) {
console.error("Failed to record classification verification:", error);
return NextResponse.json({ error: "Internal server error" }, { status: 500 });
}
}

View file

@ -7,6 +7,7 @@ import { ArrowRightIcon, CheckCircleIcon } from "@heroicons/react/24/outline";
import {
useBulkUploadProgress,
useConfirmMultiEntryOrdering,
useConfirmVerification,
useEditClassification,
useFinalize,
useRequestCombine,
@ -94,13 +95,21 @@ export default function OnboardingProgress({
const canRunCombiner = taskDone && !taskFailed && upload.status === "processing";
const isAwaitingReview = upload.status === "awaiting_review";
// Multi-entry building-part sample on the review surface (ADR-0004). When the
// upload is multi-entry, Finalise is gated on the user confirming the order.
const multiEntrySample = isAwaitingReview
? (upload.multiEntrySummary?.sample ?? null)
: null;
// Two-step review on the awaiting_review surface (ADR-0004). The sample exists
// whenever classifier columns were mapped; multi-entry is largestCount >= 2.
// Step 1 (verify) applies whenever there's a sample; Step 2 (order) only when
// multi-entry. Each gates Finalise where it applies.
const sample = isAwaitingReview ? (upload.multiEntrySummary?.sample ?? null) : null;
const isMultiEntry = (upload.multiEntrySummary?.largestCount ?? 0) >= 2;
const verifyAck = upload.verifyAck ?? false;
const orderingConfirmed = upload.multiEntryOrdering?.confirmed ?? false;
const canFinalize = isAwaitingReview && (!multiEntrySample || orderingConfirmed);
const needsVerify = !!sample;
const needsOrdering = !!sample && isMultiEntry;
const showStepNumbers = needsVerify && needsOrdering;
const canFinalize =
isAwaitingReview &&
(!needsVerify || verifyAck) &&
(!needsOrdering || orderingConfirmed);
return (
<div className="mt-6 space-y-3">
@ -162,11 +171,23 @@ export default function OnboardingProgress({
)}
</div>
{multiEntrySample && (
{needsVerify && sample && (
<VerifyClassificationPanel
sample={sample}
classifications={classifications.data ?? {}}
verified={verifyAck}
stepLabel={showStepNumbers ? "Step 1" : undefined}
portfolioId={portfolioId}
uploadId={uploadId}
/>
)}
{needsOrdering && sample && (
<MultiEntryOrderingPanel
sample={multiEntrySample}
sample={sample}
ordering={upload.multiEntryOrdering ?? null}
classifications={classifications.data ?? {}}
stepLabel={showStepNumbers ? "Step 2" : undefined}
portfolioId={portfolioId}
uploadId={uploadId}
/>
@ -188,7 +209,11 @@ export default function OnboardingProgress({
activeLabel="Finalising…"
isPending={finalize.isPending}
disabled={!canFinalize}
disabledReason="Confirm the building-part order first"
disabledReason={
needsVerify && !verifyAck
? "Verify the classification first"
: "Confirm the building-part order first"
}
onClick={() =>
finalize.mutate(undefined, { onSuccess: () => router.refresh() })
}
@ -219,6 +244,129 @@ export default function OnboardingProgress({
);
}
// Step 1 — Verify classification (ADR-0004). Lists how we read the sample
// address's descriptions (per mapped classifier column) and lets the user
// correct any, written back as source='user'. Acknowledging unlocks Finalise.
// Shown whenever classifier columns were mapped, multi-entry or not.
function VerifyClassificationPanel({
sample,
classifications,
verified,
stepLabel,
portfolioId,
uploadId,
}: {
sample: MultiEntrySample;
classifications: SampleClassifications;
verified: boolean;
stepLabel?: string;
portfolioId: string;
uploadId: string;
}) {
const editClassification = useEditClassification(portfolioId, uploadId);
const confirm = useConfirmVerification(portfolioId, uploadId);
return (
<div className="rounded-lg border border-amber-200 bg-amber-50 px-4 py-3">
<p className="text-sm font-semibold text-amber-900">
{stepLabel ? `${stepLabel}` : ""}Verify classification
</p>
<p className="mt-0.5 text-xs text-amber-800">
Based on your column mapping, here&apos;s how we read{" "}
{sample.address ? <span className="font-medium">{sample.address}</span> : "one sample address"}.
Correct anything that&apos;s wrong, then confirm.
</p>
<div className="mt-3 space-y-3">
{sample.columns.map((column) => {
// One editable row per distinct description, in file order.
const seen = new Set<string>();
const entries = column.entries.filter((entry) => {
if (seen.has(entry.description)) return false;
seen.add(entry.description);
return true;
});
const options = CATEGORY_VALUES[column.field] ?? [];
return (
<div key={column.field}>
<p className="text-[11px] font-semibold uppercase tracking-wide text-amber-700">
{column.header}
</p>
<div className="mt-1 space-y-1">
{entries.map((entry) => {
const classified = classifications[column.field]?.[entry.description] ?? "";
return (
<div
key={entry.description}
className="flex items-center gap-2 text-xs text-amber-900"
>
<span className="min-w-0 flex-1 truncate" title={entry.raw}>
{entry.raw}
</span>
<span className="text-amber-400"></span>
<select
value={classified}
onChange={(e) =>
editClassification.mutate({
field: column.field,
description: entry.description,
value: e.target.value,
})
}
disabled={editClassification.isPending}
className="max-w-[14rem] rounded border border-amber-300 bg-white px-1.5 py-0.5 text-[11px] text-amber-900"
>
<option value="" disabled>
{classified ? classified : "not classified"}
</option>
{options.map((opt) => (
<option key={opt} value={opt}>
{opt}
</option>
))}
</select>
</div>
);
})}
</div>
</div>
);
})}
</div>
<p className="mt-2 text-[11px] text-amber-700">
Correcting a classification updates that description for{" "}
<span className="font-medium">every</span> row across the portfolio, not
just this address.
</p>
{editClassification.error && (
<p className="mt-1 text-xs text-red-600">{editClassification.error.message}</p>
)}
<div className="mt-3 flex items-center gap-3">
<button
onClick={() => confirm.mutate()}
disabled={confirm.isPending}
className={`inline-flex items-center gap-1.5 rounded-lg px-4 py-1.5 text-xs font-bold text-white transition-opacity ${
confirm.isPending ? "bg-amber-300 cursor-not-allowed" : "bg-amber-600 hover:opacity-90"
}`}
>
{confirm.isPending ? "Saving…" : verified ? "Re-confirm" : "These look right"}
</button>
{verified && !confirm.isPending && (
<span className="inline-flex items-center gap-1 text-xs font-semibold text-green-600">
<CheckCircleIcon className="h-4 w-4" />
Classification verified
</span>
)}
</div>
{confirm.error && (
<p className="mt-2 text-xs text-red-600">{confirm.error.message}</p>
)}
</div>
);
}
// Interactive building-part ordering for the largest-count multi-entry sample
// (ADR-0004). The user labels each file position with a building part (one Main
// building + Extensions); the labels must form a permutation. Confirming
@ -227,18 +375,22 @@ function MultiEntryOrderingPanel({
sample,
ordering,
classifications,
stepLabel,
portfolioId,
uploadId,
}: {
sample: MultiEntrySample;
ordering: MultiEntryOrdering | null;
classifications: SampleClassifications;
stepLabel?: string;
portfolioId: string;
uploadId: string;
}) {
const confirm = useConfirmMultiEntryOrdering(portfolioId, uploadId);
const editClassification = useEditClassification(portfolioId, uploadId);
const count = sample.count;
// Only the multi-valued columns are sliced into building parts; single-value
// columns are whole-dwelling facts (verified in Step 1, not ordered here).
const orderColumns = sample.columns.filter((column) => column.entries.length > 1);
// assignment[filePosition] = building-part slot. Seed from a stored ordering
// (slot -> position, so invert) or default to identity (main building first).
@ -270,7 +422,7 @@ function MultiEntryOrderingPanel({
return (
<div className="rounded-lg border border-amber-200 bg-amber-50 px-4 py-3">
<p className="text-sm font-semibold text-amber-900">
Confirm building-part order
{stepLabel ? `${stepLabel}` : ""}Confirm building-part order
</p>
<p className="mt-0.5 text-xs text-amber-800">
{sample.address ? <span className="font-medium">{sample.address}</span> : "An address"}{" "}
@ -284,7 +436,7 @@ function MultiEntryOrderingPanel({
<thead>
<tr className="text-left text-amber-700">
<th className="py-1 pr-3 font-medium">Entry</th>
{sample.columns.map((column) => (
{orderColumns.map((column) => (
<th key={column.field} className="py-1 pr-3 font-medium">
{column.header}
</th>
@ -296,37 +448,19 @@ function MultiEntryOrderingPanel({
{Array.from({ length: count }).map((_, position) => (
<tr key={position} className="border-t border-amber-100 text-amber-900">
<td className="py-1 pr-3 text-amber-600">{position + 1}</td>
{sample.columns.map((column) => {
{orderColumns.map((column) => {
const entry = column.entries[position];
const classified = entry
? classifications[column.field]?.[entry.description] ?? ""
: "";
const options = CATEGORY_VALUES[column.field] ?? [];
return (
<td key={column.field} className="py-1 pr-3 align-top">
<div>{entry?.raw ?? "—"}</div>
{/* Read-only classification annotation; edit it in Step 1. */}
{entry && (
<select
value={classified}
onChange={(e) =>
editClassification.mutate({
field: column.field,
description: entry.description,
value: e.target.value,
})
}
disabled={editClassification.isPending}
className="mt-0.5 max-w-[12rem] rounded border border-amber-300 bg-white px-1.5 py-0.5 text-[11px] text-amber-900"
>
<option value="" disabled>
{classified ? classified : "not classified"}
</option>
{options.map((opt) => (
<option key={opt} value={opt}>
{opt}
</option>
))}
</select>
<div className="mt-0.5 text-[11px] text-amber-600">
{classified || "not classified"}
</div>
)}
</td>
);
@ -350,15 +484,6 @@ function MultiEntryOrderingPanel({
</table>
</div>
<p className="mt-2 text-[11px] text-amber-700">
Correcting a classification updates that description for{" "}
<span className="font-medium">every</span> row across the portfolio, not
just this address.
</p>
{editClassification.error && (
<p className="mt-1 text-xs text-red-600">{editClassification.error.message}</p>
)}
{!valid && (
<p className="mt-2 text-xs text-red-600">
Each part (Main building, Extension 1, ) must be used exactly once.

View file

@ -161,6 +161,25 @@ export function useConfirmMultiEntryOrdering(portfolioId: string, uploadId: stri
});
}
// Records the "Verify classification" acknowledgement (ADR-0004 Step 1),
// unlocking Finalise. Per-row corrections go through useEditClassification.
export function useConfirmVerification(portfolioId: string, uploadId: string) {
const queryClient = useQueryClient();
return useMutation<BulkUpload, Error, void>({
mutationFn: async () => {
const res = await fetch(
`/api/portfolio/${portfolioId}/bulk-uploads/${uploadId}/verify-classification`,
{ method: "PATCH" },
);
if (!res.ok) throw await parseError(res, "Failed to confirm classification.");
return res.json();
},
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: bulkUploadKeys.progress(uploadId) });
},
});
}
export function useStartAddressMatching(portfolioId: string, uploadId: string) {
const queryClient = useQueryClient();
return useMutation<{ taskId: string }, Error, void>({

View file

@ -0,0 +1,86 @@
import { describe, expect, it } from "vitest";
import { detectMultiEntry, assignmentToPermutation, isPermutation } from "./multiEntry";
// field -> source header, the shape stored on the upload. property_type and
// built_form_type intentionally share a header (the classifier allows it).
const MAPPING = {
address_1: "Addr",
postcode: "PC",
property_type: "Property Type",
wall_type: "Walls",
roof_type: "Roofs",
};
describe("detectMultiEntry", () => {
it("returns an empty summary when no classifier columns are mapped", () => {
const rows = [{ Addr: "1 High St", PC: "AB1 2CD" }];
const summary = detectMultiEntry(rows, { address_1: "Addr", postcode: "PC" });
expect(summary.sample).toBeNull();
expect(summary.largestCount).toBe(0);
});
it("captures a single-part verify sample when classifier columns exist but no row is multi-entry", () => {
const rows = [
{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: EndTerrace", Walls: "Cavity: AsBuilt", Roofs: "Pitched: 200mm" },
];
const summary = detectMultiEntry(rows, MAPPING);
// Not multi-entry, but there IS a sample to verify (ADR-0004 Step 1).
expect(summary.largestCount).toBe(0);
expect(summary.sample).not.toBeNull();
expect(summary.sample!.count).toBe(1);
expect(summary.sample!.address).toBe("1 High St, AB1 2CD");
// All three mapped classifier columns are present, one entry each.
expect(summary.sample!.columns.map((c) => c.field).sort()).toEqual([
"property_type",
"roof_type",
"wall_type",
]);
expect(summary.sample!.columns.every((c) => c.entries.length === 1)).toBe(true);
});
it("picks the largest-count row as the sample and reports it as multi-entry", () => {
const rows = [
{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: EndTerrace", Walls: "Cavity: AsBuilt", Roofs: "Pitched: 200mm" },
{ Addr: "2 Low St", PC: "AB3 4EF", "Property Type": "House: Detached", Walls: "Cavity: AsBuilt, Cavity: Filled", Roofs: "Flat: AsBuilt, Pitched: 200mm" },
];
const summary = detectMultiEntry(rows, MAPPING);
expect(summary.largestCount).toBe(2);
expect(summary.countDistribution).toEqual({ "2": 1 });
expect(summary.sample!.address).toBe("2 Low St, AB3 4EF");
expect(summary.sample!.count).toBe(2);
// multiValuedFields are the ones that actually split.
expect([...summary.multiValuedFields].sort()).toEqual(["roof_type", "wall_type"]);
// The whole-dwelling Property Type column is still carried (for Step 1),
// with a single entry — Step 2 filters it out by entries.length.
const propertyCol = summary.sample!.columns.find((c) => c.field === "property_type");
expect(propertyCol?.entries).toHaveLength(1);
const wallCol = summary.sample!.columns.find((c) => c.field === "wall_type");
expect(wallCol?.entries.map((e) => e.raw)).toEqual(["Cavity: AsBuilt", "Cavity: Filled"]);
});
it("normalizes descriptions to lower-case (matching the classifier's key)", () => {
const rows = [{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: EndTerrace", Walls: "", Roofs: "" }];
const summary = detectMultiEntry(rows, MAPPING);
const entry = summary.sample!.columns.find((c) => c.field === "property_type")!.entries[0];
expect(entry.raw).toBe("House: EndTerrace");
expect(entry.description).toBe("house: endterrace");
});
});
describe("ordering helpers", () => {
it("isPermutation accepts a bijection and rejects duplicates/out-of-range", () => {
expect(isPermutation([0, 1])).toBe(true);
expect(isPermutation([1, 0, 2])).toBe(true);
expect(isPermutation([0, 0])).toBe(false);
expect(isPermutation([0, 2])).toBe(false);
});
it("assignmentToPermutation inverts assignment[pos]=slot to permutation[slot]=pos", () => {
// file position 1 holds the main building (slot 0), position 0 is extension 1.
expect(assignmentToPermutation([1, 0])).toEqual([1, 0]);
expect(assignmentToPermutation([0, 1, 2])).toEqual([0, 1, 2]);
expect(assignmentToPermutation([2, 0, 1])).toEqual([1, 2, 0]);
});
});

View file

@ -90,10 +90,17 @@ function buildAddress(
return parts.join(", ");
}
// Scan the mapped classifier columns for multi-entry rows and capture the
// largest-count sample. Only classifier columns are considered — they're the
// physical-element descriptions we slice into building parts; address columns
// are single-valued by nature.
// Scan the mapped classifier columns and capture one sample address. Only
// classifier columns are considered — they're the physical-element descriptions
// we slice into building parts; address columns are single-valued by nature.
//
// The sample serves both review steps (ADR-0004): the largest-count multi-entry
// row when one exists (Step 2 — Confirm order), otherwise the first row carrying
// any classifier value so Step 1 — Verify classification still has something to
// show. `largestCount >= 2` is the multi-entry signal; `sample != null` means
// "there is something to verify". The sample carries every mapped classifier
// column with a value — Step 1 lists them all; Step 2 renders only the
// multi-valued ones.
export function detectMultiEntry(
rows: Array<Record<string, unknown>>,
columnMapping: Record<string, string>,
@ -104,38 +111,49 @@ export function detectMultiEntry(
const multiValued = new Set<string>();
const countDistribution: Record<string, number> = {};
let largestCount = 0;
let sampleRowIndex = -1;
let multiEntryRowIndex = -1;
// Fallback sample for Step 1 when no row is multi-entry: the first row that
// carries any classifier value.
let firstClassifiedRowIndex = -1;
rows.forEach((row, index) => {
let rowMax = 0;
let hasValue = false;
for (const [field, header] of classifierCols) {
const n = splitEntries(row[header]).length;
if (n > 0) hasValue = true;
if (n > 1) multiValued.add(field);
if (n > rowMax) rowMax = n;
}
if (hasValue && firstClassifiedRowIndex === -1) firstClassifiedRowIndex = index;
if (rowMax >= 2) {
const key = String(rowMax);
countDistribution[key] = (countDistribution[key] ?? 0) + 1;
// First row at a new maximum becomes the sample.
// First row at a new maximum becomes the multi-entry sample.
if (rowMax > largestCount) {
largestCount = rowMax;
sampleRowIndex = index;
multiEntryRowIndex = index;
}
}
});
if (sampleRowIndex === -1) return EMPTY_MULTI_ENTRY_SUMMARY;
const sampleRowIndex =
multiEntryRowIndex !== -1 ? multiEntryRowIndex : firstClassifiedRowIndex;
if (sampleRowIndex === -1) {
return { multiValuedFields: [...multiValued], countDistribution, largestCount, sample: null };
}
const sampleRow = rows[sampleRowIndex];
// Show only the columns that are actually split in the sample row;
// single-value columns are whole-dwelling facts, not building parts.
// Every mapped classifier column with a value in the sample row. Step 1 lists
// them all; Step 2's ordering table filters to the multi-valued ones
// (single-value columns are whole-dwelling facts, not building parts).
const columns: MultiEntryColumn[] = classifierCols
.map(([field, header]) => ({
field,
header,
entries: splitEntries(sampleRow[header]),
}))
.filter((column) => column.entries.length > 1);
.filter((column) => column.entries.length > 0);
return {
multiValuedFields: [...multiValued],
@ -143,7 +161,7 @@ export function detectMultiEntry(
largestCount,
sample: {
address: buildAddress(sampleRow, columnMapping),
count: largestCount,
count: largestCount >= 2 ? largestCount : 1,
columns,
},
};

View file

@ -289,8 +289,12 @@ export async function setMultiEntryOrdering(
if (upload.status !== "awaiting_review")
return { kind: "wrong_state", current: upload.status };
const sample = upload.multiEntrySummary?.sample ?? null;
if (!sample) return { kind: "not_multi_entry" };
const summary = upload.multiEntrySummary;
// A sample now exists for non-multi-entry uploads too (Step 1's verify
// sample), so "is multi-entry" is largestCount >= 2, not "has a sample".
if (!summary || summary.largestCount < 2 || !summary.sample)
return { kind: "not_multi_entry" };
const sample = summary.sample;
const largest = String(sample.count);
if (!permutations[largest])
@ -310,6 +314,30 @@ export async function setMultiEntryOrdering(
return { kind: "ok", upload: updated };
}
export type SetVerifyAckOutcome =
| { kind: "ok"; upload: BulkUpload }
| { kind: "not_found" }
| { kind: "wrong_state"; current: string };
// Record the user's "Verify classification" acknowledgement (ADR-0004 Step 1).
// Allowed only at awaiting_review. Gates Finalise whenever the upload has
// classifier columns — independent of multi-entry, hence its own column rather
// than a flag on multiEntryOrdering.
export async function setVerifyAck(uploadId: string): Promise<SetVerifyAckOutcome> {
const upload = await loadById(uploadId);
if (!upload) return { kind: "not_found" };
if (upload.status !== "awaiting_review")
return { kind: "wrong_state", current: upload.status };
const [updated] = await db
.update(bulkAddressUploads)
.set({ verifyAck: true })
.where(eq(bulkAddressUploads.id, uploadId))
.returning();
if (!updated) return { kind: "not_found" };
return { kind: "ok", upload: updated };
}
export type SetMappingOutcome =
| { kind: "ok"; upload: BulkUpload }
| { kind: "not_found" }