landlord extension application

This commit is contained in:
Jun-te Kim 2026-06-02 12:57:05 +00:00
parent 90407799ac
commit 38c82ebca3
8 changed files with 364 additions and 38 deletions

View file

@ -15,7 +15,33 @@
"Bash(npx drizzle-kit *)",
"Bash(echo \"frontend tsc exit: $?\")",
"Bash(python3 -c ' *)",
"Bash(rm -f /workspaces/home/github/Model/backend/address2UPRN/local_handler/.env.local /workspaces/home/github/Model/backend/bulk_address2uprn_combiner/local_handler/.env.local && echo \"removed stub .env.local files\")"
"Bash(rm -f /workspaces/home/github/Model/backend/address2UPRN/local_handler/.env.local /workspaces/home/github/Model/backend/bulk_address2uprn_combiner/local_handler/.env.local && echo \"removed stub .env.local files\")",
"Bash(cat deployment/terraform/modules/s3_iam_policy/main.tf)",
"Bash(cat deployment/terraform/modules/s3_iam_policy/variables.tf)",
"Bash(terraform fmt *)",
"Bash(echo \"exit: $?\")",
"Bash(pip install *)",
"Bash(git -C /workspaces/assessment-model remote -v)",
"Bash(gh label *)",
"Bash(gh issue create --repo Hestia-Homes/assessment-model --label ready-for-agent --title 'Detect multi-entry rows and surface the largest-count sample on awaiting_review' --body ' *)",
"Bash(gh issue create --repo Hestia-Homes/assessment-model --label ready-for-agent --title 'Confirm building-part ordering and gate Finalise on it' --body ' *)",
"Bash(gh issue create --repo Hestia-Homes/assessment-model --label ready-for-agent --title 'Show our classification next to each multi-entry sample entry \\(read-only\\)' --body ' *)",
"Bash(gh issue create --repo Hestia-Homes/assessment-model --label ready-for-agent --title 'Editable classification verification writing source='\\\\''user'\\\\'', gating Finalise' --body ' *)",
"Bash(git check-ignore *)",
"Bash(git ls-tree *)",
"Bash(git worktree *)",
"Read(//workspaces/mig-wt/src/app/db/**)",
"Read(//workspaces/mig-wt/src/app/db/migrations/meta/**)",
"Bash(git branch *)",
"Bash(cp /workspaces/assessment-model/.env.local /tmp/mig-wt/.env.local; echo \"env copied\"; cat -n /tmp/mig-wt/src/app/db/schema/bulk_address_uploads.ts)",
"Bash(node /workspaces/assessment-model/node_modules/drizzle-kit/bin.cjs generate)",
"Bash(ln -s /workspaces/assessment-model/node_modules /tmp/mig-wt/node_modules)",
"Bash(node node_modules/drizzle-kit/bin.cjs generate)",
"Bash(git push *)",
"Bash(npm run *)",
"Bash(grep '\\\\.sql$')",
"Bash(git status *)",
"Bash(git checkout *)"
],
"deny": [
"Bash(npx drizzle-kit generate)",
@ -25,7 +51,9 @@
"/workspaces/home/github/Model/backend/app/bulk_uploads",
"/workspaces/home/github/Model/applications/landlord_description_overrides",
"/workspaces/home/github/Model/orchestration",
"/workspaces/home/github/Model/backend/address2UPRN/local_handler"
"/workspaces/home/github/Model/backend/address2UPRN/local_handler",
"/workspaces/home/github/Model/deployment/terraform/shared",
"/tmp/mig-wt"
]
}
}

View file

@ -45,6 +45,27 @@ _Avoid_: customer data, manual override, landlord data
The translation from a Landlord's free-text description in a BulkUpload column (e.g. `"cavity: filledcavity"`) to a canonical domain enum value (e.g. `WallType.CAVITY`). Produced by a `ColumnClassifier` (today an LLM, tomorrow possibly a lookup table or rules engine) in the Model service. Stored per-Portfolio, one row per `(category, description)`. A row carries provenance (`classifier` or `user`) so user overrides survive re-classification.
_Avoid_: column mapping (that's a separate concept — see `ColumnMapping` above), classification, dictionary
### Building parts
**Building part**:
One physically distinct part of a dwelling described by a single entry within a multi-valued cell. A dwelling is one **Main building** plus zero or more **Extensions**. Per-part descriptions appear as comma-separated entries in physical-element columns (e.g. `Walls`, `Roofs`); whole-dwelling columns (e.g. `Property Type`) carry a single entry and are **not** split per part.
_Avoid_: annexe, unit, section, dwelling part
**Main building**:
The principal building part of a dwelling — exactly one per address. The others are **Extensions**.
**Extension**:
A building part that is not the Main building, numbered **Extension 1 … Extension N-1** for an N-entry address.
_Avoid_: annexe, addition, outbuilding
**Multi-entry**:
The property of a BulkUpload row whose physical-element cells hold **more than one comma-separated entry**, one per **Building part**. Always intra-cell in our data — never multiple rows sharing one address/UPRN. Within a row, the multi-valued columns agree on entry-count, so **position `i` is the same Building part across every multi-valued column**.
_Avoid_: multi-row, multi-record, duplicate address
**Building-part ordering** (a.k.a. **ordering**):
The user's declaration, captured once per file, of which list-position maps to which Building part — needed because the entry order, although consistent within a file, is not guaranteed to list the Main building first (`"A, B"` could be `[Main, Extension 1]` or `[Extension 1, Main]`). Stored per entry-count as a permutation. See [ADR-0004](./docs/adr/0004-multi-entry-building-part-ordering.md).
_Avoid_: sort order, sequence, column mapping
## Lifecycle
A **BulkUpload** moves through these statuses:
@ -65,6 +86,8 @@ Re-mapping (PATCHing `columnMapping`) is legal only in `ready_for_processing` an
**Two writers**: Next.js owns transitions out of `mapping_complete`, into `processing`, and the terminal Finalise outcomes. FastAPI owns `combining` and `awaiting_review` — writing them direct to the DB during the combiner run. The BulkUpload aggregate observes both.
At `awaiting_review`, **Finalise is gated** (not a new status — a precondition on the action): when classifier columns were mapped the user must acknowledge the classification-verification step, and when the file is **Multi-entry** they must confirm the **Building-part ordering**. See [ADR-0004](./docs/adr/0004-multi-entry-building-part-ordering.md).
See [ADR-0001](./docs/adr/0001-bulk-upload-state-machine.md) for the deliberate "not yet" decisions baked into this lifecycle.
## Relationships

View file

@ -3,20 +3,24 @@ import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import { createS3Client, createRetrofitDataS3Client, retrofitDataS3Bucket } from "@/app/utils/s3";
import * as XLSX from "xlsx";
import { loadForAddressMatching, triggerAddressMatching, triggerClassifier } from "@/lib/bulkUpload/server";
import { loadForAddressMatching, saveMultiEntrySummary, triggerAddressMatching, triggerClassifier } from "@/lib/bulkUpload/server";
import { readSessionToken } from "@/lib/session";
import { ADDRESS_FIELDS } from "@/lib/bulkUpload/columnFields";
import { ADDRESS_FIELDS, classifierMapping } from "@/lib/bulkUpload/columnFields";
import { detectMultiEntry } from "@/lib/bulkUpload/multiEntry";
function transformFile(
buffer: Buffer,
columnMapping: Record<string, string> // field → source header
): { csv: string; error?: never } | { csv?: never; error: string } {
type SheetRow = Record<string, unknown>;
function readRows(buffer: Buffer): SheetRow[] {
const wb = XLSX.read(buffer, { type: "buffer" });
const sheet = wb.Sheets[wb.SheetNames[0]];
const rows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: "" });
if (rows.length === 0) return { error: "Empty file" };
return XLSX.utils.sheet_to_json<SheetRow>(sheet, { defval: "" });
}
// Address-matching CSV: address fields only, renamed to canonical headers.
function buildAddressCsv(
rows: SheetRow[],
columnMapping: Record<string, string> // field → source header
): { csv: string; error?: never } | { csv?: never; error: string } {
const outputHeaders: string[] = [];
const outputToSource: Record<string, string> = {};
for (const field of ADDRESS_FIELDS) {
@ -32,7 +36,7 @@ function transformFile(
return { error: 'Mapping must include "postcode"' };
const outputRows = rows.map((row) => {
const out: Record<string, unknown> = {};
const out: SheetRow = {};
for (const [outName, src] of Object.entries(outputToSource)) {
out[outName] = row[src] ?? "";
}
@ -43,6 +47,25 @@ function transformFile(
return { csv: XLSX.utils.sheet_to_csv(outSheet) };
}
// Classifier CSV: a projection of the parsed rows onto the mapped classifier
// source columns, keeping the ORIGINAL header names (the lambda resolves them
// via column_mapping). Doing the conversion here guarantees the classifier is
// handed a real CSV even when the upload itself was .xlsx/.xls — see ADR-0003.
// Several categories may point at the same source header, so the header list
// is reduced to distinct values before projecting.
function buildClassifierCsv(
  rows: SheetRow[],
  classifierMap: Record<string, string> // category → source header
): string {
  const distinctHeaders = Array.from(new Set(Object.values(classifierMap)));

  const projectedRows = rows.map((sourceRow) => {
    const picked: SheetRow = {};
    distinctHeaders.forEach((name) => {
      // A column missing from this row is written as an empty cell.
      picked[name] = sourceRow[name] ?? "";
    });
    return picked;
  });

  // Passing `header` pins the column order to the distinct header list.
  return XLSX.utils.sheet_to_csv(
    XLSX.utils.json_to_sheet(projectedRows, { header: distinctHeaders })
  );
}
export async function POST(
request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
@ -81,7 +104,15 @@ export async function POST(
return NextResponse.json({ error: "Failed to read source file" }, { status: 500 });
}
const transformed = transformFile(fileBuffer, upload.columnMapping!);
const rows = readRows(fileBuffer);
if (rows.length === 0)
return NextResponse.json({ error: "Empty file" }, { status: 422 });
// Detect multi-entry building parts now, while the whole file is parsed in
// memory, so the awaiting_review surface never re-reads it (ADR-0004).
await saveMultiEntrySummary(uploadId, detectMultiEntry(rows, upload.columnMapping!));
const transformed = buildAddressCsv(rows, upload.columnMapping!);
if (transformed.error)
return NextResponse.json({ error: transformed.error }, { status: 422 });
@ -102,13 +133,37 @@ export async function POST(
const s3Uri = `s3://${outputBucket}/${transformedKey}`;
// Convert the mapped classifier columns to their own CSV so the classifier
// lambda always parses a real CSV, never the raw upload (which may be
// .xlsx/.xls). Only when the user mapped ≥1 classifier column. See ADR-0003.
const classifierMap = classifierMapping(upload.columnMapping!);
let classifierS3Uri: string | undefined;
if (Object.keys(classifierMap).length > 0) {
const classifierKey = `bulk_onboarding_inputs/${portfolioId}/${uploadId}-classifier.csv`;
try {
await outputS3
.putObject({
Bucket: outputBucket,
Key: classifierKey,
Body: buildClassifierCsv(rows, classifierMap),
ContentType: "text/csv",
})
.promise();
classifierS3Uri = `s3://${outputBucket}/${classifierKey}`;
} catch (err) {
// Non-blocking: classification is skipped, address matching proceeds.
console.error("Failed to upload classifier CSV:", err);
}
}
const sessionToken = readSessionToken(request);
const trigger = await triggerAddressMatching({ uploadId, s3Uri, sessionToken });
if (trigger.kind === "trigger_failed")
return NextResponse.json({ error: trigger.message }, { status: trigger.status });
// Co-fire the landlord classifier (non-blocking) under the same task.
await triggerClassifier({ taskId: trigger.taskId, uploadId, sessionToken });
if (classifierS3Uri)
await triggerClassifier({ taskId: trigger.taskId, uploadId, s3Uri: classifierS3Uri, sessionToken });
return NextResponse.json({ taskId: trigger.taskId }, { status: 200 });
}

View file

@ -1,6 +1,30 @@
import { pgTable, uuid, text, timestamp, jsonb } from "drizzle-orm/pg-core";
import { sql } from "drizzle-orm";
// Shape of the multi_entry_summary jsonb (ADR-0004). Co-located with the column
// so the schema is self-contained; the detection logic in
// src/lib/bulkUpload/multiEntry.ts imports these.
/** One building-part entry split out of a multi-valued cell. */
export interface MultiEntryEntry {
/** Entry text as it appeared in the cell, with surrounding whitespace trimmed. */
raw: string;
/** Lower-cased form of `raw` (as produced by `splitEntries` in multiEntry.ts). */
description: string;
}
/** The entries of one mapped classifier column within the sample row. */
export interface MultiEntryColumn {
/** Classifier field (category) key taken from the column mapping. */
field: string;
/** Source header in the uploaded file that is mapped to `field`. */
header: string;
/** Entries of the cell in list order; index `i` is list-position `i`. */
entries: MultiEntryEntry[];
}
/** The single row captured as the multi-entry example shown to the user. */
export interface MultiEntrySample {
/** Display address joined from the mapped address fields; may be an empty string. */
address: string;
/** Largest entry-count found in any classifier column of the sample row. */
count: number;
/** Only the columns that hold more than one entry in the sample row. */
columns: MultiEntryColumn[];
}
/** Result of multi-entry detection over a whole upload, persisted as jsonb. */
export interface MultiEntrySummary {
/** Classifier fields that hold more than one entry in at least one row. */
multiValuedFields: string[];
/** Row counts keyed by the row's largest entry-count (as a string); only rows with 2+ entries are counted. */
countDistribution: Record<string, number>;
/** Largest entry-count seen in any row; 0 when no multi-entry row exists. */
largestCount: number;
/** The first row to reach `largestCount`, or null when no multi-entry row exists. */
sample: MultiEntrySample | null;
}
export const bulkAddressUploads = pgTable("bulk_address_uploads", {
id: uuid("id").defaultRandom().primaryKey(),
portfolioId: text("portfolio_id").notNull(),
@ -11,6 +35,9 @@ export const bulkAddressUploads = pgTable("bulk_address_uploads", {
status: text("status").notNull().default("ready_for_processing"),
sourceHeaders: text("source_headers").array().notNull().default(sql`'{}'`),
columnMapping: jsonb("column_mapping").$type<Record<string, string>>(),
// Multi-entry building-part detection, computed at start-address-matching
// and read by the awaiting_review review surface (ADR-0004).
multiEntrySummary: jsonb("multi_entry_summary").$type<MultiEntrySummary>(),
taskId: uuid("task_id"),
combinedOutputS3Uri: text("combined_output_s3_uri"),
createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),

View file

@ -8,6 +8,7 @@ import {
useFinalize,
useRequestCombine,
} from "@/lib/bulkUpload/client";
import type { MultiEntrySample } from "@/lib/bulkUpload/multiEntry";
interface Props {
portfolioSlug: string;
@ -60,6 +61,13 @@ export default function OnboardingProgress({
const canRunCombiner = taskDone && !taskFailed && upload.status === "processing";
const canFinalize = upload.status === "awaiting_review";
// Multi-entry building-part sample, shown read-only on the review surface
// (ADR-0004). Ordering confirmation arrives in a later slice.
const multiEntrySample =
upload.status === "awaiting_review"
? (upload.multiEntrySummary?.sample ?? null)
: null;
return (
<div className="mt-6 space-y-3">
<div className="w-full bg-gray-100 rounded-full h-2 overflow-hidden">
@ -70,17 +78,24 @@ export default function OnboardingProgress({
</div>
<div className="flex items-center gap-4 text-xs text-gray-500">
{total > 0 && (
<span>
<span className="font-semibold text-gray-700">{completedSubtasks}</span> / {total} batches complete
</span>
)}
{failedSubtasks > 0 && (
<span className="flex items-center gap-1 text-red-500 font-semibold">
<span className="w-1.5 h-1.5 rounded-full bg-red-400" />
{failedSubtasks} failed
</span>
)}
{/* Address matching: standardises addresses against the OS lookup, in batches. */}
<span className="flex items-center gap-1">
<span className="text-gray-400">Address matching:</span>
{failedSubtasks > 0 ? (
<span className="flex items-center gap-1 text-red-500 font-semibold">
<span className="w-1.5 h-1.5 rounded-full bg-red-400" />
{failedSubtasks} of {total} batches failed
</span>
) : total > 0 && completedSubtasks >= total ? (
<span className="font-semibold text-green-600">complete</span>
) : (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
running{total > 0 ? ` · ${completedSubtasks} / ${total} batches` : ""}
</span>
)}
</span>
{/* Classification: turns the landlord's free-text descriptions into EPC categories. */}
{classifierTotal > 0 && (
<span className="flex items-center gap-1">
<span className="text-gray-400">Classification:</span>
@ -99,12 +114,6 @@ export default function OnboardingProgress({
)}
</span>
)}
{!taskDone && (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
Running
</span>
)}
{isCombining && (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
@ -119,6 +128,8 @@ export default function OnboardingProgress({
)}
</div>
{multiEntrySample && <MultiEntrySamplePanel sample={multiEntrySample} />}
{(canRunCombiner || canFinalize) && (
<div className="flex flex-col gap-2 pt-2">
{canRunCombiner && (
@ -164,6 +175,51 @@ export default function OnboardingProgress({
);
}
// Read-only panel previewing the multi-entry sample row (ADR-0004). Every
// comma-separated entry stands for one building part; confirming their order
// is handled in a later slice, so rows are labelled only by 1-based position.
function MultiEntrySamplePanel({ sample }: { sample: MultiEntrySample }) {
  const { address, count, columns } = sample;

  // Fall back to a generic label when no address could be composed.
  const addressLabel = address ? (
    <span className="font-medium">{address}</span>
  ) : (
    "An address"
  );

  // One header cell per split column.
  const headingCells = columns.map(({ field, header }) => (
    <th key={field} className="py-1 pr-3 font-medium">
      {header}
    </th>
  ));

  // One table row per list position; a column with fewer entries shows a dash.
  const bodyRows = Array.from({ length: count }, (_, position) => (
    <tr key={position} className="border-t border-amber-100 text-amber-900">
      <td className="py-1 pr-3 text-amber-600">{position + 1}</td>
      {columns.map(({ field, entries }) => (
        <td key={field} className="py-1 pr-3">
          {entries[position]?.raw ?? "—"}
        </td>
      ))}
    </tr>
  ));

  return (
    <div className="rounded-lg border border-amber-200 bg-amber-50 px-4 py-3">
      <p className="text-sm font-semibold text-amber-900">
        Multiple building parts detected
      </p>
      <p className="mt-0.5 text-xs text-amber-800">
        {addressLabel}{" "}
        has {count} building parts (e.g. a main building and extensions).
        You&apos;ll be asked to confirm their order before finalising.
      </p>
      <div className="mt-3 overflow-x-auto">
        <table className="w-full border-collapse text-xs">
          <thead>
            <tr className="text-left text-amber-700">
              <th className="py-1 pr-3 font-medium">Position</th>
              {headingCells}
            </tr>
          </thead>
          <tbody>{bodyRows}</tbody>
        </table>
      </div>
    </div>
  );
}
function StageButton({
label,
activeLabel,

View file

@ -104,8 +104,8 @@ export default function MapColumnsClient({
className="w-full text-sm border border-gray-200 rounded-lg px-3 py-2 bg-white text-gray-800 focus:outline-none focus:ring-2 focus:ring-[#14163d]/20 focus:border-[#14163d]"
>
<option value={NOT_PROVIDED}>Not provided</option>
{sourceHeaders.map((header) => (
<option key={header} value={header}>
{sourceHeaders.map((header, index) => (
<option key={`${header}-${index}`} value={header}>
{header}
</option>
))}

View file

@ -0,0 +1,119 @@
// Multi-entry building-part detection (ADR-0004).
//
// A BulkUpload row can carry several comma-separated entries in a physical-
// element column (e.g. Walls = "Cavity: AsBuilt (1976-1982), Cavity:
// FilledCavity"). Each entry is a Building part (Main building + Extensions).
// This module finds that pattern and captures one sample — the row with the
// MOST building parts — so the user can confirm the ordering downstream.
//
// Pure + I/O-free so it's unit-testable; the start-address-matching route runs
// it over the already-parsed upload rows and persists the result on the upload.
import { ADDRESS_FIELDS, classifierMapping } from "./columnFields";
import type {
MultiEntryEntry,
MultiEntryColumn,
MultiEntrySummary,
} from "@/app/db/schema/bulk_address_uploads";
// The jsonb shape lives with the column (schema/bulk_address_uploads.ts) so the
// migration is self-contained; re-export here for callers of this module.
export type {
MultiEntryEntry,
MultiEntryColumn,
MultiEntrySample,
MultiEntrySummary,
} from "@/app/db/schema/bulk_address_uploads";
// Summary for "nothing detected": detectMultiEntry returns this when no
// classifier columns are mapped or when no row holds 2+ entries. The same
// object is returned by reference each time, so callers should treat it as
// read-only.
export const EMPTY_MULTI_ENTRY_SUMMARY: MultiEntrySummary = {
multiValuedFields: [],
countDistribution: {},
largestCount: 0,
sample: null,
};
// Break one cell value into its building-part entries. Follows the
// classifier's own recipe — split on "," then trim then lower-case — and
// discards blank fragments, so the raw and normalized forms stay aligned
// position-for-position.
export function splitEntries(value: unknown): MultiEntryEntry[] {
  const entries: MultiEntryEntry[] = [];
  // null/undefined cells are treated as the empty string.
  for (const fragment of String(value ?? "").split(",")) {
    const raw = fragment.trim();
    // A stray or trailing comma must not create a phantom building part.
    if (raw.length === 0) continue;
    entries.push({ raw, description: raw.toLowerCase() });
  }
  return entries;
}
// Build a human-readable address for a row by joining, in ADDRESS_FIELDS
// order, the non-blank values of every mapped address field. The
// internal_reference field is left out; unmapped fields are skipped.
function buildAddress(
  row: Record<string, unknown>,
  columnMapping: Record<string, string>,
): string {
  const pieces: string[] = [];
  for (const { value: fieldName } of ADDRESS_FIELDS) {
    const sourceHeader =
      fieldName === "internal_reference" ? undefined : columnMapping[fieldName];
    if (!sourceHeader) continue;

    const text = String(row[sourceHeader] ?? "").trim();
    if (text !== "") pieces.push(text);
  }
  return pieces.join(", ");
}
// Walk every row's mapped classifier columns looking for cells that hold more
// than one entry, and keep the first row that reaches the highest entry-count
// as the sample. Address columns are ignored: only classifier columns carry
// the physical-element descriptions that get sliced into building parts.
export function detectMultiEntry(
  rows: Array<Record<string, unknown>>,
  columnMapping: Record<string, string>,
): MultiEntrySummary {
  const classifierCols = Object.entries(classifierMapping(columnMapping));
  if (classifierCols.length === 0) return EMPTY_MULTI_ENTRY_SUMMARY;

  const splitFields = new Set<string>();
  const countDistribution: Record<string, number> = {};
  let largestCount = 0;
  let sampleRowIndex = -1;

  rows.forEach((row, rowIndex) => {
    // Largest entry-count across this row's classifier cells.
    let partsInRow = 0;
    for (const [field, header] of classifierCols) {
      const entryCount = splitEntries(row[header]).length;
      if (entryCount > 1) splitFields.add(field);
      partsInRow = Math.max(partsInRow, entryCount);
    }

    // Rows with fewer than two entries everywhere are not multi-entry.
    if (partsInRow < 2) return;

    const bucket = String(partsInRow);
    countDistribution[bucket] = (countDistribution[bucket] ?? 0) + 1;

    // Strictly greater: on ties the earliest row stays the sample.
    if (partsInRow > largestCount) {
      largestCount = partsInRow;
      sampleRowIndex = rowIndex;
    }
  });

  if (sampleRowIndex < 0) return EMPTY_MULTI_ENTRY_SUMMARY;

  const sampleRow = rows[sampleRowIndex];

  // Keep only the columns that are genuinely split in the sample row; a
  // single-value column describes the whole dwelling, not a building part.
  const columns: MultiEntryColumn[] = [];
  for (const [field, header] of classifierCols) {
    const entries = splitEntries(sampleRow[header]);
    if (entries.length > 1) columns.push({ field, header, entries });
  }

  return {
    multiValuedFields: Array.from(splitFields),
    countDistribution,
    largestCount,
    sample: {
      address: buildAddress(sampleRow, columnMapping),
      count: largestCount,
      columns,
    },
  };
}

View file

@ -6,6 +6,7 @@ import { count, desc, eq, sql } from "drizzle-orm";
import type { BulkUpload, BulkUploadStatus, ProgressView, TaskSummary } from "./types";
import { validateColumnMapping, classifierMapping } from "./columnFields";
import { SUBTASK_SERVICE } from "./types";
import type { MultiEntrySummary } from "./multiEntry";
const REMAP_ALLOWED: ReadonlySet<BulkUploadStatus> = new Set([
"ready_for_processing",
@ -102,6 +103,20 @@ export async function getProgressView(uploadId: string): Promise<ProgressView |
return { upload, task };
}
// Store the multi-entry building-part detection result on the upload row
// (ADR-0004). It is computed once, at start-address-matching, from rows that
// are already parsed, and read again on the awaiting_review surface. The
// update writes this one column only, so the status/taskId update that
// follows does not disturb it.
export async function saveMultiEntrySummary(
  uploadId: string,
  summary: MultiEntrySummary,
): Promise<void> {
  const patch = { multiEntrySummary: summary };
  const onlyThisUpload = eq(bulkAddressUploads.id, uploadId);
  await db.update(bulkAddressUploads).set(patch).where(onlyThisUpload);
}
export type SetMappingOutcome =
| { kind: "ok"; upload: BulkUpload }
| { kind: "not_found" }
@ -211,13 +226,16 @@ export async function triggerAddressMatching(args: {
return { kind: "ok", taskId: task.id };
}
// Co-fires the landlord classifier as a subtask under the address task. Reads
// the ORIGINAL upload (the address-matching CSV strips the description columns)
// and is non-blocking: a trigger failure marks only the classifier subtask, so
// address matching is unaffected. See ADR-0003.
// Co-fires the landlord classifier as a subtask under the address task. Reads a
// dedicated classifier CSV (the classifier columns converted from the upload by
// the start-address-matching route — the address-matching CSV strips the
// description columns), so the lambda always parses a real CSV even for
// .xlsx/.xls uploads. Non-blocking: a trigger failure marks only the classifier
// subtask, so address matching is unaffected. See ADR-0003.
export async function triggerClassifier(args: {
taskId: string;
uploadId: string;
s3Uri: string;
sessionToken: string | undefined;
}): Promise<void> {
const upload = await loadById(args.uploadId);
@ -239,7 +257,7 @@ export async function triggerClassifier(args: {
const payload = {
task_id: args.taskId,
sub_task_id: subTask.id,
s3_uri: `s3://${upload.s3Bucket}/${upload.s3Key}`,
s3_uri: args.s3Uri,
portfolio_id: Number(upload.portfolioId),
column_mapping: columnMapping,
};