diff --git a/.claude/settings.json b/.claude/settings.json index 7b99cc77..878b2d3b 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -1,8 +1,28 @@ { "permissions": { + "allow": [ + "Read(//home/vscode/.claude/skills/grill-me/**)", + "Bash(grep \"\\\\.py$\")", + "Bash(git fetch *)", + "Bash(git add *)", + "Bash(git commit *)", + "Bash(git merge *)", + "Read(//home/vscode/.claude/skills/to-issues/**)", + "Read(//home/vscode/.claude/skills/triage/**)", + "Read(//home/vscode/.claude/skills/**)", + "Bash(echo \"tsc exit: $?\")", + "Bash(npm install *)", + "Bash(npx drizzle-kit *)", + "Bash(echo \"frontend tsc exit: $?\")" + ], "deny": [ "Bash(npx drizzle-kit generate)", "Bash(npx drizzle-kit push)" + ], + "additionalDirectories": [ + "/workspaces/home/github/Model/backend/app/bulk_uploads", + "/workspaces/home/github/Model/applications/landlord_description_overrides", + "/workspaces/home/github/Model/orchestration" ] } } diff --git a/docs/wip/landlord-override-frontend-plan.md b/docs/wip/landlord-override-frontend-plan.md index 9cf4a362..a67f666d 100644 --- a/docs/wip/landlord-override-frontend-plan.md +++ b/docs/wip/landlord-override-frontend-plan.md @@ -118,10 +118,13 @@ framing, which was wrong once classifier header-sharing became a requirement. POST-back. - **State machine:** the classifier runs as a **subtask under the same address task** (not a separate task, not a new `BulkUpload` status). Both subtasks - fire together at the **"Start address matching"** action. Safe: the combiner - globs S3 `ara_raw_outputs/{task_id}/`, which the classifier never writes to, - so the combined address output is not affected; `subtask_handler` does no - sibling-completion gating. + fire together at the **"Start address matching"** action. The combined address + *output* is not affected (the classifier writes Postgres, not + `ara_raw_outputs/{task_id}/`), **but** the parent Task *status* IS recomputed + from all subtasks (`TaskOrchestrator._cascade`), so a classifier failure fails + the onboarding task and gates the combiner. This coupling was knowingly + accepted (2026-05-28), superseding Q1 non-blocking — see ADR-0003's Amended + note. (Corrects an earlier wrong claim that the envelope does no gating.) - **Progress honesty:** add a nullable **`service`/`kind` discriminator to `sub_task`** (existing rows = address/legacy) so the progress view shows address batches vs classification separately and attributes failures @@ -164,9 +167,9 @@ ADR-0002 anticipates. Grilling is complete (Q1–Q7). Suggested follow-ups: -1. Promote Q4 (trigger + state-machine integration) to a **frontend ADR-0003**; - cross-link the backend's ADR-0003 (which already superseded ADR-0002's - Next.js-writes clause). +1. **Done** — Q4 promoted to + [ADR-0003](../adr/0003-classifier-triggers-as-address-subtask.md) (trigger + + state-machine integration), cross-linking the backend's ADR-0003. 2. Break the work into issues (`/to-issues`): (a) invert the mapping shape + UI + migration + validation; (b) `sub_task` discriminator + progress view; (c) classifier trigger (new FastAPI endpoint + payload, fire at "Start"); diff --git a/docs/wip/landlord-override-verification.md b/docs/wip/landlord-override-verification.md new file mode 100644 index 00000000..82bcff08 --- /dev/null +++ b/docs/wip/landlord-override-verification.md @@ -0,0 +1,88 @@ +# Landlord override e2e — verification & deploy checklist + +**Created:** 2026-05-28 +**Branch:** `feature/frontend_landlord_overrides` (this repo) + `feature/landlord_data` (Model repo) +**Plan:** [landlord-override-frontend-plan.md](./landlord-override-frontend-plan.md) · **ADR:** [0003-classifier-triggers-as-address-subtask.md](../adr/0003-classifier-triggers-as-address-subtask.md) + +## Context for picking this up cold + +The landlord-classifier e2e is **implemented across both repos but uncommitted**, and +**statically verified only** (frontend `tsc` 0 errors, `next lint` clean, backend +`py_compile` clean). It has **not** been run live — that needs the steps below +(migrations applied, SQS queue + env, FastAPI endpoint + lambda deployed with +OpenAI/S3/Postgres access). Two migration files are **generated but not applied**: +`0215_invert_column_mapping.sql` (data) and `0216_add_subtask_service.sql` (schema). + +Work through the sections in order — each step's prerequisites come first. + +--- + +## A. Before you start +- [ ] Use dev/preview first, not prod. +- [ ] Confirm `.env.local` DB creds (`DB_HOST/PORT/USERNAME/PASSWORD/NAME`) point at the target DB. +- [ ] **Back up** `column_mapping` — the 0215 inversion is one-shot/irreversible: + ```sql + CREATE TABLE _bak_bulk_mapping AS + SELECT id, column_mapping FROM bulk_address_uploads WHERE column_mapping IS NOT NULL; + ``` + +## B. Database migrations ⚠️ read the gotcha +`0215` = data (inverts `header→field` → `field→header`); `0216` = schema (`ADD COLUMN sub_task.service`). + +⚠️ package.json only has `migration:push` (`drizzle-kit push`). **`push` diffs schema and will NOT run the 0215 data `UPDATE`** — it would add the column but silently skip the inversion. Use `migrate`: +- [ ] ```bash + npx drizzle-kit migrate # runs 0215 then 0216 in order + ``` + (If the team only uses `push`: run `push` for 0216, then execute `0215`'s SQL manually.) +- [ ] ⚠️ Run **0215 exactly once, on old-shape data**. Re-running re-inverts and corrupts. `migrate` guards via the journal; manual runs don't. +- [ ] Verify 0215 — values should now be headers: + ```sql + SELECT column_mapping FROM bulk_address_uploads WHERE column_mapping IS NOT NULL LIMIT 5; + -- expect {"address_1":"Addr 1","postcode":"PCode", ...} + ``` +- [ ] Verify 0216: + ```sql + SELECT 1 FROM information_schema.columns + WHERE table_name='sub_task' AND column_name='service'; + ``` + +## C. Backend deploy (Model service) +- [ ] Create an SQS queue for the classifier (e.g. `landlord-description-overrides`). +- [ ] Set **`LANDLORD_OVERRIDES_SQS_URL`** in the FastAPI env to that queue. +- [ ] Deploy FastAPI so `/v1/bulk-uploads/trigger-landlord-overrides` is live. +- [ ] Deploy the lambda (`applications/landlord_description_overrides`) + event-source mapping queue → lambda. +- [ ] Lambda env/IAM: `OPENAI_API_KEY`, Postgres creds, **S3 read on the original-upload bucket** (it reads `upload.s3Bucket/s3Key`, not `retrofit-data-dev`). + +## D. Frontend deploy +- [ ] Deploy `assessment-model` with the new code (`FASTAPI_API_URL` / `FASTAPI_API_KEY` already set). + +## E. Verify the UI (Column Remapper) +- [ ] Map-columns page shows **one row per field with a header dropdown**, split into **Address fields** + **Landlord description fields**. +- [ ] Leaving Address 1 / Postcode unset blocks submit. +- [ ] Two **address** fields → one column is blocked; the **same** column → Property Type + Built Form is allowed. +- [ ] Existing `mapping_complete` uploads open with their mapping intact (confirms 0215). + +## F. Verify trigger → classify → persist +- [ ] Map ≥1 landlord-description field, click **Start address matching**. +- [ ] `sub_task` has two rows under the task: `service='address2uprn'` and `service='landlord_description_overrides'`. +- [ ] SQS message enqueued + lambda ran (CloudWatch). +- [ ] Rows appear with `source='classifier'`: + ```sql + SELECT description, value FROM landlord_property_type_overrides WHERE portfolio_id = LIMIT 10; + ``` + +## G. Verify the results view +- [ ] `/portfolio//landlord-overrides` lists `description → value` per category with a "classifier" badge. (No nav link yet — reach by URL.) + +## H. Regression — address matching unaffected +- [ ] The same upload's address pipeline still emits the canonical CSV (`Address 1`/`postcode`) and combines normally. + +## I. Watch-out (by design — ADR-0003 "accepted coupling") +- [ ] If the classifier subtask **fails**, the shared onboarding task goes FAILED and **"Run Combiner" is blocked**; the task only COMPLETEs once classification finishes. If painful, switch to the separate-task design in the ADR. + +--- + +## Still open / not done +- [ ] Commit the work (this repo + Model repo, separately) — currently uncommitted. +- [ ] Nav link to `/portfolio//landlord-overrides` (reachable by URL only). +- [ ] User-edit write-back for overrides (deferred — Q7 "read-only this iteration"). diff --git a/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts b/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts index 5fd282fa..14441ce2 100644 --- a/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts +++ b/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts @@ -3,20 +3,13 @@ import { getServerSession } from "next-auth"; import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions"; import { createS3Client, createRetrofitDataS3Client, retrofitDataS3Bucket } from "@/app/utils/s3"; import * as XLSX from "xlsx"; -import { loadForAddressMatching, triggerAddressMatching } from "@/lib/bulkUpload/server"; +import { loadForAddressMatching, triggerAddressMatching, triggerClassifier } from "@/lib/bulkUpload/server"; import { readSessionToken } from "@/lib/session"; - -const FIELD_RENAME: Record = { - address_1: "Address 1", - address_2: "Address 2", - address_3: "Address 3", - postcode: "postcode", - internal_reference: "Internal Reference", -}; +import { ADDRESS_FIELDS } from "@/lib/bulkUpload/columnFields"; function transformFile( buffer: Buffer, - columnMapping: Record + columnMapping: Record // field → source header ): { csv: string; error?: never } | { csv?: never; error: string } { const wb = XLSX.read(buffer, { type: "buffer" }); const sheet = wb.Sheets[wb.SheetNames[0]]; @@ -24,16 +17,13 @@ function transformFile( if (rows.length === 0) return { error: "Empty file" }; - const sourceHeaders = Object.keys(rows[0]); const outputHeaders: string[] = []; - const sourceToOutput: Record = {}; - - for (const src of sourceHeaders) { - const mapped = columnMapping[src]; - if (!mapped || mapped === "skip") continue; - const renamed = FIELD_RENAME[mapped] ?? mapped; - outputHeaders.push(renamed); - sourceToOutput[src] = renamed; + const outputToSource: Record = {}; + for (const field of ADDRESS_FIELDS) { + const src = columnMapping[field.value]; + if (!src || !field.outputHeader) continue; + outputHeaders.push(field.outputHeader); + outputToSource[field.outputHeader] = src; } if (!outputHeaders.includes("Address 1")) @@ -43,8 +33,8 @@ function transformFile( const outputRows = rows.map((row) => { const out: Record = {}; - for (const [src, renamed] of Object.entries(sourceToOutput)) { - out[renamed] = row[src] ?? ""; + for (const [outName, src] of Object.entries(outputToSource)) { + out[outName] = row[src] ?? ""; } return out; }); @@ -112,13 +102,13 @@ export async function POST( const s3Uri = `s3://${outputBucket}/${transformedKey}`; - const trigger = await triggerAddressMatching({ - uploadId, - s3Uri, - sessionToken: readSessionToken(request), - }); + const sessionToken = readSessionToken(request); + const trigger = await triggerAddressMatching({ uploadId, s3Uri, sessionToken }); if (trigger.kind === "trigger_failed") return NextResponse.json({ error: trigger.message }, { status: trigger.status }); + // Co-fire the landlord classifier (non-blocking) under the same task. + await triggerClassifier({ taskId: trigger.taskId, uploadId, sessionToken }); + return NextResponse.json({ taskId: trigger.taskId }, { status: 200 }); } diff --git a/src/app/db/migrations/meta/_journal.json b/src/app/db/migrations/meta/_journal.json index 194575bd..452acfd0 100644 --- a/src/app/db/migrations/meta/_journal.json +++ b/src/app/db/migrations/meta/_journal.json @@ -1513,69 +1513,6 @@ "when": 1779992128370, "tag": "0216_add_subtask_service", "breakpoints": true - }, - { - "idx": 217, - "version": "7", - "when": 1780404222902, - "tag": "0217_gray_hellion", - "breakpoints": true - }, - { - "idx": 218, - "version": "7", - "when": 1780408378351, - "tag": "0218_natural_umar", - "breakpoints": true - }, - { - "idx": 219, - "version": "7", - "when": 1780419959831, - "tag": "0219_add_verify_ack", - "breakpoints": true - }, - { - "idx": 220, - "version": "7", - "when": 1780491109956, - "tag": "0220_round_retro_girl", - "breakpoints": true - }, - { - "idx": 221, - "version": "7", - "when": 1780566543108, - "tag": "0221_nice_sumo", - "breakpoints": true - }, - { - "idx": 222, - "version": "7", - "when": 1780647165601, - "tag": "0222_nifty_hellcat", - "breakpoints": true - }, - { - "idx": 223, - "version": "7", - "when": 1780647248894, - "tag": "0223_recommendation_plan_id_backfill", - "breakpoints": true - }, - { - "idx": 224, - "version": "7", - "when": 1780653770494, - "tag": "0224_busy_nitro", - "breakpoints": true - }, - { - "idx": 225, - "version": "7", - "when": 1780654800000, - "tag": "0225_recommendation_material_id_backfill", - "breakpoints": true } ] } \ No newline at end of file diff --git a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx index 3824b1b4..b298d7f6 100644 --- a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx +++ b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx @@ -41,11 +41,16 @@ export default function OnboardingProgress({ } const { task, upload } = progress.data; - const total = task?.totalSubtasks ?? 0; - const completedSubtasks = task?.completedSubtasks ?? 0; - const failedSubtasks = task?.failedSubtasks ?? 0; + // Address-matching batches drive the bar; classification is shown separately. + const total = task?.addressTotal ?? 0; + const completedSubtasks = task?.addressCompleted ?? 0; + const failedSubtasks = task?.addressFailed ?? 0; const percent = total > 0 ? Math.round((completedSubtasks / total) * 100) : 0; + const classifierTotal = task?.classifierTotal ?? 0; + const classifierCompleted = task?.classifierCompleted ?? 0; + const classifierFailed = task?.classifierFailed ?? 0; + const taskStatus = task?.status.toLowerCase() ?? ""; const taskDone = TASK_TERMINAL_STATUSES.has(taskStatus); const taskFailed = TASK_FAILED_STATUSES.has(taskStatus); @@ -76,6 +81,24 @@ export default function OnboardingProgress({ {failedSubtasks} failed )} + {classifierTotal > 0 && ( + + Classification: + {classifierFailed > 0 ? ( + + + failed + + ) : classifierCompleted >= classifierTotal ? ( + complete + ) : ( + + + running + + )} + + )} {!taskDone && ( diff --git a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx index adda7d38..fe729d6d 100644 --- a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx +++ b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx @@ -10,38 +10,14 @@ import { ArrowsRightLeftIcon, } from "@heroicons/react/24/outline"; import { useSetColumnMapping } from "@/lib/bulkUpload/client"; - -const INTERNAL_FIELDS = [ - { value: "address_1", label: "Address 1", required: true }, - { value: "address_2", label: "Address 2", required: false }, - { value: "address_3", label: "Address 3", required: false }, - { value: "postcode", label: "Postcode", required: true }, - { value: "internal_reference", label: "Internal Reference (Optional)", required: false }, - { value: "skip", label: "Skip this column", required: false }, -]; - -const REQUIRED_VALUES = ["address_1", "postcode"]; - -function autoDetect(header: string): string { - const h = header.toLowerCase().replace(/[\s_\-]/g, ""); - if (/^(address|addr)(line)?(1|one)?$/.test(h)) return "address_1"; - if (/^(address|addr)(line)?(2|two)|^street$/.test(h)) return "address_2"; - if (/^(address|addr)(line)?(3|three)|^locality$|^town$|^city$/.test(h)) return "address_3"; - if (/^post(al)?code$|^postcode$|^pcode$/.test(h)) return "postcode"; - if (/^(internal)?ref(erence)?$|^id$/.test(h)) return "internal_reference"; - return "skip"; -} - -function buildInitialMapping( - headers: string[], - existing?: Record -): Record { - const mapping: Record = {}; - for (const h of headers) { - mapping[h] = existing?.[h] ?? autoDetect(h); - } - return mapping; -} +import { + ADDRESS_FIELDS, + CLASSIFIER_FIELDS, + NOT_PROVIDED, + buildInitialMapping, + validateColumnMapping, + type InternalField, +} from "@/lib/bulkUpload/columnFields"; interface Props { portfolioId: string; @@ -59,19 +35,24 @@ export default function MapColumnsClient({ existingMapping, }: Props) { const router = useRouter(); + // mapping: internal field → source CSV header. Unmapped fields are absent. const [mapping, setMapping] = useState>( buildInitialMapping(sourceHeaders, existingMapping) ); const setMappingMutation = useSetColumnMapping(portfolioId, uploadId); - const mappedValues = Object.values(mapping).filter((v) => v !== "skip"); - const missingRequired = REQUIRED_VALUES.filter((r) => !mappedValues.includes(r)); + const validationError = validateColumnMapping(mapping); const submitting = setMappingMutation.isPending; - const error = setMappingMutation.error?.message ?? null; - const canSubmit = missingRequired.length === 0 && !submitting; + const requestError = setMappingMutation.error?.message ?? null; + const canSubmit = validationError === null && !submitting; - function setField(header: string, value: string) { - setMapping((prev) => ({ ...prev, [header]: value })); + function setField(field: string, header: string) { + setMapping((prev) => { + const next = { ...prev }; + if (header === NOT_PROVIDED) delete next[field]; + else next[field] = header; + return next; + }); } function handleSubmit() { @@ -86,6 +67,86 @@ export default function MapColumnsClient({ ); } + function renderRow(field: InternalField) { + const value = mapping[field.value] ?? NOT_PROVIDED; + const isMapped = value !== NOT_PROVIDED; + return ( +
+ {/* Internal field */} +
+
+ +
+
+

+ {field.label} + {field.required && *} +

+

+ {field.kind === "classifier" ? "Landlord description" : "Internal field"} +

+
+
+ + {/* Arrow */} +
+ +
+ + {/* Header picker */} +
+ +
+ + {/* Status badge */} +
+ + + {isMapped ? "Mapped" : "Not provided"} + +
+
+ ); + } + + function renderSection(title: string, subtitle: string, fields: InternalField[]) { + return ( +
+
+

+ {title} +

+

{subtitle}

+
+ {sourceHeaders.length === 0 ? ( +
+ No headers found in this file. +
+ ) : ( +
{fields.map(renderRow)}
+ )} +
+ ); + } + return (
{/* Breadcrumb + step */} @@ -116,102 +177,27 @@ export default function MapColumnsClient({ Column Remapper

- Align your spreadsheet headers with our internal property data structure to - ensure accurate address processing. + Tell us which spreadsheet column feeds each field. Address fields drive + matching; landlord-description fields are classified into property facts.

- {/* Table */} -
- {/* Column headers */} -
- - Spreadsheet Header - - - - Internal Field Mapping - - - Status - -
- - {sourceHeaders.length === 0 ? ( -
- No headers found in this file. -
- ) : ( -
- {sourceHeaders.map((header) => { - const value = mapping[header] ?? "skip"; - const isMapped = value !== "skip"; - return ( -
- {/* Source header */} -
-
- -
-
-

{header}

-

Source column

-
-
- - {/* Arrow */} -
- -
- - {/* Dropdown */} -
- -
- - {/* Status badge */} -
- - - {isMapped ? "Mapped" : "Skipped"} - -
-
- ); - })} -
- )} -
- - {/* Validation error */} - {missingRequired.length > 0 && ( -

- Required fields not yet mapped:{" "} - {missingRequired - .map((r) => INTERNAL_FIELDS.find((f) => f.value === r)?.label) - .join(", ")} -

+ {renderSection( + "Address fields", + "Used for address matching. A column can feed only one address field.", + ADDRESS_FIELDS )} - {error &&

{error}

} + {renderSection( + "Landlord description fields (optional)", + "Classified into property facts. Several fields may share one column.", + CLASSIFIER_FIELDS + )} + + {/* Validation / request error */} + {validationError && ( +

{validationError}

+ )} + {requestError &&

{requestError}

} {/* Footer */}
@@ -249,8 +235,10 @@ export default function MapColumnsClient({ Pro Tip

- “Ensure your source file doesn't have blank headers. Any column mapped to - “Skip” will be ignored during import.” + “Fields left as “Not provided” are ignored. The same + column can feed several landlord-description fields — e.g. one + “Property Type” column can drive both Property Type and Built + Form.”

diff --git a/src/app/portfolio/[slug]/(portfolio)/landlord-overrides/page.tsx b/src/app/portfolio/[slug]/(portfolio)/landlord-overrides/page.tsx new file mode 100644 index 00000000..9e9ac249 --- /dev/null +++ b/src/app/portfolio/[slug]/(portfolio)/landlord-overrides/page.tsx @@ -0,0 +1,101 @@ +import { getServerSession } from "next-auth"; +import { redirect } from "next/navigation"; +import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions"; +import { + getLandlordOverrides, + type OverrideRow, + type LandlordOverrideCategory, +} from "@/lib/landlordOverrides/server"; +import { CLASSIFIER_FIELDS } from "@/lib/bulkUpload/columnFields"; + +export default async function LandlordOverridesPage(props: { + params: Promise<{ slug: string }>; +}) { + const { slug } = await props.params; + const session = await getServerSession(AuthOptions); + if (!session) redirect("/login"); + + const results = await getLandlordOverrides(slug); + const total = Object.values(results).reduce((n, rows) => n + rows.length, 0); + + return ( +
+
+

+ Landlord overrides +

+

+ Property facts classified from your bulk-upload descriptions. Read-only + — editing comes later. +

+
+ + {total === 0 ? ( +
+ No classified values yet. They appear here once a bulk upload with + landlord-description columns has been processed. +
+ ) : ( + CLASSIFIER_FIELDS.map((field) => ( + + )) + )} +
+ ); +} + +function OverrideSection({ title, rows }: { title: string; rows: OverrideRow[] }) { + return ( +
+
+

+ {title} +

+ + {rows.length} {rows.length === 1 ? "value" : "values"} + +
+ {rows.length === 0 ? ( +
+ No values for this category. +
+ ) : ( +
+ {rows.map((row, i) => ( +
+

+ {row.description} +

+

+ {row.value} +

+
+ +
+
+ ))} +
+ )} +
+ ); +} + +function SourceBadge({ source }: { source: string }) { + const isUser = source === "user"; + return ( + + {isUser ? "user" : "classifier"} + + ); +} diff --git a/src/lib/bulkUpload/columnFields.ts b/src/lib/bulkUpload/columnFields.ts new file mode 100644 index 00000000..f29bff27 --- /dev/null +++ b/src/lib/bulkUpload/columnFields.ts @@ -0,0 +1,100 @@ +// Single source of truth for BulkUpload column mapping. +// +// The mapping is stored as `field → source CSV header` (one entry per mapped +// internal field). One source header may feed several CLASSIFIER fields (e.g. +// "Property Type" → both property_type and built_form_type) but at most one +// ADDRESS field — see docs/adr/0003-classifier-triggers-as-address-subtask.md +// and docs/wip/landlord-override-frontend-plan.md (Q2.2, Q3). +// +// Classifier field `value`s mirror the Model service's ClassifiableColumn names +// (property_type / built_form_type / wall_type / roof_type) so the mapping can +// be forwarded to the lambda trigger verbatim. + +export type InternalFieldKind = "address" | "classifier"; + +export interface InternalField { + value: string; + label: string; + kind: InternalFieldKind; + required: boolean; + // Canonical header written into the address-matching CSV (address fields only). + outputHeader?: string; +} + +export const INTERNAL_FIELDS: InternalField[] = [ + { value: "address_1", label: "Address 1", kind: "address", required: true, outputHeader: "Address 1" }, + { value: "address_2", label: "Address 2", kind: "address", required: false, outputHeader: "Address 2" }, + { value: "address_3", label: "Address 3", kind: "address", required: false, outputHeader: "Address 3" }, + { value: "postcode", label: "Postcode", kind: "address", required: true, outputHeader: "postcode" }, + { value: "internal_reference", label: "Internal Reference", kind: "address", required: false, outputHeader: "Internal Reference" }, + { value: "property_type", label: "Property Type", kind: "classifier", required: false }, + { value: "built_form_type", label: "Built Form", kind: "classifier", required: false }, + { value: "wall_type", label: "Wall Type", kind: "classifier", required: false }, + { value: "roof_type", label: "Roof Type", kind: "classifier", required: false }, +]; + +export const ADDRESS_FIELDS = INTERNAL_FIELDS.filter((f) => f.kind === "address"); +export const CLASSIFIER_FIELDS = INTERNAL_FIELDS.filter((f) => f.kind === "classifier"); +export const CLASSIFIER_FIELD_VALUES = CLASSIFIER_FIELDS.map((f) => f.value); +export const REQUIRED_FIELD_VALUES = INTERNAL_FIELDS.filter((f) => f.required).map((f) => f.value); + +// Sentinel for an unmapped field in the UI dropdown ("Not provided"). +export const NOT_PROVIDED = ""; + +// header → address field detection. Classifier fields are never auto-detected +// (Q2.1): mapping them is always an explicit user choice. +export function autoDetectField(header: string): string | null { + const h = header.toLowerCase().replace(/[\s_\-]/g, ""); + if (/^(address|addr)(line)?(1|one)?$/.test(h)) return "address_1"; + if (/^(address|addr)(line)?(2|two)|^street$/.test(h)) return "address_2"; + if (/^(address|addr)(line)?(3|three)|^locality$|^town$|^city$/.test(h)) return "address_3"; + if (/^post(al)?code$|^postcode$|^pcode$/.test(h)) return "postcode"; + if (/^(internal)?ref(erence)?$|^id$/.test(h)) return "internal_reference"; + return null; +} + +// Build the initial field→header mapping: keep any existing choices, then +// auto-fill address fields from the headers (first matching header wins). +export function buildInitialMapping( + sourceHeaders: string[], + existing?: Record, +): Record { + const mapping: Record = { ...(existing ?? {}) }; + for (const header of sourceHeaders) { + const field = autoDetectField(header); + if (!field) continue; + if (mapping[field] === undefined) mapping[field] = header; + } + return mapping; +} + +// Validation shared by the client (live) and the server (authoritative). +// Returns the first problem as a message, or null when the mapping is valid. +export function validateColumnMapping(mapping: Record): string | null { + for (const field of REQUIRED_FIELD_VALUES) { + if (!mapping[field]) { + const label = INTERNAL_FIELDS.find((f) => f.value === field)?.label ?? field; + return `${label} must be mapped to a column.`; + } + } + const usedAddressHeaders = new Set(); + for (const field of ADDRESS_FIELDS) { + const header = mapping[field.value]; + if (!header) continue; + if (usedAddressHeaders.has(header)) { + return `Column "${header}" is mapped to more than one address field.`; + } + usedAddressHeaders.add(header); + } + return null; +} + +// The classifier subset of the mapping (category → source header) that gets +// forwarded to the lambda trigger. Address fields are intentionally excluded. +export function classifierMapping(mapping: Record): Record { + const out: Record = {}; + for (const field of CLASSIFIER_FIELD_VALUES) { + if (mapping[field]) out[field] = mapping[field]; + } + return out; +} diff --git a/src/lib/bulkUpload/server.ts b/src/lib/bulkUpload/server.ts index a5785c39..e5260627 100644 --- a/src/lib/bulkUpload/server.ts +++ b/src/lib/bulkUpload/server.ts @@ -4,6 +4,8 @@ import { tasks } from "@/app/db/schema/tasks/tasks"; import { subTasks } from "@/app/db/schema/tasks/subtask"; import { count, desc, eq, sql } from "drizzle-orm"; import type { BulkUpload, BulkUploadStatus, ProgressView, TaskSummary } from "./types"; +import { validateColumnMapping, classifierMapping } from "./columnFields"; +import { SUBTASK_SERVICE } from "./types"; const REMAP_ALLOWED: ReadonlySet = new Set([ "ready_for_processing", @@ -78,6 +80,12 @@ async function loadTaskSummary(taskId: string): Promise { totalSubtasks: count(subTasks.id), completedSubtasks: sql`count(case when lower(${subTasks.status}) in ('completed', 'complete') then 1 end)::int`, failedSubtasks: sql`count(case when lower(${subTasks.status}) in ('failed', 'failure', 'error') then 1 end)::int`, + addressTotal: sql`count(case when (${subTasks.service} = 'address2uprn' or ${subTasks.service} is null) and ${subTasks.id} is not null then 1 end)::int`, + addressCompleted: sql`count(case when (${subTasks.service} = 'address2uprn' or ${subTasks.service} is null) and lower(${subTasks.status}) in ('completed', 'complete') then 1 end)::int`, + addressFailed: sql`count(case when (${subTasks.service} = 'address2uprn' or ${subTasks.service} is null) and lower(${subTasks.status}) in ('failed', 'failure', 'error') then 1 end)::int`, + classifierTotal: sql`count(case when ${subTasks.service} = 'landlord_description_overrides' then 1 end)::int`, + classifierCompleted: sql`count(case when ${subTasks.service} = 'landlord_description_overrides' and lower(${subTasks.status}) in ('completed', 'complete') then 1 end)::int`, + classifierFailed: sql`count(case when ${subTasks.service} = 'landlord_description_overrides' and lower(${subTasks.status}) in ('failed', 'failure', 'error') then 1 end)::int`, }) .from(tasks) .leftJoin(subTasks, eq(subTasks.taskId, tasks.id)) @@ -94,13 +102,6 @@ export async function getProgressView(uploadId: string): Promise): string | null { - const values = Object.values(mapping); - if (!values.includes("address_1")) return "Mapping must include address_1."; - if (!values.includes("postcode")) return "Mapping must include postcode."; - return null; -} - export type SetMappingOutcome = | { kind: "ok"; upload: BulkUpload } | { kind: "not_found" } @@ -116,7 +117,7 @@ export async function setColumnMapping( if (!REMAP_ALLOWED.has(upload.status as BulkUploadStatus)) return { kind: "invalid_status", current: upload.status }; - const reason = validateMapping(mapping); + const reason = validateColumnMapping(mapping); if (reason) return { kind: "invalid_mapping", reason }; const [updated] = await db @@ -174,6 +175,7 @@ export async function triggerAddressMatching(args: { .values({ taskId: task.id, status: "waiting", + service: SUBTASK_SERVICE.address, inputs: JSON.stringify({ bulk_upload_id: args.uploadId }), }) .returning(); @@ -209,6 +211,62 @@ export async function triggerAddressMatching(args: { return { kind: "ok", taskId: task.id }; } +// Co-fires the landlord classifier as a subtask under the address task. Reads +// the ORIGINAL upload (the address-matching CSV strips the description columns) +// and is non-blocking: a trigger failure marks only the classifier subtask, so +// address matching is unaffected. See ADR-0003. +export async function triggerClassifier(args: { + taskId: string; + uploadId: string; + sessionToken: string | undefined; +}): Promise { + const upload = await loadById(args.uploadId); + if (!upload || !upload.columnMapping) return; + + const columnMapping = classifierMapping(upload.columnMapping); + if (Object.keys(columnMapping).length === 0) return; + + const [subTask] = await db + .insert(subTasks) + .values({ + taskId: args.taskId, + status: "waiting", + service: SUBTASK_SERVICE.classifier, + inputs: JSON.stringify({ bulk_upload_id: args.uploadId }), + }) + .returning(); + + const payload = { + task_id: args.taskId, + sub_task_id: subTask.id, + s3_uri: `s3://${upload.s3Bucket}/${upload.s3Key}`, + portfolio_id: Number(upload.portfolioId), + column_mapping: columnMapping, + }; + + const trigger = await triggerFastApiPipeline({ + endpoint: "/v1/bulk-uploads/trigger-landlord-overrides", + payload, + sessionToken: args.sessionToken, + }); + + if (!trigger.ok) { + await db + .update(subTasks) + .set({ + status: "failed", + outputs: JSON.stringify({ error: trigger.message }), + }) + .where(eq(subTasks.id, subTask.id)); + return; + } + + await db + .update(subTasks) + .set({ status: "in progress", inputs: JSON.stringify(payload) }) + .where(eq(subTasks.id, subTask.id)); +} + export type CombineRetriggerOutcome = | { kind: "triggered"; taskId: string; subTaskId: string } | { kind: "already_combined" } diff --git a/src/lib/bulkUpload/types.ts b/src/lib/bulkUpload/types.ts index c2b1cb63..38d8f125 100644 --- a/src/lib/bulkUpload/types.ts +++ b/src/lib/bulkUpload/types.ts @@ -14,6 +14,13 @@ export type BulkUploadStatus = (typeof BULK_UPLOAD_STATUSES)[number]; export type BulkUpload = typeof bulkAddressUploads.$inferSelect; +// sub_task.service values. NULL is treated as address (legacy rows + the +// backend-spawned postcode-split children, which don't set it). See ADR-0003. +export const SUBTASK_SERVICE = { + address: "address2uprn", + classifier: "landlord_description_overrides", +} as const; + export type TaskSummary = { id: string; taskSource: string; @@ -25,6 +32,14 @@ export type TaskSummary = { totalSubtasks: number; completedSubtasks: number; failedSubtasks: number; + // Per-pipeline breakdown so onboarding progress can separate address + // matching from landlord classification (ADR-0003). + addressTotal: number; + addressCompleted: number; + addressFailed: number; + classifierTotal: number; + classifierCompleted: number; + classifierFailed: number; }; export type ProgressView = { diff --git a/src/lib/landlordOverrides/server.ts b/src/lib/landlordOverrides/server.ts new file mode 100644 index 00000000..c8f98eab --- /dev/null +++ b/src/lib/landlordOverrides/server.ts @@ -0,0 +1,79 @@ +import { db } from "@/app/db/db"; +import { + landlordPropertyTypeOverrides, + landlordBuiltFormTypeOverrides, + landlordWallTypeOverrides, + landlordRoofTypeOverrides, +} from "@/app/db/schema/landlord_overrides"; +import { asc, eq } from "drizzle-orm"; + +export interface OverrideRow { + description: string; + value: string; + source: string; +} + +export type LandlordOverrideCategory = + | "property_type" + | "built_form_type" + | "wall_type" + | "roof_type"; + +export type LandlordOverrideResults = Record; + +const EMPTY: LandlordOverrideResults = { + property_type: [], + built_form_type: [], + wall_type: [], + roof_type: [], +}; + +// Reads the four landlord_*_overrides tables for a portfolio. The portfolio id +// is a bigint FK; the bulk-upload flow carries it as a numeric string. +export async function getLandlordOverrides( + portfolioId: string +): Promise { + if (!/^\d+$/.test(portfolioId)) return EMPTY; + const pid = BigInt(portfolioId); + + const [property_type, built_form_type, wall_type, roof_type] = await Promise.all([ + db + .select({ + description: landlordPropertyTypeOverrides.description, + value: landlordPropertyTypeOverrides.value, + source: landlordPropertyTypeOverrides.source, + }) + .from(landlordPropertyTypeOverrides) + .where(eq(landlordPropertyTypeOverrides.portfolioId, pid)) + .orderBy(asc(landlordPropertyTypeOverrides.description)), + db + .select({ + description: landlordBuiltFormTypeOverrides.description, + value: landlordBuiltFormTypeOverrides.value, + source: landlordBuiltFormTypeOverrides.source, + }) + .from(landlordBuiltFormTypeOverrides) + .where(eq(landlordBuiltFormTypeOverrides.portfolioId, pid)) + .orderBy(asc(landlordBuiltFormTypeOverrides.description)), + db + .select({ + description: landlordWallTypeOverrides.description, + value: landlordWallTypeOverrides.value, + source: landlordWallTypeOverrides.source, + }) + .from(landlordWallTypeOverrides) + .where(eq(landlordWallTypeOverrides.portfolioId, pid)) + .orderBy(asc(landlordWallTypeOverrides.description)), + db + .select({ + description: landlordRoofTypeOverrides.description, + value: landlordRoofTypeOverrides.value, + source: landlordRoofTypeOverrides.source, + }) + .from(landlordRoofTypeOverrides) + .where(eq(landlordRoofTypeOverrides.portfolioId, pid)) + .orderBy(asc(landlordRoofTypeOverrides.description)), + ]); + + return { property_type, built_form_type, wall_type, roof_type }; +}