Merge pull request #310 from Hestia-Homes/feature/frontend_landlord_overrides

Feature/frontend landlord overrides
This commit is contained in:
Jun-te Kim 2026-06-08 16:23:21 +01:00 committed by GitHub
commit d0d5ec0366
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 3273 additions and 412 deletions

View file

@ -1,5 +1,28 @@
{
"permissions": {
"allow": [
"Read(//home/vscode/.claude/skills/**)",
"Bash(git fetch *)",
"Bash(git add *)",
"Bash(git commit *)",
"Bash(git merge *)",
"Bash(git pull *)",
"Bash(git push *)",
"Bash(git status *)",
"Bash(git checkout *)",
"Bash(git stash *)",
"Bash(git config *)",
"Bash(git branch *)",
"Bash(git worktree *)",
"Bash(git check-ignore *)",
"Bash(git ls-tree *)",
"Bash(npm install *)",
"Bash(npm run *)",
"Bash(npx drizzle-kit *)",
"Bash(pip install *)",
"Bash(terraform fmt *)",
"Bash(gh label *)"
],
"deny": [
"Bash(npx drizzle-kit generate)",
"Bash(npx drizzle-kit push)"

3
.gitignore vendored
View file

@ -41,3 +41,6 @@ next-env.d.ts
backlog/**
docs/adr/**
# Personal Claude Code settings (per-developer, not shared)
.claude/settings.local.json

View file

@ -38,13 +38,42 @@ The housing association supplying a Portfolio's BulkUploads. A Landlord knows fa
_Avoid_: customer, client, owner, organisation (Organisation is a separate, broader entity)
**Landlord override**:
A landlord-supplied fact about a property that takes precedence over EPC-derived defaults when computing an assessment. The end-to-end Landlord override journey has two layers — a **VocabularyMapping** layer (this glossary entry below) and a per-Property fact layer (not yet modelled).
A landlord-supplied fact about a property that takes precedence over EPC-derived defaults when computing an assessment. The end-to-end Landlord override journey has two layers — a **VocabularyMapping** layer (this glossary entry below) and a per-Property fact layer (the **Property override**, below).
_Avoid_: customer data, manual override, landlord data
**Property override**:
The per-Property fact layer — one resolved fact per `(Property, Building part, component)`, where component is one of `wall_type`/`roof_type`/`property_type`/`built_form_type`. Holds a **snapshot** of the resolved enum value (a denormalised copy of the VocabularyMapping outcome at finalise time, so two Properties sharing a description can later diverge), plus the original spreadsheet text it resolved from. Materialised by the finaliser **for UPRN-matched Properties only** (v2); the resolved value is never `UNKNOWN` — the Verify step forces every `UNKNOWN` to be mapped before Finalise, and an unresolved description fails the run. See [ADR-0005](./docs/adr/0005-async-bulk-upload-finaliser.md) (table) and [ADR-0006](./docs/adr/0006-property-overrides-join-and-no-uprn-defer.md) (population).
_Avoid_: per-property mapping, property fact, override row
**Source row id**:
A synthetic UUID minted per source-file row at `start-address-matching` and written into **both** the address CSV and the classifier CSV. It is the stable join key that lets the finaliser tie a row's identity (combiner output → `property_id`) to that row's raw descriptions (classifier CSV), since neither file preserves row order and `Internal Reference` is absent from the classifier CSV. See [ADR-0006](./docs/adr/0006-property-overrides-join-and-no-uprn-defer.md).
_Avoid_: row index, internal reference (a separate, optional landlord field)
**VocabularyMapping**:
The translation from a Landlord's free-text description in a BulkUpload column (e.g. `"cavity: filledcavity"`) to a canonical domain enum value (e.g. `WallType.CAVITY`). Produced by a `ColumnClassifier` (today an LLM, tomorrow possibly a lookup table or rules engine) in the Model service. Stored per-Portfolio, one row per `(category, description)`. A row carries provenance (`classifier` or `user`) so user overrides survive re-classification.
_Avoid_: column mapping (that's a separate concept — see `ColumnMapping` above), classification, dictionary
### Building parts
**Building part**:
One physically distinct part of a dwelling described by a single entry within a multi-valued cell. A dwelling is one **Main building** plus zero or more **Extensions**. Per-part descriptions appear as comma-separated entries in physical-element columns (e.g. `Walls`, `Roofs`); whole-dwelling columns (e.g. `Property Type`) carry a single entry and are **not** split per part.
_Avoid_: annexe, unit, section, dwelling part
**Main building**:
The principal building part of a dwelling — exactly one per address. The others are **Extensions**.
**Extension**:
A building part that is not the Main building, numbered **Extension 1 … Extension N-1** for an N-entry address.
_Avoid_: annexe, addition, outbuilding
**Multi-entry**:
The property of a BulkUpload row whose physical-element cells hold **more than one comma-separated entry**, one per **Building part**. Always intra-cell in our data — never multiple rows sharing one address/UPRN. Within a row, the multi-valued columns agree on entry-count, so **position `i` is the same Building part across every multi-valued column**.
_Avoid_: multi-row, multi-record, duplicate address
**Building-part ordering** (a.k.a. **ordering**):
The user's declaration, captured once per file, of which list-position maps to which Building part — because the entry order is a consistent per-file mistake (`"A, B"` could be `[Main, Extension 1]` or `[Extension 1, Main]`). Stored per entry-count as a permutation. See [ADR-0004](./docs/adr/0004-multi-entry-building-part-ordering.md).
_Avoid_: sort order, sequence, column mapping
## Lifecycle
A **BulkUpload** moves through these statuses:
@ -55,15 +84,20 @@ ready_for_processing
→ processing (Address matching triggered; Next.js writes)
→ combining (Combiner stage running; FastAPI writes directly)
→ awaiting_review (Combiner output in S3; FastAPI writes directly)
→ complete (Finalise succeeded; Next.js writes)
→ failed (FastAPI reports in-flight failure — schema only, not yet wired)
→ finalising (Finalise dispatched; Next.js writes via compare-and-swap)
→ complete (Finaliser succeeded; FastAPI/Lambda writes directly)
→ failed (Finaliser failed; FastAPI/Lambda writes directly)
```
`complete` and `failed` are terminal.
`complete` and `failed` are terminal. `finalising` is the in-flight state of the
async finaliser (mirrors `combining`); the UI renders it as "Uploading to ARA". See
[ADR-0005](./docs/adr/0005-async-bulk-upload-finaliser.md).
Re-mapping (PATCHing `columnMapping`) is legal only in `ready_for_processing` and `mapping_complete`. Any later state rejects with 409.
**Two writers**: Next.js owns transitions out of `mapping_complete`, into `processing`, and the terminal Finalise outcomes. FastAPI owns `combining` and `awaiting_review` — writing them direct to the DB during the combiner run. The BulkUpload aggregate observes both.
**Two writers**: Next.js owns transitions out of `mapping_complete`, into `processing`, and the `awaiting_review → finalising` compare-and-swap at Finalise dispatch. FastAPI/Lambda owns `combining`, `awaiting_review`, and the terminal `finalising → complete`/`failed` — writing them direct to the DB during the combiner and finaliser runs. The BulkUpload aggregate observes both. See [ADR-0005](./docs/adr/0005-async-bulk-upload-finaliser.md).
At `awaiting_review`, **Finalise is gated** (not a new status — a precondition on the action): when classifier columns were mapped the user must acknowledge the classification-verification step, and when the file is **Multi-entry** they must confirm the **Building-part ordering**. See [ADR-0004](./docs/adr/0004-multi-entry-building-part-ordering.md).
See [ADR-0001](./docs/adr/0001-bulk-upload-state-machine.md) for the deliberate "not yet" decisions baked into this lifecycle.
@ -97,6 +131,8 @@ _Avoid_: override, adjustment, correction
>
> **Dev:** "And if **Finalise** runs and 30% of rows have no **UPRN**?"
> **Domain expert:** "Those still get imported as **Properties** — just without a UPRN — and the BulkUpload moves to `complete`. Manual cleanup happens later in the property table."
>
> _(Planned change — v3 / [ADR-0006](./docs/adr/0006-property-overrides-join-and-no-uprn-defer.md): no-UPRN rows will move to a separate staging table to be re-matched, so `property` holds only matched rows. v2 does **not** change this yet — and v2 writes **Property overrides** only for the UPRN-matched rows.)_
## Flagged ambiguities

View file

@ -0,0 +1,291 @@
# Handover: `bulk_upload_finaliser` v2 — populate `property_overrides`
> **Purpose.** Self-contained brief to start a fresh context implementing v2 (the
> per-Property fact layer) in the existing `bulk_upload_finaliser` Lambda. v1
> (async finalise that writes `property`) is **shipped and working end-to-end**.
> This doc assumes no memory of the v1 session.
## 0. Design resolved — grilling outcome (2026-06-05)
> The open questions in §9 were resolved in a design session. **This section is now
> authoritative**; the later sections are kept for background but where they conflict
> with this one, this one wins. The new v2 ADR is
> [`docs/adr/0006-property-overrides-join-and-no-uprn-defer.md`](../adr/0006-property-overrides-join-and-no-uprn-defer.md);
> ADR-0004 was amended for per-count ordering capture.
**Spine.** Populate `property_overrides` at finalise **for UPRN-matched rows only**.
Join the classifier descriptions to the combiner identity by a **synthetic UUID
`source_row_id`** — *not* `Internal Reference` (it is **absent from the classifier
CSV**, and optional anyway) and *not* by carrying description columns through
`address2uprn` (architecture B, rejected). This is architecture **(A)** with a
purpose-built key.
**No-UPRN rows are deferred to v3.** v1 *currently* inserts them as `property` rows;
**v2 changes nothing in the property insert** and simply writes no overrides for
them. The eventual home for unmatched rows is a **separate staging table** (Model B):
`property` holds only matched rows; unmatched inputs (with their descriptions) live in
the staging table until a *different UPRN matcher* assigns a UPRN and promotes them.
"Found vs unfound" is a view across both tables, **not** a flag on `property`. v3 owns
the property-insert change + the staging table + the matcher-rerun UX together.
**Frontend work** (`/workspaces/assessment-model`):
1. **Mint `source_row_id`** (UUID) in `start-address-matching` right after
`readRows()`, and **explicitly emit it as a column in both** `buildAddressCsv` and
`buildClassifierCsv` — both project a *fixed* column set, so attaching it to the
row object is not enough. It survives `address2uprn`→combiner like any input column
(carried as `additional_info`); **verify against a real combiner output**.
2. **Per-count ordering capture** (supersedes ADR-0004's largest-count-only):
`detectMultiEntry` keeps a sample **per distinct count**; `OnboardingProgress`
renders one ordering panel **per count ≥ 2**. The jsonb type and
`setMultiEntryOrdering` validation already accept all counts — **no migration, no
backend-validation change**.
3. **Verify gate hardened**: Finalise is blocked while **any** description is still
`UNKNOWN`. `UNKNOWN` is now a **transient "needs review" marker, never a final
value** (this retires the old "`UNKNOWN` is legitimate" line in §7).
4. **`dispatchFinaliser`** adds **two fields to the trigger body**:
`classifier_s3_uri` and `multi_entry_ordering` (it already reads the
`bulk_address_uploads` row, and dispatch happens *after* the user confirms
ordering, so the value is final). The classifier S3 key comes from a **shared
`classifierCsvKey(portfolioId, uploadId)` helper** used by both the writer and the
dispatcher (the key is not stored anywhere today — convention only).
**Backend work** (`/workspaces/home/github/Model`):
5. Grow the trigger schema in two places — FastAPI `FinaliserTriggerRequest` and
Lambda `BulkUploadFinaliserTriggerBody` — with `classifier_s3_uri` +
`multi_entry_ordering`. Handler stays trigger-driven (no new `bulk_address_uploads`
coupling).
6. **`PropertyOverrideRow`** table mirror + a **sibling `PropertyOverrideRepository`**
(own aggregate; upsert on `(property_id, override_component, building_part)`), and a
**read-only `LandlordOverrideRepository`** that loads a portfolio's vocabulary
**per component into dicts once** (the vocabulary is deduplicated, not per-row).
7. **Orchestrator step**, in the same `commit_scope`:
- bulk `SELECT (portfolio_id, uprn) → id` for the run's UPRN rows → in-memory map;
- join classifier↔combiner rows by `source_row_id`;
- **uniform comma-split all four components**`permutations[count]` → parts
(count-1 cell → `building_part = 0`); the finaliser needs **no fallback** because
every count ≥ 2 has a confirmed permutation;
- resolve each part's **normalized** description against the override dicts;
- `original_spreadsheet_description` = the **raw** entry text (un-normalized);
- **empty cell → write no row**; **non-empty but unresolved (or `UNKNOWN`) → raise**
`commit_scope` rolls back → `_mark_failed` flips the upload to `failed`
(**fail loudly, no partial writes**);
- write only the classifier components actually **mapped** in `columnMapping`;
- **no `source` column in v2** — upsert is unconditional for now.
**Locked assumptions (load-bearing — see ADR-0006).**
- **One real upload per user.** A re-upload only adds *new* properties (ones not
previously included), never re-describes existing ones → part-keys are append-only
across uploads → **upsert-only, no delete-orphans** is correct and complete.
- **Per-count consistency.** One ordering per count, confirmed from one sample, applies
to every cell of that count in the file (extends ADR-0004's bet to all counts).
- **Per-cell count.** `Walls` may split into 3 while `Roofs` splits into 2 in the same
row; each cell is ordered by *its own* entry count.
- **Classification completes before `awaiting_review`**, and the hardened verify gate
forces every `UNKNOWN` to be resolved — so an unresolved description at finalise is a
genuine defect, hence fail-loud.
## 1. Where v1 left things (read first)
v1 made **Finalise** an async dispatched Lambda that writes `property` rows. The
full flow works in dev: dispatch `202` → SQS → Lambda inserts properties + writes
terminal status → UI advances to "Processing complete".
Authoritative background — **read these before coding**:
- `docs/design/bulk-upload-finaliser.md` — the full grilling/design doc (schema Q6Q9, snapshot-not-FK, recalculate-on-rerun, the v2 input hazards).
- `docs/adr/0005-async-bulk-upload-finaliser.md` (frontend) — state machine + `property_overrides` shape.
- `/workspaces/home/github/Model/docs/adr/0013-bulk-upload-finaliser-writes-properties.md` (backend) — the Lambda write path + DDD layering.
- `docs/adr/0004-multi-entry-building-part-ordering.md`**critical for v2**: how building parts and `multiEntryOrdering` work.
- `docs/adr/0002-landlord-override-vocabulary.md` — the vocabulary (`landlord_*_overrides`) layer v2 resolves against.
- `CONTEXT.md` — glossary: **Property override**, **Building part**, **Main building**, **Extension**, **Multi-entry**, **Building-part ordering**, **VocabularyMapping**.
**Convention that must hold (it was corrected hard in v1):** in the Model repo,
business logic lives in `orchestration/*_orchestrator.py`; the Lambda
`applications/*/handler.py` stays thin (parse trigger, wire infra, delegate). One
repository per aggregate; orchestrators never commit (the handler owns the
transaction via `commit_scope`). See memory `model-ddd-layering`.
## 2. v2 goal
Populate `property_overrides` during finalise: for each property, write one row per
`(building_part, override_component)` carrying the **resolved enum snapshot** of the
landlord's description for that part.
## 3. The target table (already shipped — migration 0221, do NOT re-migrate)
Drizzle: `src/app/db/schema/property_overrides.ts`.
```
property_overrides
id uuid pk
property_id bigint NOT NULL FK → property.id ON DELETE CASCADE
portfolio_id bigint NOT NULL FK → portfolio.id ON DELETE CASCADE
building_part smallint NOT NULL -- 0 = main, 1 = ext 1, 2 = ext 2, …
override_component override_component NOT NULL -- pgEnum {wall_type, roof_type, property_type, built_form_type}
override_value text NOT NULL -- snapshot of the resolved enum value
original_spreadsheet_description text NOT NULL -- raw cell text it resolved from
created_at / updated_at timestamptz NOT NULL
UNIQUE (property_id, override_component, building_part)
```
## 4. Design decisions already locked (do not relitigate)
- **Snapshot, not FK.** `override_value` is a denormalised text copy of the resolved
enum, taken at materialise time — *not* an FK to `landlord_*_overrides`. This is
what lets two properties sharing a description diverge later, and is required
because there are four polymorphic vocabulary tables. Lineage is the natural key
`(portfolio_id, override_component, original_spreadsheet_description)`.
- **Re-run = recalculate.** Write with `onConflictDoUpdate` on
`(property_id, override_component, building_part)`, refreshing `override_value` +
`original_spreadsheet_description` + `updated_at`. (Contrast `property`, which is
`onConflictDoNothing`.) When a per-property user-edit path eventually exists, this
upsert will need a `WHERE source='classifier'` guard — but there is **no `source`
column in v1**; add it as a nullable column only when that path is built.
- **`override_component` values** are exactly the classifier category keys
(`wall_type`, `roof_type`, `property_type`, `built_form_type`) used in both
`src/lib/bulkUpload/columnFields.ts` and the Model `ClassifiableColumn.name` — no
translation.
- **`building_part` indexing**: `0 = Main building`, `1 = Extension 1`, … per ADR-0004.
- **Whole-dwelling components** (`property_type`, `built_form_type`) are per-part-
capable but today's files supply them once → usually written at `building_part = 0`.
## 5. The hard part: assembling the inputs (this is the real v2 work)
The combiner output (what the v1 finaliser reads) carries **only** address/UPRN
columns — `Address 1/2/3`, `postcode`, `Internal Reference`, `address2uprn_uprn`,
`address2uprn_address`, `address2uprn_lexiscore`. The **raw `Walls`/`Roofs`/
`Property Type`/`Built Form` cells are NOT in it.** They live only in:
- the **classifier CSV** on S3 — `bulk_onboarding_inputs/{portfolioId}/{uploadId}-classifier.csv` (original landlord headers), and
- `landlord_*_overrides` in Postgres — the *resolved* values keyed by `(portfolio_id, normalized description)`.
To write one `property_overrides` row, v2 must assemble **four inputs**:
| Need | Source |
|---|---|
| `property_id` (identity) | combiner output → `(portfolio_id, uprn)`**but no-UPRN rows have no key** |
| raw cell text per row | the classifier CSV (not the combiner output) |
| split a multi-valued cell → building parts | `multiEntryOrdering` on `bulk_address_uploads` |
| description → `override_value` | `landlord_*_overrides` (resolve by normalized description) |
### Two open hazards — both RESOLVED (see §0)
1. **Join key (RESOLVED).** Investigation confirmed `Internal Reference` is in the
address CSV + combiner output but **NOT in the classifier CSV**, and is optional.
So architecture (A)-by-`Internal Reference` is dead. **Resolution: mint a synthetic
UUID `source_row_id`** in `start-address-matching` after `readRows()`, emitted as an
explicit column in *both* CSVs. It is the join key. (Architecture (A) with a
purpose-built key; (B) "carry descriptions through `address2uprn`" was rejected.)
2. **`property_id` for no-UPRN rows (RESOLVED by descoping).** v2 writes overrides
**only for UPRN rows**, whose `property.id` is re-found by `(portfolio_id, uprn)`
— so **no `RETURNING` correlation is needed**. No-UPRN rows are deferred to v3
(Model B staging table); v2 leaves the property insert untouched.
## 6. `multiEntryOrdering` — how to split cells into parts
Persisted on `bulk_address_uploads` (`src/app/db/schema/bulk_address_uploads.ts`):
```ts
MultiEntryOrdering { permutations: Record<string, number[]>; confirmed: boolean }
// permutations[count][k] = the 0-based FILE position holding building part k
// where 0 = Main building, 1..N-1 = Extension 1..N-1.
// e.g. { "2": [1, 0] } => for 2-entry rows, the main building is file position 1.
```
A multi-valued cell (e.g. `Walls = "Cavity: …, Solid brick: …"`) splits on commas
into entries by file position; `permutations[count]` maps file position → building
part. **Caveat (ADR-0004):** only the **largest count** permutation is captured this
iteration; other counts need a derivation rule — decide it in v2.
`multiEntrySummary` holds the detected multi-valued columns + **normalized**
description keys (the normalization that matches the classifier's stored keys:
`split → strip → lower`).
## 7. Resolving description → value (`landlord_*_overrides`)
Four per-component tables in `src/app/db/schema/landlord_overrides.ts`
(`landlord_wall_type_overrides`, `…_roof_type_…`, `…_property_type_…`,
`…_built_form_type_…`), each `UNIQUE (portfolio_id, description)`, value typed by the
component's pgEnum, plus a `source` (`classifier`|`user`). Resolve a normalized
description → `value`. The frontend already does this read in
`src/lib/bulkUpload/server.ts` (`lookupOverrides`) — mirror that mapping on the
backend. **`UNKNOWN` is now a transient "needs review" marker, never a final
value** (resolved in §0): the verify gate forces the user to map every `UNKNOWN`
before Finalise, so a `UNKNOWN` (or unresolvable description) reaching the finaliser
is a defect and **fails the run loudly**.
## 8. Backend pieces to build (DDD, mirror v1)
In `/workspaces/home/github/Model`:
- **`PropertyOverrideRow`** SQLModel mirror → `infrastructure/postgres/property_override_table.py` (mirror the pattern in `property_table.py` / `landlord_*_override_table.py`; reuse a shared `override_component` SAEnum like `landlord_override_enums.py`).
- **Repository** for the override write (one per aggregate): add to
`repositories/property/` (e.g. extend the property repo or a sibling
`property_override` repo), with an `upsert_all` using
`on_conflict_do_update(index_elements=[property_id, override_component, building_part], …)`.
- **Orchestrator logic** in `orchestration/bulk_upload_finaliser_orchestrator.py`:
extend `finalise(...)` (or add a step) to, after inserting properties and getting
ids, build the override rows (join → split by part → resolve) and persist them in
the **same** `commit_scope`.
- **Handler** stays thin — it already wires S3 + engine + repos. It will need the
extra input (classifier CSV and/or `multiEntryOrdering`); decide how those reach
the Lambda (extend `BulkUploadFinaliserTriggerBody`, or read `bulk_address_uploads`
for `multiEntryOrdering` + the classifier S3 URI). The trigger currently carries
`task_id, sub_task_id, s3_uri (combiner output), portfolio_id, bulk_upload_id`.
Key v1 files to extend (all in the Model repo):
- `applications/bulk_upload_finaliser/handler.py`
- `orchestration/bulk_upload_finaliser_orchestrator.py`
- `repositories/property/property_repository.py` + `property_postgres_repository.py`
- `infrastructure/postgres/property_table.py` (reference for the new mirror)
- `infrastructure/s3/csv_s3_client.py` (`read_rows`)
- Packaging test: `tests/test_lambda_packaging.py` will flag any new top-level import
the Dockerfile doesn't `COPY` (v1 hit this with `datatypes/`).
## 9. Open questions — all RESOLVED (see §0 + ADR-0006)
- **Join key** → synthetic UUID `source_row_id` in both CSVs (not `Internal
Reference`, not architecture B).
- **`property_id` for no-UPRN rows** → out of scope; v2 is UPRN-only, no-UPRN deferred
to v3 (Model B). UPRN rows re-found by `(portfolio_id, uprn)`; no `RETURNING`.
- **Non-largest-count `multiEntryOrdering`** → capture a confirmed permutation for
**every** count ≥ 2 in the UI (supersedes ADR-0004); finaliser needs no fallback.
- **Trigger body vs handler-reads-DB****grow the trigger body** (`classifier_s3_uri`
+ `multi_entry_ordering`), built in `dispatchFinaliser`.
- **Re-materialise** → recalculate every finalise via **upsert-only** on
`(property_id, override_component, building_part)`; **no delete-orphans** (justified
by the one-real-upload assumption); `property` rows untouched.
## 10. Implementation order (design is settled — build it)
Frontend first (the finaliser depends on `source_row_id` + per-count ordering):
1. **`source_row_id`**: shared `classifierCsvKey` helper; mint the UUID in
`start-address-matching` after `readRows()`; emit it as an explicit column in both
`buildAddressCsv` and `buildClassifierCsv`. Verify it lands in a real combiner
output.
2. **Per-count ordering**: `detectMultiEntry` keeps a sample per count;
`OnboardingProgress` renders one ordering panel per count ≥ 2. Drop the
largest-count-only assumption in `setMultiEntryOrdering` if it requires the largest.
3. **Verify gate**: block Finalise while any classification is `UNKNOWN`.
4. **`dispatchFinaliser`**: add `classifier_s3_uri` + `multi_entry_ordering` to the
trigger payload.
Backend:
5. Grow `FinaliserTriggerRequest` (FastAPI) + `BulkUploadFinaliserTriggerBody` (Lambda).
6. `PropertyOverrideRow` mirror + sibling `PropertyOverrideRepository` (upsert) +
read-only `LandlordOverrideRepository`.
7. Orchestrator step (join → split → resolve → upsert; fail-loud on unresolved),
TDD against fakes (mirror
`tests/orchestration/test_bulk_upload_finaliser_orchestrator.py`).
8. Handler wiring; watch `tests/test_lambda_packaging.py` for Dockerfile COPY gaps.
Docs (done in this session): ADR-0004 amended, ADR-0006 added, `CONTEXT.md`
"Property override" updated.
## 11. Verification notes (environment)
- Frontend: `npx tsc --noEmit` (was 0 errors at v1 close).
- Model repo: `mypy`/`pytest` need a deps-installed env (the v1 session couldn't run
them locally; `/app` Docker config runs the full suite). `terraform plan` needs the
CLI. Watch `tests/test_lambda_packaging.py` for Dockerfile COPY gaps.
- v1 is committed; dev Lambda + SQS queue are deployed and working
(`FINALISER_SQS_URL` wired in `backend/.env` for local, and in terraform/fast-api).

View file

@ -0,0 +1,243 @@
# Design WIP: `bulk_upload_finaliser` + `property_overrides`
> **Status:** v1 fully resolved (grilling 2026-06-04). Ready to graduate to ADR(s).
> v2 (`property_overrides` population) deferred to its own session — see the
> "Input" application-flow item for its entry point. When decisions stabilise this should
> graduate into a new ADR in `docs/adr/` (frontend) and likely a companion ADR
> in the Model repo, plus a CONTEXT.md update (see "Docs to update").
## Goal
Two linked pieces of work:
1. **New backend application `bulk_upload_finaliser`** (lives in
`/workspaces/home/github/Model/applications/`, DDD-aligned — study
`/workspaces/home/github/Model/domain`). It reads the address-matching /
combiner output **and** the `landlord_*_overrides` vocabulary tables, then
writes Postgres correctly: the `property` rows (UPRN + address, as the
frontend does today) and — later — the new `property_overrides` rows.
Motivation: a property list can be ~40,000 rows, too big for a synchronous
Next.js HTTP handler.
2. **New `property_overrides` table** — the per-Property fact layer that
**ADR-0004 explicitly deferred** ("the per-Property building-part fact layer
that consumes `multiEntryOrdering` and writes main/extension facts at
finalise"). One row per `(property, building_part, component)` carrying the
resolved enum value + provenance.
**Split into two pieces (decided 2026-06-04):**
- **v1 — async finaliser writes `property`.** Move today's synchronous Next.js
`/finalize` property-insert into a dispatched Lambda (`bulk_upload_finaliser`),
because a property list can be ~40,000 rows. Reproduces the exact 9-column insert
+ `onConflictDoNothing`, adds the `finalising` status + async state machine, and
shifts terminal-status ownership to the backend. **Fully designed — ADR-ready.**
- **v2 — populate `property_overrides`.** The per-Property fact layer. The *table*
already shipped (migration 0221, PR #306), but population is a **separate
follow-up** with its own open input-plumbing questions (see the "Input" item
under application-flow questions). Not designed here.
This doc resolves **v1 in full**; v2 gets its own grilling session against real
classifier-CSV / combiner-output samples.
## Where this sits in the existing pipeline
```
BulkUpload → address matching → combiner → awaiting_review → [Finalise]
(new) bulk_upload_finaliser ──────────┘
reads: combiner output (S3) + landlord_*_overrides
writes: property (+ later property_overrides)
downstream: Ingestion (EPC/solar fetch)
→ PropertyBaseline (stage 2,
re-score-on-override seam,
Model ADR-0011/0012)
```
`Finalise` (the user action + state-machine gate) stays in Next.js; the new
application is the **worker it dispatches**. Downstream `PropertyBaseline`
already has an override-aware "re-score" seam — `property_overrides` will feed it.
## Decisions locked
| # | Decision |
|---|----------|
| Name | Application is **`bulk_upload_finaliser`**, in `Model/applications/`. `Finalise` stays the Next.js action that triggers it. |
| DDD | Follow the DDD structure under `Model/domain`. Domain terms discovered as needed. |
| Schema ownership | **Drizzle (frontend) owns migrations** for both `property` and the new `property_overrides`. |
| Backend access | Backend gets a **`PropertyOverrideRow` SQLModel** (mirror, like `landlord_wall_type_override_table.py`) + a **repository** (see `Model/infrastructure/postgres` + `Model/repositories` for examples). `PropertyRow` must drop its "backend never inserts" invariant and gain insertable columns. |
| Next.js `/finalize` | **Delete it** — fully replaced by the Lambda. |
| `property_overrides` shape | **Single polymorphic table**, not per-component tables. Accepts losing DB-level pgEnum typing on `value`. |
| `override_value` | `text` — a **denormalised snapshot copy** (own value per row) of the resolved enum from `landlord_*_overrides` at materialise time. Own-value (not an FK to the vocabulary) is what lets two properties sharing a description later **diverge**, and lets re-run recalculate one property's value without touching its siblings. |
| Snapshot, **not** FK to vocabulary | `property_overrides` does **not** foreign-key the originating `landlord_*_overrides` row. An FK forces every property sharing a description to share one value (forbids divergence); is structurally impossible as a real FK (4 polymorphic target tables, each with its own value enum); and would risk cascade-deleting per-property facts when re-classification prunes a vocabulary row. Lineage is preserved as a **natural key**`(portfolio_id, override_component, original_spreadsheet_description)` re-finds the vocabulary row (its `UNIQUE` is `(portfolio_id, description)`) — so deliberate re-sync needs no surrogate FK. |
| Re-run = **recalculate** | The finaliser write to `property_overrides` is `onConflictDoUpdate` on `(property_id, override_component, building_part)`, refreshing `override_value` + `original_spreadsheet_description` + `updated_at` to the latest resolution. Contrast `property`, which stays `onConflictDoNothing` (identity rows, don't churn). When per-property `source='user'` edits exist, the update must guard `WHERE source='classifier'` to preserve hand-edits (mirrors the Model classifier upsert). |
| `building_part` | **`smallint NOT NULL`**, explicit index: `0 = main building, 1 = extension 1, 2 = extension 2, …` (matches ADR-0004 `multiEntryOrdering.permutations` indexing). |
| Whole-dwelling components | **No special case.** `property_type`/`built_form` are *per-part-capable* too (an extension — conservatory, summer house — can be a different built form / property type). Today's files only supply them once, so they'll usually be written at `building_part = 0` only, but the schema allows per-part with no future migration. |
## `property_overrides` — columns so far
Roughly (subject to remaining open questions):
```
property_overrides
id uuid pk (default random) -- match landlord_* tables
property_id bigint NOT NULL FK → property.id (FE-owned table)
portfolio_id bigint NOT NULL FK → portfolio.id
building_part smallint NOT NULL -- 0 = main, 1 = ext 1, 2 = ext 2, …
override_component override_component NOT NULL -- column name == enum type name; pgEnum {wall_type, roof_type, property_type, built_form_type} (Q6 ✓)
override_value text NOT NULL -- snapshot copy of landlord_* resolved enum (free text; `override_component` carries the typing)
-- (no `source`) — dropped Q9: pure value snapshot; add back as nullable column if/when a per-property edit path needs provenance
original_spreadsheet_description text NOT NULL -- raw spreadsheet cell text this snapshot resolved from (Q7 ✓)
created_at timestamptz NOT NULL default now()
updated_at timestamptz NOT NULL default now()
-- UNIQUE (property_id, override_component, building_part) -- Q8 ✓ (source NOT in key — mirrors ADR-0004 single-row flip; portfolio_id implied by property_id)
-- FK property_id → property.id ON DELETE CASCADE; portfolio_id → portfolio.id (Drizzle only; bare bigint in SQLModel mirror); portfolio_id kept (matches property_details_epc / property_targets)
```
## Open questions (resume here)
- **Q6 — `component` discriminator. RESOLVED 2026-06-04.** pgEnum
**`override_component`** (column `component`) with values
**`wall_type`, `roof_type`, `property_type`, `built_form_type`**. Verified these
are the *exact* keys used both in the frontend
([columnFields.ts:30-33](../../src/lib/bulkUpload/columnFields.ts#L30-L33)) and
the backend (`ClassifiableColumn.name` / handler `_build_columns()`), so the
finaliser maps category → component with **no translation**. pgEnum over text:
small closed set, typos caught at write time — and this is now the *only*
DB-level typing left on a row, since `override_value` is free text. New component
= one-line `ALTER TYPE … ADD VALUE` (Drizzle-owned). Enum named `override_*`
(not `property_*`) to sit with `override_source` and stay visually distinct from
the existing *value* enum `property_type`.
- **Q7 — store raw description per override row? RESOLVED 2026-06-04: yes, as
`original_spreadsheet_description text NOT NULL`.** Names the *source artifact*
(the spreadsheet cell), not an actor — sidesteps the Landlord-vs-User conflation
the glossary warns against, and aligns with CONTEXT.md's "the source file". Stored
because `override_value` is a denormalised snapshot that deliberately won't
refresh on later vocabulary edits; pinning the original text makes each row
self-explaining and re-resolvable even after the source `landlord_*_overrides` row
changes. `NOT NULL` is safe **iff** every `property_overrides` row is materialised
from a `landlord_*_overrides` row (whose `description` is itself `NOT NULL`) —
confirm when settling Q9/source semantics.
- **Q8 — uniqueness + FKs. RESOLVED 2026-06-04.**
`UNIQUE (property_id, override_component, building_part)`. `building_part` is in
the key (part 0 and part 1 both carry e.g. a `wall_type` row). `source` is
**deliberately not** in the key — mirrors ADR-0004's single-row-flip (one row,
flip `source` in place; the two-row model was rejected). `portfolio_id` is not in
the key (implied by `property_id`) but **is kept as a column** for query ergonomics
and consistency with `property_details_epc` / `property_targets`, which both
denormalise it. FKs: `property_id → property.id ON DELETE CASCADE`;
`portfolio_id → portfolio.id ON DELETE CASCADE` in the Drizzle migration, but a
bare `bigint` (no FK) in the backend `PropertyOverrideRow` SQLModel mirror —
matching `landlord_wall_type_override_table.py`.
- **Q9 — `source` semantics. RESOLVED 2026-06-04: drop `source` entirely.**
`property_overrides` is a pure **snapshot of resolved values**. Rationale: there
is no per-property override concept today (per ADR-0004 edits happen at the
**vocabulary/portfolio** level, flipped in place), so a copied `source` would
describe the *vocabulary mapping's* provenance, not this property's — a footgun a
reader/re-score rule could misread, and no consumer needs it in v1. When a genuine
per-property edit path lands (the real use for per-property provenance), `source`
returns as an **additive nullable-column migration** — no need to carry it now.
This also confirms the Q7 `NOT NULL` contingency: every row is still materialised
from a `landlord_*_overrides` row (`description NOT NULL`).
- **Q-scope — v1 scope. RESOLVED 2026-06-04.** v1 = the finaliser reproduces
today's **exact 9-column `property` insert** (`portfolio_id`,
`creation_status='READY'`, `uprn`, `landlord_property_id``Internal Reference`,
`address` = matched ?? user-inputted, `postcode`, `user_inputted_address`,
`user_inputted_postcode`, `lexiscore`) **+** `onConflictDoNothing` on
`(portfolio_id, uprn) where uprn is not null` — not a reduced "UPRN + address".
This sizes the "PropertyRow gains insertable columns" decision to all nine
columns plus `creation_status`. The `property_overrides` *table* shipped ahead
(migration 0221, PR #306) but is **not populated** in v1 — population is
follow-up work (and needs a different input source; see combiner-output note
below).
### Application-flow questions not yet reached
- **Trigger + orchestration. RESOLVED 2026-06-04.** Mirror the
`start-address-matching` path. Next.js creates a `SubTask` (`service:
"finaliser"`) under the BulkUpload's existing Task, then POSTs a new FastAPI
endpoint `POST /v1/bulk-uploads/trigger-finaliser` (auth via `validate_token`),
which enqueues to a **new SQS queue**; a Lambda runs the finaliser wrapped in
`@subtask_handler` (auto-injected `TaskOrchestrator`; `run_subtask` owns the
subtask start/complete/fail + Task cascade). Trigger body
`FinaliserTriggerBody { task_id, sub_task_id, s3_uri (combined output), portfolio_id }`
(extends `SubtaskTriggerBody`). Slow work stays outside the txn; persistence in a
`commit_scope`. The synchronous Next.js `/finalize` route is deleted (locked).
- **State machine / who writes `complete`. RESOLVED 2026-06-04.** New status
**`finalising`** between `awaiting_review` and `complete` (mirrors `combining`
before `awaiting_review`). Lifecycle:
`awaiting_review → finalising → complete` (↘ `failed`).
- **`finalising`** written by **Next.js at dispatch** via a **compare-and-swap**:
`UPDATE … SET status='finalising' WHERE id=? AND status='awaiting_review'`
0 rows ⇒ already dispatched ⇒ 409. This is the **double-dispatch guard** (closes
the simultaneous-click race under `loadForFinalize`'s existing precondition).
- **`complete` / `failed`** written by the **Lambda** directly to
`bulk_address_uploads` (new `set_finalized_status` / `set_failed_status`,
exactly like the combiner's `set_combining_status` /
`set_combined_output_s3_uri`). `markFinalized` + the Next.js `/finalize` route
are deleted.
- **CONTEXT.md "Two writers" change:** Next.js owns dispatch + the
`awaiting_review → finalising` CAS; the backend owns `finalising → complete`
and `→ failed` (in addition to `combining` / `awaiting_review`).
- **UI vs canonical:** persisted enum value is `finalising` (canonical; ties to
the **Finalise** action). The frontend renders it as **"Uploading to ARA"** — a
display-layer label only, **not** the enum name, so UX copy never needs a
migration.
- **Input — does the combiner output carry the raw description cells? RESOLVED
2026-06-04: NO. This is a v2 problem (deferred).** v1 needs only address/UPRN
columns, all **confirmed present** in the combiner output (`address2uprn_uprn`,
`address2uprn_address`, `address2uprn_lexiscore`, `Internal Reference`,
`Address 1/2/3`, `postcode`). The raw `Walls`/`Roofs`/`Property Type`/`Built Form`
cells are **not** in the combiner output — they survive only in (a) the
`{uploadId}-classifier.csv` on S3 (original headers) and (b) `landlord_*_overrides`
as *resolved* values keyed by description. So v2 population must assemble **four
inputs**, not one file:
- `property_id` (identity) ← combiner output `(portfolio_id, uprn)` — **but
no-UPRN rows have no such key**;
- raw cell text ← the classifier CSV (not the combiner output);
- cell → building-part split ← `multiEntryOrdering` on `bulk_address_uploads`;
- description → `override_value``landlord_*_overrides` (normalized description).
- **Two open v2 hazards (entry point for the v2 session):** (1) the join key
between classifier CSV and combiner output — is there a stable per-row key
(`Internal Reference`?) and is row order preserved through postcode-split +
combine? (2) obtaining `property_id` for unmatched (no-UPRN) rows — v1's
`onConflictDoNothing` returns no ids, so v2 likely needs `RETURNING id` mapped
back to source rows.
- **Idempotency / re-run. RESOLVED 2026-06-04 (per-table).**
- `property`: keep today's `onConflictDoNothing` on `(portfolio_id, uprn) where uprn is not null` — existing properties are not churned.
- `property_overrides`: `onConflictDoUpdate` on `(property_id, override_component, building_part)`**recalculate** `override_value` + `original_spreadsheet_description` + `updated_at` to the latest resolution, so an existing property whose override changed is refreshed in place. Guard `WHERE source='classifier'` once a per-property user-edit path exists (until then every row is classifier-derived, so blind overwrite is correct). See the "Re-run = recalculate" and "Snapshot, not FK" locked decisions.
## Key code references (from exploration)
**Frontend (`assessment-model`):**
- [finalize/route.ts](../../src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/finalize/route.ts) — today's synchronous property insert (to be deleted).
- [property.ts](../../src/app/db/schema/property.ts) — `property` table (`property_type`, `built_form` columns exist; `uq_property_portfolio_uprn`).
- [landlord_overrides.ts](../../src/app/db/schema/landlord_overrides.ts) — the four per-component override tables + all pgEnums (`wallTypeEnum`, `roofTypeEnum`, `propertyTypeEnum`, `builtFormTypeEnum`, `overrideSourceEnum`).
- [bulk_address_uploads.ts](../../src/app/db/schema/bulk_address_uploads.ts) — `multiEntryOrdering` (permutations, `0=main`), `multiEntrySummary`, `verifyAck`, `combinedOutputS3Uri`.
- [ADR-0004](../adr/0004-multi-entry-building-part-ordering.md) — defers exactly this fact layer; the building-part ordering model.
- [ADR-0002](../adr/0002-landlord-override-vocabulary.md) — vocabulary layer.
**Backend (`Model`):**
- `applications/landlord_description_overrides/handler.py` — the worker pattern to mirror (`subtask_handler`, `TaskOrchestrator`, trigger body, `commit_scope`).
- `infrastructure/postgres/landlord_wall_type_override_table.py` — SQLModel mirror pattern for the new `PropertyOverrideRow`.
- `infrastructure/postgres/landlord_override_enums.py` — shared `override_source` SAEnum pattern.
- `infrastructure/postgres/property_table.py``PropertyRow` defensive view ("backend never inserts" — to change).
- `repositories/landlord_overrides/landlord_override_repository.py` — repository pattern for the new override repo.
- `orchestration/landlord_description_overrides_orchestrator.py` — orchestrator pattern; note it splits cells into an orderless set (discards part order — recovered via `multiEntryOrdering`).
- Downstream: `orchestration/property_baseline_orchestrator.py` (re-score-on-override seam), `orchestration/ingestion_orchestrator.py`.
## Docs to update (when this lands)
- **CONTEXT.md**: `Property Type` / `built_form` are **per-part-capable**, not
whole-dwelling. Add the per-Property fact layer (`property_overrides`) to the
glossary + relationships. Possibly a `building_part` index definition.
- **New ADR** (frontend) for `property_overrides` + finaliser; companion Model ADR
for the cross-repo write, citing ADR-0003/0004.

View file

@ -1,13 +1,14 @@
# Landlord override frontend — in-flight design notes
**Status:** Paused mid-grilling (2026-05-27)
**Branch:** `feautre/additional_walltypes`
**Status:** Grilling complete (2026-05-28) — Q1Q7 resolved; ready to promote to ADR
**Branch:** `feature/frontend_landlord_overrides`
**Author:** Jun-te (with Claude, via `/grill-me`)
This is a *design-in-progress* document, not an ADR. It captures decisions made
so far on the landlord-override frontend plan so the conversation can resume
without re-litigating settled questions. Promote to an ADR once the trigger
mechanism (Q4) is resolved — that's the decision worth permanent recording.
This is a _design-in-progress_ document, not an ADR. It captures the decisions
reached during grilling so the conversation can resume without re-litigating
settled questions. All seven questions are now resolved; the trigger +
state-machine integration (Q4) is ready to promote to a frontend ADR-0003, and
the work is ready to break into issues.
## Goal
@ -23,14 +24,22 @@ Build the front-end e2e for `landlord_description_override`, starting from the
rationale in [ADR-0002](../adr/0002-landlord-override-vocabulary.md).
- Nothing in Next.js reads or writes them yet.
- The Python lambda at
`/workspaces/home/github/Model/applications/landlord_description_overrides/handler.py`
is **not deployed** and **not wired** into the BulkUpload pipeline. It
hardcodes its trigger params (`portfolio_id`, `s3_uri`) and its source column
names (`"Property Type"`, `"Walls"`, `"Roofs"`).
- Note: ADR-0002 says writes come from Next.js POST, but the current backend
writes direct to Postgres. This drift may need to be revisited under Q4.
`Model/applications/landlord_description_overrides/handler.py` (branch
`feature/landlord_data`) is **not deployed** and **not wired** into the
BulkUpload pipeline. It hardcodes the trigger fields (handler.py:55-60) and
builds a hardcoded `ClassifiableColumn` list with `source_column`s
`"Property Type"` (→ both `property_type` **and** `built_form_type`),
`"Walls"`, `"Roofs"`. It also caps the batch to 20 rows while under test.
- **Resolved drift (was a Q4 risk):** the backend's
[ADR-0003 (python-writes-landlord-overrides-directly)](https://github.com/Hestia-Homes/Model/blob/main/docs/adr/0003-python-writes-landlord-overrides-directly.md),
accepted 2026-05-26, **supersedes** ADR-0002's "writes happen from Next.js"
clause. The lambda computes **and** persists the classification directly to
Postgres via a SQLAlchemy `LandlordOverrideRepository[E]`, with an
`ON CONFLICT … WHERE source = 'classifier'` upsert. There is **no** Next.js
POST-back. Drizzle stays schema source-of-truth; the Python `SQLModel`
shadows it.
## Decided so far
## Decided
### Q1 — Scope
@ -41,8 +50,7 @@ to the lambda → lambda needs edits to work when deployed.
### Q2 — Categories
**All four classifier categories** get independent optional slots in
[`INTERNAL_FIELDS`](../../src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx#L14-L21):
**All four classifier categories** are independent optional fields:
`property_type`, `built_form_type`, `wall_type`, `roof_type`.
Rejected alternatives: (a) start with only PT+BF — wasted plumbing churn for
@ -50,58 +58,121 @@ the same migration cost; (c) collapse PT+BF into one UI slot — bakes a
backend coincidence (they read the same CSV column today) into the
user-facing model.
### Q2.1 — No `autoDetect` for the new slots
### Q2.1 — No `autoDetect` for classifier fields
The four new slots default to `"skip"`. The user must explicitly map them.
`autoDetect()` regex patterns are for required address-ish fields only.
The four classifier fields default to unmapped ("Not provided"). The user must
explicitly map them. `autoDetect()` is for required address-ish fields only.
**Why:** Address headers are unambiguous and required, so guessing is safe and
useful. Landlord-description columns are ambiguous (a "type" column could be
PropertyType or BuiltFormType or something else) and they are optional —
useful. Landlord-description columns are ambiguous and optional —
auto-detecting them would silently opt the landlord into classifier runs they
didn't intend.
## Open — resume here
### Q2.2 — Mapping shape: unified `field → header` (refines Q2's mechanism)
### Q3 (in flight) — Uniqueness validation on the mapping
The mapping is **one unified map keyed by internal field, valued by source CSV
header** (`{ address_1: "Addr 1", property_type: "Property Type",
built_form_type: "Property Type", wall_type: "Walls", … }`), replacing the
current `header → field` shape
([MapColumnsClient.tsx:62-64](<../../src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx#L62-L64>)).
Today validation only checks required fields exist
([MapColumnsClient.tsx:67-68](../../src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx#L67-L68));
two CSV headers can both map to `address_1` silently.
**Why:** the backend's `ClassifiableColumn` is a `(name, source_column)` pair
where **one `source_column` feeds many `name`s**`"Property Type"` feeds both
`property_type` and `built_form_type`
([classifiable_column.py:28-31](https://github.com/Hestia-Homes/Model/blob/feature/landlord_data/orchestration/classifiable_column.py)).
A `header → field` map (one value per header) **cannot express** that; the
chosen mechanism in Q2 (extending the single `INTERNAL_FIELDS` dropdown) would
leave `built_form_type` unmappable. `field → header` matches the backend model
1:1 and lets multiple categories share a header.
- (a) Leave it alone — backend last-wins.
- (b) Enforce uniqueness only on the four new slots.
- (c) Enforce uniqueness everywhere except `skip`. **Recommended.**
**Consequences:** UI inverts to one row per internal field with a header-picker;
`autoDetect` inverts to "best header per field"; "skip" becomes a per-field
"Not provided"; rewrite
[`transformFile`](<../../src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts#L17-L54>)
(currently iterates `header → field`), the `z.record` route schema, and
**migrate existing persisted `columnMapping` rows** (semantics flip) — see Q5.
### Q4 (queued — biggest) — Trigger mechanism
### Q3 — Sharing / uniqueness rule (reframed)
How does Next.js invoke the lambda once mapping is complete? SQS message?
Direct lambda invoke? HTTP endpoint? And what's the state-machine integration —
new `BulkUpload` status, or runs orthogonally to address matching?
A source header may feed **at most one address/reference field**, but **any
number of classifier fields** (PT + BF → `"Property Type"` is required, so
classifier sharing must be allowed). Required-field validation stays:
`address_1` and `postcode` must each be assigned a header.
This drives both the deployment work and the lambda edits. Likely worth its
own ADR once decided.
This **replaces** the original "enforce uniqueness everywhere except skip"
framing, which was wrong once classifier header-sharing became a requirement.
### Q5 (queued) — Persistence of the extended mapping
### Q4 — Trigger mechanism + state-machine integration (the big one)
Current `bulkAddressUploads.columnMapping` is a `Record<string, string>` and
naturally accommodates the new slots. Confirm no separate table is needed.
- **Transport:** reuse
[`triggerFastApiPipeline`](../../src/lib/bulkUpload/server.ts) with a **new
FastAPI endpoint**; payload `{ task_id, sub_task_id, s3_uri, portfolio_id,
column_mapping }`. FastAPI turns the POST into the SQS subtask envelope the
handler TODO (handler.py:51-54) references. Not a direct lambda invoke, not
Next.js → SQS.
- **`s3_uri` is the ORIGINAL upload** (`upload.s3Bucket/s3Key`), **not** the
address-matching transformed CSV — the description columns and their original
header names only survive in the original (the address transform strips every
non-address column).
- **Writes:** lambda writes overrides directly to Postgres (ADR-0003); no
POST-back.
- **State machine:** the classifier runs as a **subtask under the same address
task** (not a separate task, not a new `BulkUpload` status). Both subtasks
fire together at the **"Start address matching"** action. The combined address
*output* is not affected (the classifier writes Postgres, not
`ara_raw_outputs/{task_id}/`), **but** the parent Task *status* IS recomputed
from all subtasks (`TaskOrchestrator._cascade`), so a classifier failure fails
the onboarding task and gates the combiner. This coupling was knowingly
accepted (2026-05-28), superseding Q1 non-blocking — see ADR-0003's Amended
note. (Corrects an earlier wrong claim that the envelope does no gating.)
- **Progress honesty:** add a nullable **`service`/`kind` discriminator to
`sub_task`** (existing rows = address/legacy) so the progress view shows
address batches vs classification separately and attributes failures
correctly. Update the 3 subtask-count sites + `OnboardingProgress`.
### Q6 (queued) — Lambda edits
### Q5 — Persistence + migration of the inverted mapping
Handler hardcodes `source_column="Property Type" / "Walls" / "Roofs"`; needs
to read the mapping from the trigger body.
`LandlordDescriptionOverridesTriggerBody` already exists — check what fields
it has vs needs.
Reuse `bulkAddressUploads.columnMapping` (jsonb `Record<string, string>`); **no
separate table**. The Q2.2 inversion (`header → field``field → header`) is
handled by a **one-shot data migration**: invert each non-skip entry; on a
legacy duplicate (two headers → one field, which the new Q3 rule forbids
anyway) last-write-wins; `skip` entries drop. The migration is a no-op on empty
tables, so it's safe regardless of data volume. `validateMapping`
([server.ts:97-102](../../src/lib/bulkUpload/server.ts#L97-L102)) and the route
schema must be rewritten for the inverted shape (they currently check the
**values** for `address_1`/`postcode`).
### Q7 (queued) — Review/Edit UI for classified mappings
### Q6 — Lambda edits
Is the per-row review/edit surface in scope for this iteration, or deferred?
User has not addressed yet. ADR-0002 calls this "the future override
frontend" and treats it as deferred work — but "front end e2e from
bulk_upload" could reasonably include it.
`LandlordDescriptionOverridesTriggerBody` (task_id, sub_task_id, s3_uri,
portfolio_id; `extra="allow"`) gains a **`column_mapping: dict[str, str]`**
field carrying **only the classifier subset** (`category → source header`); the
frontend extracts the four classifier keys from its unified map before sending.
The handler keeps a fixed registry of the four category builders (each owns its
enum, repo, and any hint such as the wall construction-date one) and supplies
`source_column` from `column_mapping`, **skipping categories the user didn't
map**. Both `property_type` and `built_form_type` carry `"Property Type"`, so
PT+BF sharing falls out for free. Drop the hardcoded trigger and the 20-row cap.
## Resuming
### Q7 — Results UI: read-only this iteration
Re-read this file, then ask Q3. Don't re-litigate Q1/Q2 unless the user
reopens them.
This iteration ships a **read-only** results surface: it reads the four
`landlord_*_overrides` tables and shows each `description → value` (per
portfolio) so the pipeline is observably e2e. **No editing / write-back** — the
user-override write path (correcting a row to `source='user'`, which the
classifier upsert won't overwrite) is the deferred "future override frontend"
ADR-0002 anticipates.
## Next steps
Grilling is complete (Q1Q7). Suggested follow-ups:
1. **Done** — Q4 promoted to
[ADR-0003](../adr/0003-classifier-triggers-as-address-subtask.md) (trigger +
state-machine integration), cross-linking the backend's ADR-0003.
2. Break the work into issues (`/to-issues`): (a) invert the mapping shape + UI
+ migration + validation; (b) `sub_task` discriminator + progress view;
(c) classifier trigger (new FastAPI endpoint + payload, fire at "Start");
(d) lambda edits (trigger body + registry-from-mapping); (e) read-only
results view.
3. Confirm the working branch (see header) before implementation starts.

View file

@ -0,0 +1,88 @@
# Landlord override e2e — verification & deploy checklist
**Created:** 2026-05-28
**Branch:** `feature/frontend_landlord_overrides` (this repo) + `feature/landlord_data` (Model repo)
**Plan:** [landlord-override-frontend-plan.md](./landlord-override-frontend-plan.md) · **ADR:** [0003-classifier-triggers-as-address-subtask.md](../adr/0003-classifier-triggers-as-address-subtask.md)
## Context for picking this up cold
The landlord-classifier e2e is **implemented across both repos but uncommitted**, and
**statically verified only** (frontend `tsc` 0 errors, `next lint` clean, backend
`py_compile` clean). It has **not** been run live — that needs the steps below
(migrations applied, SQS queue + env, FastAPI endpoint + lambda deployed with
OpenAI/S3/Postgres access). Two migration files are **generated but not applied**:
`0215_invert_column_mapping.sql` (data) and `0216_add_subtask_service.sql` (schema).
Work through the sections in order — each step's prerequisites come first.
---
## A. Before you start
- [ ] Use dev/preview first, not prod.
- [ ] Confirm `.env.local` DB creds (`DB_HOST/PORT/USERNAME/PASSWORD/NAME`) point at the target DB.
- [ ] **Back up** `column_mapping` — the 0215 inversion is one-shot/irreversible:
```sql
CREATE TABLE _bak_bulk_mapping AS
SELECT id, column_mapping FROM bulk_address_uploads WHERE column_mapping IS NOT NULL;
```
## B. Database migrations ⚠️ read the gotcha
`0215` = data (inverts `header→field``field→header`); `0216` = schema (`ADD COLUMN sub_task.service`).
⚠️ package.json only has `migration:push` (`drizzle-kit push`). **`push` diffs schema and will NOT run the 0215 data `UPDATE`** — it would add the column but silently skip the inversion. Use `migrate`:
- [ ] ```bash
npx drizzle-kit migrate # runs 0215 then 0216 in order
```
(If the team only uses `push`: run `push` for 0216, then execute `0215`'s SQL manually.)
- [ ] ⚠️ Run **0215 exactly once, on old-shape data**. Re-running re-inverts and corrupts. `migrate` guards via the journal; manual runs don't.
- [ ] Verify 0215 — values should now be headers:
```sql
SELECT column_mapping FROM bulk_address_uploads WHERE column_mapping IS NOT NULL LIMIT 5;
-- expect {"address_1":"Addr 1","postcode":"PCode", ...}
```
- [ ] Verify 0216:
```sql
SELECT 1 FROM information_schema.columns
WHERE table_name='sub_task' AND column_name='service';
```
## C. Backend deploy (Model service)
- [ ] Create an SQS queue for the classifier (e.g. `landlord-description-overrides`).
- [ ] Set **`LANDLORD_OVERRIDES_SQS_URL`** in the FastAPI env to that queue.
- [ ] Deploy FastAPI so `/v1/bulk-uploads/trigger-landlord-overrides` is live.
- [ ] Deploy the lambda (`applications/landlord_description_overrides`) + event-source mapping queue → lambda.
- [ ] Lambda env/IAM: `OPENAI_API_KEY`, Postgres creds, **S3 read on the original-upload bucket** (it reads `upload.s3Bucket/s3Key`, not `retrofit-data-dev`).
## D. Frontend deploy
- [ ] Deploy `assessment-model` with the new code (`FASTAPI_API_URL` / `FASTAPI_API_KEY` already set).
## E. Verify the UI (Column Remapper)
- [ ] Map-columns page shows **one row per field with a header dropdown**, split into **Address fields** + **Landlord description fields**.
- [ ] Leaving Address 1 / Postcode unset blocks submit.
- [ ] Two **address** fields → one column is blocked; the **same** column → Property Type + Built Form is allowed.
- [ ] Existing `mapping_complete` uploads open with their mapping intact (confirms 0215).
## F. Verify trigger → classify → persist
- [ ] Map ≥1 landlord-description field, click **Start address matching**.
- [ ] `sub_task` has two rows under the task: `service='address2uprn'` and `service='landlord_description_overrides'`.
- [ ] SQS message enqueued + lambda ran (CloudWatch).
- [ ] Rows appear with `source='classifier'`:
```sql
SELECT description, value FROM landlord_property_type_overrides WHERE portfolio_id = <id> LIMIT 10;
```
## G. Verify the results view
- [ ] `/portfolio/<id>/landlord-overrides` lists `description → value` per category with a "classifier" badge. (No nav link yet — reach by URL.)
## H. Regression — address matching unaffected
- [ ] The same upload's address pipeline still emits the canonical CSV (`Address 1`/`postcode`) and combines normally.
## I. Watch-out (by design — ADR-0003 "accepted coupling")
- [ ] If the classifier subtask **fails**, the shared onboarding task goes FAILED and **"Run Combiner" is blocked**; the task only COMPLETEs once classification finishes. If painful, switch to the separate-task design in the ADR.
---
## Still open / not done
- [ ] Commit the work (this repo + Model repo, separately) — currently uncommitted.
- [ ] Nav link to `/portfolio/<id>/landlord-overrides` (reachable by URL only).
- [ ] User-edit write-back for overrides (deferred — Q7 "read-only this iteration").

View file

@ -0,0 +1,178 @@
# Multi-entry building-part ordering — in-flight design notes
**Status:** Grilling complete (2026-06-02) — ready to break into issues
**Branch:** `feature/frontend_landlord_overrides`
**Author:** Jun-te (with Claude, via `/grill-me`)
A _design-in-progress_ document, not the ADR. It records the decisions reached
during grilling so the conversation can resume without re-litigating settled
questions. The flow + schema decision is promoted to
[ADR-0004](../adr/0004-multi-entry-building-part-ordering.md); new domain terms
are promoted to [CONTEXT.md](../../CONTEXT.md#building-parts).
## Goal
After address matching and classification finish, a single address row can carry
**comma-separated entries** in physical-element columns — e.g.
`Walls = "Cavity: AsBuilt (1976-1982), Cavity: FilledCavity"`,
`Roofs = "Flat: As Built, PitchedNormalLoftAccess: 200mm"`. Each entry is a
**building part** (main building + extensions). The order is ambiguous and a
**consistent per-file mistake**, so we capture the correct ordering from the user
**once per file** and persist it on the BulkUpload for a later consumer.
## Backstory / ground truth (verified against the example file + code)
- In `ARA AddressProfiling_Download_28-04-2026_10501 (2).xlsx` (32,213 data
rows): **0 UPRNs appear in more than one row** — multi-entry is
comma-separated values **inside one cell**, never multiple rows per address.
- In a multi-entry row the multi-valued columns **agree on count** (Walls=2 ∧
Roofs=2) while whole-dwelling columns stay at 1 (`Property Type` = `"House:
EndTerrace"`). So position *i* is the **same building part across every
multi-valued column**.
- The classifier today **discards** this: [`get_col_to_description_mappings`](/workspaces/home/github/Model/orchestration/landlord_description_overrides_orchestrator.py)
does `value.split(",")` into a **`set`** — orderless, deduped. Correct for the
vocabulary layer (description→enum), but it drops exactly the
position/building-part association this feature needs.
- This is the **per-Property building-part fact** territory ADR-0002 deferred
("a per-Property fact layer (not yet modelled)"). We are **not** building that
layer here — only **capturing** the ordering it will need.
## Decided
### Q1 — Order semantics: full reorder, keyed by count
Position *i* = a building part. The user supplies a **permutation per distinct
entry-count**; persisted as `{ count: permutation }`. This iteration captures
only the **largest-count** sample (see Q5).
### Q1.1 — Order scope: one ordering across all columns
A single per-count permutation realigns **every** multi-valued column at once
(index-aligned — Walls[i] and Roofs[i] are the same part). Not per-column.
Matches the data (counts agree across columns).
### Q1.2 — Mixed counts: single-value columns are whole-property
A 1-entry column (e.g. `Property Type`) is a **whole-dwelling** fact attached to
the property; only columns with N>1 are sliced into building parts. No padding.
### Q2 — Scope: capture + persist ordering only
Detect multi-entry, show one sample address + our classification, capture the
per-count ordering, persist on the BulkUpload. **Not** in scope: the
per-Property fact table or writing main/extension facts at finalise. The
ordering is stored for a later consumer.
### Q2.1 — Editable verification IS in scope (expands Q2)
The "verify classification" step lets the user **correct** a classification,
written back as `source='user'`. This deliberately picks up ADR-0002 Q7's
deferred **vocabulary** user-override write path — distinct from the per-Property
fact layer, which stays deferred.
### Q3 — Placement: on the `awaiting_review` surface
Render the flow on the existing
[OnboardingProgress](<../../src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx>)
page when `status === "awaiting_review"`. Classification finishes *before* the
combiner (both subtasks must complete → combiner → `awaiting_review`), so by the
time Finalise is offered the classification output exists. No new route.
### Q3.1 — Flow: two-step stepper, steps appear independently
- **Step 1 — Verify classification** — shows whenever **≥1 classifier column**
was mapped.
- **Step 2 — Confirm order** — shows only when **multi-entry was detected**.
- A file with classifier columns but no multi-entry shows only Step 1; a file
with neither goes straight to Finalise.
### Q3.2 — Gate: both steps gate Finalise (where each applies)
`canFinalize = status==="awaiting_review" && (noClassifierCols || verifyAck) &&
(noMultiEntry || orderingConfirmed)`. Two flags persisted. Finalise is one
click but the button stays disabled until its applicable gates are satisfied.
### Q4 — Verify step lists the sample address's entries only
Step 1 lists just the descriptions in the **one sample address** (matches "one
address"). Because a correction is per-`(portfolio, description)`, editing one
changes the mapping **portfolio-wide** for that text — the UI must say so. A
spot-check, not full-vocabulary coverage.
### Q4.1 — Write-back: Next.js upsert, `source='user'`, single row (as built)
A Next.js route handler / server action upserts the `landlord_*_overrides` row
by `(portfolio_id, description)` setting `value` + `source='user'`, validating
against the pgEnum. **Schema unchanged** — we keep ADR-0002's `UNIQUE
(portfolio_id, description)` and flip the single row's source in place. The
Python classifier's existing `ON CONFLICT … WHERE source='classifier'`
([landlord_overrides_postgres_repository.py:84-91](/workspaces/home/github/Model/infrastructure/landlord_overrides/landlord_overrides_postgres_repository.py#L84))
then never re-clobbers it.
> Considered and **rejected**: two rows per description (classifier + user) with
> read-time `user > classifier` resolution. It buys "revert to our suggestion" +
> provenance, and is cheap now (no readers exist yet), but reopens ADR-0002's
> `UNIQUE` decision and migrates Drizzle + 4 Python tables + the conflict target.
> Not worth it for this iteration; the single-row flip already gives "user wins".
> This is the first Next.js writer of a `source='user'` row.
### Q5 — Which sample: the largest-count row
Show one sample address — the row with the **most** building parts — so ordering
it reveals the fullest convention. In the common case (only N=2) that is a
single 2-part address.
### Q5.1 — Reorder UI: label each position
Lay the file's entries out as rows (position 0, 1, …), each with a building-part
dropdown (**Main building** / **Extension 1** / …). Assigning labels yields the
permutation and validates (each part used once, exactly one Main building). All
multi-valued columns are shown together, each raw entry annotated with our
classified enum, so the user sanity-checks classification **and** alignment.
### Q6 — Detection: at start, persist a summary
Compute the multi-entry summary in the **start-address-matching POST**
([route.ts:106](<../../src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts#L106>))
where the full `rows` are already parsed in memory — which columns are
multi-valued, the distinct counts (with row-counts so we can pick the largest),
and the largest-count sample (address + per-column raw entries). Avoids
re-reading a 32k-row file at render. Classification enums are joined at render
from the override tables.
### Q7 — Persistence: two jsonb columns on `bulk_address_uploads`
- `multiEntrySummary jsonb` — written at start (detection).
- `multiEntryOrdering jsonb` — written at confirm: `{ count: permutation }` plus
`verifyAck` / `orderingConfirmed` flags (final shape TBD; may split flags into
their own columns).
No new table — mirrors how `columnMapping` lives on the upload row.
## Risks / load-bearing assumptions
1. **Consistent-mistake assumption.** All rows of a given count share one
ordering convention. The whole "ask once" design rests on this; if a file
mixes conventions within a count, a single per-count permutation is wrong.
2. **Largest-count-only capture.** Smaller counts stay unpopulated in the map.
A future consumer (or a later UI iteration) needs a derivation rule to apply
the convention to other counts.
3. **Normalization coupling — mitigated.** To join the sample's raw entries to
the override tables the frontend must match the backend's `split(",")`
`strip``lower`. **Resolution:** store the *normalized* description keys in
`multiEntrySummary` at start (the route already holds the rows), so the
render-time join is exact-match — no cross-repo string-normalization drift.
4. **Portfolio-wide blast radius.** A verify-step edit changes the mapping for
every row with that description, not just the sample address. Must be
messaged in the UI.
## Suggested issues (`/to-issues`)
1. Schema: two jsonb columns on `bulk_address_uploads` + migration.
2. Detection at start: compute + persist `multiEntrySummary` (with normalized
description keys).
3. Verify step: list sample descriptions → enum (join override tables),
editable; Next.js upsert route writing `source='user'`; `verifyAck` flag.
4. Order step: largest-count sample, position→part dropdowns → permutation;
persist `multiEntryOrdering`; `orderingConfirmed` flag.
5. Gate: wire `canFinalize` to the two flags; conditional stepper rendering.

View file

@ -0,0 +1,68 @@
import {
getSampleClassifications,
getUnknownOverrides,
setClassificationOverride,
} from "@/lib/bulkUpload/server";
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import { z } from "zod";
// Read-only: the classifier's resolved enums for the review sample's entries
// (field -> description -> value), plus the descriptions still classified
// `Unknown` portfolio-wide — the Finalise gate blocks until that list is empty
// and the user can resolve each via PATCH below (ADR-0004 #298, ADR-0006).
export async function GET(
_request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
const session = await getServerSession(AuthOptions);
if (!session) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
const { portfolioId, uploadId } = await params;
const [classifications, unknown] = await Promise.all([
getSampleClassifications(uploadId),
getUnknownOverrides(portfolioId),
]);
return NextResponse.json({ classifications, unknown }, { status: 200 });
}
const PatchSchema = z.object({
field: z.string(),
description: z.string(),
value: z.string(),
});
// Correct one classification, written as a user override (source='user'). The
// edit is per-(portfolio, description), so it applies portfolio-wide (issue #299).
export async function PATCH(
request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
const session = await getServerSession(AuthOptions);
if (!session) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
const { portfolioId } = await params;
let body;
try {
body = PatchSchema.parse(await request.json());
} catch {
return NextResponse.json({ error: "Invalid input" }, { status: 400 });
}
try {
const result = await setClassificationOverride(
portfolioId,
body.field,
body.description,
body.value,
);
if (result.kind === "invalid")
return NextResponse.json({ error: result.reason }, { status: 422 });
return NextResponse.json({ ok: true }, { status: 200 });
} catch (error) {
console.error("Failed to save classification override:", error);
return NextResponse.json({ error: "Internal server error" }, { status: 500 });
}
}

View file

@ -25,6 +25,10 @@ export async function POST(
);
case "already_combined":
return NextResponse.json({ alreadyCombined: true }, { status: 200 });
case "already_dispatched":
// Lost the double-dispatch CAS (or the combiner is already running) — a
// benign no-op; the client just keeps polling and sees `combining`.
return NextResponse.json({ alreadyDispatched: true }, { status: 200 });
case "not_found":
return NextResponse.json({ error: "Not found" }, { status: 404 });
case "missing_task":

View file

@ -1,160 +1,46 @@
import { db } from "@/app/db/db";
import { property } from "@/app/db/schema/property";
import { sql } from "drizzle-orm";
import { NextRequest, NextResponse } from "next/server";
import { revalidatePath } from "next/cache";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import { createRetrofitDataS3Client } from "@/app/utils/s3";
import * as XLSX from "xlsx";
import { loadForFinalize, markFinalized } from "@/lib/bulkUpload/server";
const ADDRESS_COLS = ["Address 1", "Address 2", "Address 3"] as const;
const POSTCODE_COL = "postcode";
const INTERNAL_REF_COL = "Internal Reference";
const UPRN_COL = "address2uprn_uprn";
const MATCHED_ADDRESS_COL = "address2uprn_address";
const LEXISCORE_COL = "address2uprn_lexiscore";
const MISSING_SENTINEL = "invalid postcode";
const UK_POSTCODE_RE = /[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}/i;
function normalize(v: unknown): string {
if (v === null || v === undefined) return "";
return String(v).trim();
}
function isMissing(v: string): boolean {
return v === "" || v.toLowerCase() === MISSING_SENTINEL;
}
function parseUprn(raw: unknown): bigint | null {
const v = normalize(raw);
if (isMissing(v)) return null;
try {
return BigInt(v);
} catch {
return null;
}
}
function parseLexiscore(raw: unknown): number | null {
const v = normalize(raw);
if (isMissing(v)) return null;
const n = Number(v);
return Number.isFinite(n) ? n : null;
}
function extractPostcode(matched: string | null, fallback: string): string | null {
if (matched) {
const m = matched.match(UK_POSTCODE_RE);
if (m) return m[0].toUpperCase();
}
return fallback || null;
}
function parseS3Uri(uri: string): { bucket: string; key: string } | null {
if (!uri.startsWith("s3://")) return null;
const rest = uri.slice(5);
const slash = rest.indexOf("/");
if (slash < 0) return null;
return { bucket: rest.slice(0, slash), key: rest.slice(slash + 1) };
}
import { readSessionToken } from "@/lib/session";
import { dispatchFinaliser } from "@/lib/bulkUpload/server";
// Finalise is now asynchronous (ADR-0005). This route no longer inserts
// properties; it dispatches the bulk_upload_finaliser Lambda and flips the
// BulkUpload to `finalising` via a compare-and-swap (the double-dispatch guard).
// The Lambda reads the combiner output, inserts the property rows, and writes the
// terminal `complete`/`failed` status directly. The user sees "Uploading to ARA"
// while the row is `finalising`; the onboarding surface polls for the outcome.
export async function POST(
_request: NextRequest,
request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
const session = await getServerSession(AuthOptions);
if (!session) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
const { uploadId } = await params;
const sessionToken = readSessionToken(request);
const guarded = await loadForFinalize(uploadId);
switch (guarded.kind) {
const result = await dispatchFinaliser({ uploadId, sessionToken });
switch (result.kind) {
case "ok":
// Accepted: the finaliser is running; the row is now `finalising`.
return NextResponse.json({ taskId: result.taskId }, { status: 202 });
case "not_found":
return NextResponse.json({ error: "Not found" }, { status: 404 });
case "already_finalized":
// Idempotent: nothing to do.
return new NextResponse(null, { status: 200 });
case "wrong_state":
return NextResponse.json(
{ error: `Upload not ready to finalize (state: ${guarded.current})` },
{ status: 409 }
);
case "not_yet_combined":
return NextResponse.json({ error: "Combiner not finished" }, { status: 409 });
}
const upload = guarded.upload;
const parsed = parseS3Uri(upload.combinedOutputS3Uri!);
if (!parsed) {
return NextResponse.json({ error: "Invalid combined output S3 URI" }, { status: 500 });
}
const s3 = createRetrofitDataS3Client();
let rawRows: Record<string, unknown>[];
try {
const obj = await s3
.getObject({ Bucket: parsed.bucket, Key: parsed.key })
.promise();
const buf = Buffer.from(obj.Body as Uint8Array);
const wb = XLSX.read(buf, { type: "buffer" });
const sheet = wb.Sheets[wb.SheetNames[0]];
rawRows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: "" });
} catch (err) {
console.error("Failed to read combined CSV from S3:", err);
return NextResponse.json({ error: "Failed to read combined CSV" }, { status: 502 });
}
const portfolioIdBig = BigInt(upload.portfolioId);
const values = rawRows.map((raw) => {
const userInputtedAddress =
ADDRESS_COLS.map((c) => normalize(raw[c])).filter(Boolean).join(", ") || null;
const userInputtedPostcode = normalize(raw[POSTCODE_COL]) || null;
const uprn = parseUprn(raw[UPRN_COL]);
const matchedAddressRaw = normalize(raw[MATCHED_ADDRESS_COL]);
const matchedAddress = isMissing(matchedAddressRaw) ? null : matchedAddressRaw;
const address = matchedAddress ?? userInputtedAddress;
const postcode = extractPostcode(matchedAddress, userInputtedPostcode ?? "");
const internalRef = normalize(raw[INTERNAL_REF_COL]) || null;
const lexiscore = parseLexiscore(raw[LEXISCORE_COL]);
return {
portfolioId: portfolioIdBig,
creationStatus: "READY" as const,
uprn,
landlordPropertyId: internalRef,
address,
postcode,
userInputtedAddress,
userInputtedPostcode,
lexiscore,
};
});
try {
if (values.length > 0) {
await db
.insert(property)
.values(values)
.onConflictDoNothing({
target: [property.portfolioId, property.uprn],
where: sql`${property.uprn} IS NOT NULL`,
});
}
await markFinalized(uploadId);
revalidatePath("/portfolio/[slug]", "layout");
return new NextResponse(null, { status: 200 });
} catch (err) {
console.error("Failed to finalize bulk upload:", err);
return NextResponse.json({ error: "Failed to import properties" }, { status: 500 });
case "missing_task":
return NextResponse.json({ error: "Upload has no task to finalise" }, { status: 409 });
case "wrong_state":
return NextResponse.json(
{ error: `Upload not ready to finalize (state: ${result.current})` },
{ status: 409 }
);
case "trigger_failed":
return NextResponse.json({ error: result.message }, { status: result.status });
}
}

View file

@ -0,0 +1,49 @@
import { setMultiEntryOrdering } from "@/lib/bulkUpload/server";
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import { z } from "zod";
const PatchSchema = z.object({
// entry-count -> permutation (part slot -> file position). See ADR-0004.
permutations: z.record(z.string(), z.array(z.number().int().nonnegative())),
});
export async function PATCH(
request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
const session = await getServerSession(AuthOptions);
if (!session) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
const { uploadId } = await params;
let body;
try {
body = PatchSchema.parse(await request.json());
} catch {
return NextResponse.json({ error: "Invalid input" }, { status: 400 });
}
try {
const result = await setMultiEntryOrdering(uploadId, body.permutations);
switch (result.kind) {
case "ok":
return NextResponse.json(result.upload, { status: 200 });
case "not_found":
return NextResponse.json({ error: "Not found" }, { status: 404 });
case "wrong_state":
return NextResponse.json(
{ error: `Cannot set ordering in state '${result.current}'` },
{ status: 409 }
);
case "not_multi_entry":
return NextResponse.json({ error: "Upload has no multi-entry rows" }, { status: 409 });
case "invalid_ordering":
return NextResponse.json({ error: result.reason }, { status: 422 });
}
} catch (error) {
console.error("Failed to save multi-entry ordering:", error);
return NextResponse.json({ error: "Internal server error" }, { status: 500 });
}
}

View file

@ -1,39 +1,35 @@
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { randomUUID } from "node:crypto";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import { createS3Client, createRetrofitDataS3Client, retrofitDataS3Bucket } from "@/app/utils/s3";
import * as XLSX from "xlsx";
import { loadForAddressMatching, triggerAddressMatching } from "@/lib/bulkUpload/server";
import { loadForAddressMatching, saveMultiEntrySummary, triggerAddressMatching, triggerClassifier } from "@/lib/bulkUpload/server";
import { readSessionToken } from "@/lib/session";
import { ADDRESS_FIELDS, classifierMapping } from "@/lib/bulkUpload/columnFields";
import { addressCsvKey, classifierCsvKey, SOURCE_ROW_ID_COLUMN } from "@/lib/bulkUpload/s3Keys";
import { detectMultiEntry } from "@/lib/bulkUpload/multiEntry";
const FIELD_RENAME: Record<string, string> = {
address_1: "Address 1",
address_2: "Address 2",
address_3: "Address 3",
postcode: "postcode",
internal_reference: "Internal Reference",
};
type SheetRow = Record<string, unknown>;
function transformFile(
buffer: Buffer,
columnMapping: Record<string, string>
): { csv: string; error?: never } | { csv?: never; error: string } {
function readRows(buffer: Buffer): SheetRow[] {
const wb = XLSX.read(buffer, { type: "buffer" });
const sheet = wb.Sheets[wb.SheetNames[0]];
const rows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: "" });
return XLSX.utils.sheet_to_json<SheetRow>(sheet, { defval: "" });
}
if (rows.length === 0) return { error: "Empty file" };
const sourceHeaders = Object.keys(rows[0]);
// Address-matching CSV: address fields only, renamed to canonical headers.
function buildAddressCsv(
rows: SheetRow[],
columnMapping: Record<string, string> // field → source header
): { csv: string; error?: never } | { csv?: never; error: string } {
const outputHeaders: string[] = [];
const sourceToOutput: Record<string, string> = {};
for (const src of sourceHeaders) {
const mapped = columnMapping[src];
if (!mapped || mapped === "skip") continue;
const renamed = FIELD_RENAME[mapped] ?? mapped;
outputHeaders.push(renamed);
sourceToOutput[src] = renamed;
const outputToSource: Record<string, string> = {};
for (const field of ADDRESS_FIELDS) {
const src = columnMapping[field.value];
if (!src || !field.outputHeader) continue;
outputHeaders.push(field.outputHeader);
outputToSource[field.outputHeader] = src;
}
if (!outputHeaders.includes("Address 1"))
@ -41,11 +37,17 @@ function transformFile(
if (!outputHeaders.includes("postcode"))
return { error: 'Mapping must include "postcode"' };
// Carry the synthetic per-row join key through to the combiner output, so the
// finaliser can re-associate a UPRN-matched row with its classifier
// descriptions (ADR-0006). It rides `address2uprn` as a preserved input column.
outputHeaders.push(SOURCE_ROW_ID_COLUMN);
const outputRows = rows.map((row) => {
const out: Record<string, unknown> = {};
for (const [src, renamed] of Object.entries(sourceToOutput)) {
out[renamed] = row[src] ?? "";
const out: SheetRow = {};
for (const [outName, src] of Object.entries(outputToSource)) {
out[outName] = row[src] ?? "";
}
out[SOURCE_ROW_ID_COLUMN] = row[SOURCE_ROW_ID_COLUMN] ?? "";
return out;
});
@ -53,6 +55,32 @@ function transformFile(
return { csv: XLSX.utils.sheet_to_csv(outSheet) };
}
// Classifier CSV: the mapped classifier source columns only, original headers
// preserved (the lambda resolves them via column_mapping). Converting here means
// the classifier always reads a real CSV even when the upload was .xlsx/.xls —
// see ADR-0003. One source header may feed several categories, so dedupe to
// distinct headers.
function buildClassifierCsv(
rows: SheetRow[],
classifierMap: Record<string, string> // category → source header
): string {
const sourceHeaders = [...new Set(Object.values(classifierMap))];
// Emit the synthetic join key alongside the classifier columns so the
// finaliser can join this row's descriptions to its combiner identity by
// `source_row_id` (ADR-0006). `buildClassifierCsv` projects a fixed column
// set, so the key must be added explicitly — attaching it to the row is not
// enough.
const headers = [...sourceHeaders, SOURCE_ROW_ID_COLUMN];
const outputRows = rows.map((row) => {
const out: SheetRow = {};
for (const h of sourceHeaders) out[h] = row[h] ?? "";
out[SOURCE_ROW_ID_COLUMN] = row[SOURCE_ROW_ID_COLUMN] ?? "";
return out;
});
const outSheet = XLSX.utils.json_to_sheet(outputRows, { header: headers });
return XLSX.utils.sheet_to_csv(outSheet);
}
export async function POST(
request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
@ -91,11 +119,29 @@ export async function POST(
return NextResponse.json({ error: "Failed to read source file" }, { status: 500 });
}
const transformed = transformFile(fileBuffer, upload.columnMapping!);
const parsedRows = readRows(fileBuffer);
if (parsedRows.length === 0)
return NextResponse.json({ error: "Empty file" }, { status: 422 });
// Mint a stable synthetic id per source row, here at the one point both CSVs
// are built from the same array, and write it into both. It is the finaliser's
// join key between the combiner output (identity) and the classifier CSV
// (descriptions) — see ADR-0006. Deterministic ordering is not required: both
// CSVs are produced together in this handler, so they always share values.
const rows = parsedRows.map((row) => ({
...row,
[SOURCE_ROW_ID_COLUMN]: randomUUID(),
}));
// Detect multi-entry building parts now, while the whole file is parsed in
// memory, so the awaiting_review surface never re-reads it (ADR-0004).
await saveMultiEntrySummary(uploadId, detectMultiEntry(rows, upload.columnMapping!));
const transformed = buildAddressCsv(rows, upload.columnMapping!);
if (transformed.error)
return NextResponse.json({ error: transformed.error }, { status: 422 });
const transformedKey = `bulk_onboarding_inputs/${portfolioId}/${uploadId}.csv`;
const transformedKey = addressCsvKey(portfolioId, uploadId);
try {
await outputS3
.putObject({
@ -112,13 +158,37 @@ export async function POST(
const s3Uri = `s3://${outputBucket}/${transformedKey}`;
const trigger = await triggerAddressMatching({
uploadId,
s3Uri,
sessionToken: readSessionToken(request),
});
// Convert the mapped classifier columns to their own CSV so the classifier
// lambda always parses a real CSV, never the raw upload (which may be
// .xlsx/.xls). Only when the user mapped ≥1 classifier column. See ADR-0003.
const classifierMap = classifierMapping(upload.columnMapping!);
let classifierS3Uri: string | undefined;
if (Object.keys(classifierMap).length > 0) {
const classifierKey = classifierCsvKey(portfolioId, uploadId);
try {
await outputS3
.putObject({
Bucket: outputBucket,
Key: classifierKey,
Body: buildClassifierCsv(rows, classifierMap),
ContentType: "text/csv",
})
.promise();
classifierS3Uri = `s3://${outputBucket}/${classifierKey}`;
} catch (err) {
// Non-blocking: classification is skipped, address matching proceeds.
console.error("Failed to upload classifier CSV:", err);
}
}
const sessionToken = readSessionToken(request);
const trigger = await triggerAddressMatching({ uploadId, s3Uri, sessionToken });
if (trigger.kind === "trigger_failed")
return NextResponse.json({ error: trigger.message }, { status: trigger.status });
// Co-fire the landlord classifier (non-blocking) under the same task.
if (classifierS3Uri)
await triggerClassifier({ taskId: trigger.taskId, uploadId, s3Uri: classifierS3Uri, sessionToken });
return NextResponse.json({ taskId: trigger.taskId }, { status: 200 });
}

View file

@ -0,0 +1,35 @@
import { setVerifyAck } from "@/lib/bulkUpload/server";
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
// Records the "Verify classification" acknowledgement (ADR-0004 Step 1). No
// body — the per-row corrections go through the classifications PATCH; this just
// marks that the user has checked the sample and unlocks Finalise.
export async function PATCH(
request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
const session = await getServerSession(AuthOptions);
if (!session) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
const { uploadId } = await params;
try {
const result = await setVerifyAck(uploadId);
switch (result.kind) {
case "ok":
return NextResponse.json(result.upload, { status: 200 });
case "not_found":
return NextResponse.json({ error: "Not found" }, { status: 404 });
case "wrong_state":
return NextResponse.json(
{ error: `Cannot verify classification in state '${result.current}'` },
{ status: 409 }
);
}
} catch (error) {
console.error("Failed to record classification verification:", error);
return NextResponse.json({ error: "Internal server error" }, { status: 500 });
}
}

View file

@ -22,17 +22,26 @@ export interface MultiEntrySummary {
multiValuedFields: string[];
countDistribution: Record<string, number>;
largestCount: number;
// Step 1 (verify) sample: the largest-count row when multi-entry, else the
// first classified row. `null` ⇒ nothing to verify.
sample: MultiEntrySample | null;
// Step 2 (order): one sample per distinct entry-count ≥ 2 present in the file,
// keyed by count. Each count needs its OWN confirmed permutation — a smaller
// count's ordering can't be derived from a larger one (ADR-0004, amended
// 2026-06-05). Absent on uploads detected before that amendment.
samplesByCount?: Record<string, MultiEntrySample>;
}
// User-confirmed building-part ordering (ADR-0004). Keyed by entry-count so it
// can hold more than one count later; this iteration populates only the
// largest. permutations[count][k] = the 0-based file position holding building
// part k, where 0 = Main building, 1..N-1 = Extension 1..N-1.
// User-confirmed building-part ordering (ADR-0004, amended 2026-06-05). Keyed by
// entry-count: a permutation is captured for EVERY distinct count ≥ 2 in the
// file (the v2 fact layer can't derive one count's order from another).
// permutations[count][k] = the 0-based file position holding building part k,
// where 0 = Main building, 1..N-1 = Extension 1..N-1.
// e.g. { "2": [1, 0] } => for 2-part rows the main building is file position 1.
export interface MultiEntryOrdering {
permutations: Record<string, number[]>;
// Set once the user confirms; gates Finalise when the upload is multi-entry.
// True once EVERY detected count ≥ 2 has a permutation; gates Finalise when the
// upload is multi-entry.
confirmed: boolean;
}
@ -46,16 +55,8 @@ export const bulkAddressUploads = pgTable("bulk_address_uploads", {
status: text("status").notNull().default("ready_for_processing"),
sourceHeaders: text("source_headers").array().notNull().default(sql`'{}'`),
columnMapping: jsonb("column_mapping").$type<Record<string, string>>(),
// Multi-entry building-part detection, computed at start-address-matching
// and read by the awaiting_review review surface (ADR-0004).
multiEntrySummary: jsonb("multi_entry_summary").$type<MultiEntrySummary>(),
// User-confirmed building-part ordering for the multi-entry sample (ADR-0004).
multiEntryOrdering: jsonb("multi_entry_ordering").$type<MultiEntryOrdering>(),
// Step 1 "Verify classification" acknowledgement (ADR-0004). Gates Finalise
// whenever the upload has classifier columns — independent of multi-entry, so
// it lives in its own column rather than on multiEntryOrdering. Set true by
// the verify-classification route once the user confirms the sample's
// description→enum mapping.
verifyAck: boolean("verify_ack").notNull().default(false),
taskId: uuid("task_id"),
combinedOutputS3Uri: text("combined_output_s3_uri"),

View file

@ -1,18 +1,59 @@
"use client";
import { useState } from "react";
import { useRouter } from "next/navigation";
import Link from "next/link";
import { ArrowRightIcon } from "@heroicons/react/24/outline";
import { ArrowRightIcon, CheckCircleIcon } from "@heroicons/react/24/outline";
import {
useBulkUploadProgress,
useConfirmMultiEntryOrdering,
useConfirmVerification,
useEditClassification,
useFinalize,
useRequestCombine,
useSampleClassifications,
type SampleClassifications,
} from "@/lib/bulkUpload/client";
import {
partLabel,
isPermutation,
assignmentToPermutation,
type MultiEntrySample,
} from "@/lib/bulkUpload/multiEntry";
import type { MultiEntryOrdering } from "@/app/db/schema/bulk_address_uploads";
import {
PropertyTypeValues,
BuiltFormTypeValues,
WallTypeValues,
RoofTypeValues,
} from "@/app/db/schema/landlord_overrides";
import { CLASSIFIER_FIELDS } from "@/lib/bulkUpload/columnFields";
import { statusLabel, isTerminalStatus } from "@/lib/bulkUpload/types";
// Valid enum options per classifier category, for the editable dropdowns (#299).
const CATEGORY_VALUES: Record<string, readonly string[]> = {
property_type: PropertyTypeValues,
built_form_type: BuiltFormTypeValues,
wall_type: WallTypeValues,
roof_type: RoofTypeValues,
};
// Our category label per classifier field (e.g. built_form_type → "Built Form").
// Distinguishes the categories when several read from the same source column —
// Property Type and Built Form both come from one column, so labelling by the
// customer's header alone is ambiguous.
const FIELD_LABEL: Record<string, string> = Object.fromEntries(
CLASSIFIER_FIELDS.map((f) => [f.value, f.label]),
);
interface Props {
portfolioSlug: string;
portfolioId: string;
uploadId: string;
// The status at the last server render. Used to refresh the server page exactly
// once when polling first observes a terminal status (async finalise, ADR-0005),
// so the page advances from "Uploading to ARA" to the "Processing complete" card.
serverStatus: string;
isDomnaUser: boolean;
}
@ -23,13 +64,33 @@ export default function OnboardingProgress({
portfolioSlug,
portfolioId,
uploadId,
serverStatus,
isDomnaUser,
}: Props) {
const router = useRouter();
const progress = useBulkUploadProgress(portfolioId, uploadId);
const progress = useBulkUploadProgress(portfolioId, uploadId, {
// When the async finaliser finishes, the poll flips the status to a terminal
// value while the server page is still on `finalising`. Refresh once so the
// server re-renders the "Processing complete" / "failed" card. Guarding on the
// non-terminal serverStatus prevents a refresh loop: after the refresh the
// prop is terminal, so this no-ops.
onSuccess: (data) => {
if (!isTerminalStatus(serverStatus) && isTerminalStatus(data.upload.status)) {
router.refresh();
}
},
});
const combine = useRequestCombine(portfolioId, uploadId);
const finalize = useFinalize(portfolioId, uploadId);
// Read-only classifications for the multi-entry sample (issue #298). Fetched
// only once a sample exists at awaiting_review. Hook stays above the early
// returns so its order is stable.
const sampleReady =
progress.data?.upload.status === "awaiting_review" &&
!!progress.data.upload.multiEntrySummary?.sample;
const classifications = useSampleClassifications(portfolioId, uploadId, sampleReady);
if (progress.isError) return null;
if (!progress.data) {
return (
@ -41,19 +102,61 @@ export default function OnboardingProgress({
}
const { task, upload } = progress.data;
const total = task?.totalSubtasks ?? 0;
const completedSubtasks = task?.completedSubtasks ?? 0;
const failedSubtasks = task?.failedSubtasks ?? 0;
// Address-matching batches drive the bar; classification is shown separately.
const total = task?.addressTotal ?? 0;
const completedSubtasks = task?.addressCompleted ?? 0;
const failedSubtasks = task?.addressFailed ?? 0;
const percent = total > 0 ? Math.round((completedSubtasks / total) * 100) : 0;
const classifierTotal = task?.classifierTotal ?? 0;
const classifierCompleted = task?.classifierCompleted ?? 0;
const classifierFailed = task?.classifierFailed ?? 0;
const taskStatus = task?.status.toLowerCase() ?? "";
const taskDone = TASK_TERMINAL_STATUSES.has(taskStatus);
const taskFailed = TASK_FAILED_STATUSES.has(taskStatus);
const isCombining = upload.status === "combining";
const isImporting = upload.status === "awaiting_review";
// Async finaliser in flight (ADR-0005). Polling continues (non-terminal) until
// the backend writes complete/failed. Surfaced to the user as "Uploading to ARA".
const isFinalising = upload.status === "finalising";
const canRunCombiner = taskDone && !taskFailed && upload.status === "processing";
const canFinalize = upload.status === "awaiting_review";
const isAwaitingReview = upload.status === "awaiting_review";
// Two-step review on the awaiting_review surface (ADR-0004). The sample exists
// whenever classifier columns were mapped; multi-entry is largestCount >= 2.
// Step 1 (verify) applies whenever there's a sample; Step 2 (order) only when
// multi-entry. Each gates Finalise where it applies.
const sample = isAwaitingReview ? (upload.multiEntrySummary?.sample ?? null) : null;
const isMultiEntry = (upload.multiEntrySummary?.largestCount ?? 0) >= 2;
const verifyAck = upload.verifyAck ?? false;
const orderingConfirmed = upload.multiEntryOrdering?.confirmed ?? false;
const needsVerify = !!sample;
const needsOrdering = !!sample && isMultiEntry;
// One ordering panel per distinct count ≥ 2, ascending (ADR-0004 amendment).
// Fall back to the single Step-1 sample for uploads detected before per-count
// capture existed (samplesByCount absent).
const samplesByCount = upload.multiEntrySummary?.samplesByCount;
const orderingSamples: Array<[string, MultiEntrySample]> =
samplesByCount && Object.keys(samplesByCount).length > 0
? Object.entries(samplesByCount).sort(([a], [b]) => Number(a) - Number(b))
: sample && isMultiEntry
? [[String(sample.count), sample]]
: [];
const showStepNumbers = needsVerify && needsOrdering;
// Descriptions still classified `Unknown` block Finalise — the user must map
// every one to a real value, else the finaliser fails loudly (ADR-0006).
const unknownByField = classifications.data?.unknown ?? {};
const unknownTotal = Object.values(unknownByField).reduce(
(n, descriptions) => n + descriptions.length,
0,
);
const canFinalize =
isAwaitingReview &&
(!needsVerify || verifyAck) &&
(!needsOrdering || orderingConfirmed) &&
unknownTotal === 0;
return (
<div className="mt-6 space-y-3">
@ -65,21 +168,40 @@ export default function OnboardingProgress({
</div>
<div className="flex items-center gap-4 text-xs text-gray-500">
{total > 0 && (
<span>
<span className="font-semibold text-gray-700">{completedSubtasks}</span> / {total} batches complete
</span>
)}
{failedSubtasks > 0 && (
<span className="flex items-center gap-1 text-red-500 font-semibold">
<span className="w-1.5 h-1.5 rounded-full bg-red-400" />
{failedSubtasks} failed
</span>
)}
{!taskDone && (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
Running
{/* Address matching: standardises addresses against the OS lookup, in batches. */}
<span className="flex items-center gap-1">
<span className="text-gray-400">Address matching:</span>
{failedSubtasks > 0 ? (
<span className="flex items-center gap-1 text-red-500 font-semibold">
<span className="w-1.5 h-1.5 rounded-full bg-red-400" />
{failedSubtasks} of {total} batches failed
</span>
) : total > 0 && completedSubtasks >= total ? (
<span className="font-semibold text-green-600">complete</span>
) : (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
running{total > 0 ? ` · ${completedSubtasks} / ${total} batches` : ""}
</span>
)}
</span>
{/* Classification: turns the landlord's free-text descriptions into EPC categories. */}
{classifierTotal > 0 && (
<span className="flex items-center gap-1">
<span className="text-gray-400">Classification:</span>
{classifierFailed > 0 ? (
<span className="flex items-center gap-1 text-red-500 font-semibold">
<span className="w-1.5 h-1.5 rounded-full bg-red-400" />
failed
</span>
) : classifierCompleted >= classifierTotal ? (
<span className="font-semibold text-green-600">complete</span>
) : (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
running
</span>
)}
</span>
)}
{isCombining && (
@ -94,9 +216,58 @@ export default function OnboardingProgress({
Awaiting import
</span>
)}
{isFinalising && (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
{statusLabel("finalising")}
</span>
)}
</div>
{(canRunCombiner || canFinalize) && (
{needsVerify && sample && (
<VerifyClassificationPanel
sample={sample}
classifications={classifications.data?.classifications ?? {}}
verified={verifyAck}
stepLabel={showStepNumbers ? "Step 1" : undefined}
portfolioId={portfolioId}
uploadId={uploadId}
/>
)}
{isAwaitingReview && unknownTotal > 0 && (
<UnresolvedClassificationsPanel
unknown={unknownByField}
portfolioId={portfolioId}
uploadId={uploadId}
/>
)}
{needsOrdering && orderingSamples.length > 0 && (
<div className="space-y-3">
{orderingSamples.map(([count, orderSample], i) => (
<MultiEntryOrderingPanel
key={count}
sample={orderSample}
ordering={upload.multiEntryOrdering ?? null}
classifications={classifications.data?.classifications ?? {}}
// Number the panels only when there's also a verify step or more
// than one count, so a lone ordering panel stays unnumbered.
stepLabel={
showStepNumbers
? `Step ${i + 2}`
: orderingSamples.length > 1
? `Part group ${i + 1}`
: undefined
}
portfolioId={portfolioId}
uploadId={uploadId}
/>
))}
</div>
)}
{(canRunCombiner || isAwaitingReview) && (
<div className="flex flex-col gap-2 pt-2">
{canRunCombiner && (
<StageButton
@ -106,11 +277,19 @@ export default function OnboardingProgress({
onClick={() => combine.mutate()}
/>
)}
{canFinalize && (
{isAwaitingReview && (
<StageButton
label="Finalise"
activeLabel="Finalising…"
isPending={finalize.isPending}
disabled={!canFinalize}
disabledReason={
unknownTotal > 0
? `Resolve ${unknownTotal} unclassified description${unknownTotal === 1 ? "" : "s"} first`
: needsVerify && !verifyAck
? "Verify the classification first"
: "Confirm the building-part order first"
}
onClick={() =>
finalize.mutate(undefined, { onSuccess: () => router.refresh() })
}
@ -141,23 +320,395 @@ export default function OnboardingProgress({
);
}
// Step 1 — Verify classification (ADR-0004). Lists how we read the sample
// address's descriptions (per mapped classifier column) and lets the user
// correct any, written back as source='user'. Acknowledging unlocks Finalise.
// Shown whenever classifier columns were mapped, multi-entry or not.
function VerifyClassificationPanel({
sample,
classifications,
verified,
stepLabel,
portfolioId,
uploadId,
}: {
sample: MultiEntrySample;
classifications: SampleClassifications;
verified: boolean;
stepLabel?: string;
portfolioId: string;
uploadId: string;
}) {
const editClassification = useEditClassification(portfolioId, uploadId);
const confirm = useConfirmVerification(portfolioId, uploadId);
return (
<div className="rounded-lg border border-amber-200 bg-amber-50 px-4 py-3">
<p className="text-sm font-semibold text-amber-900">
{stepLabel ? `${stepLabel}` : ""}Verify classification
</p>
<p className="mt-0.5 text-xs text-amber-800">
Based on your column mapping, here&apos;s how we read{" "}
{sample.address ? <span className="font-medium">{sample.address}</span> : "one sample address"}.
Correct anything that&apos;s wrong, then confirm.
</p>
<div className="mt-3 space-y-3">
{sample.columns.map((column) => {
// One editable row per distinct description, in file order.
const seen = new Set<string>();
const entries = column.entries.filter((entry) => {
if (seen.has(entry.description)) return false;
seen.add(entry.description);
return true;
});
const options = CATEGORY_VALUES[column.field] ?? [];
return (
<div key={column.field}>
<p className="text-[11px] font-semibold uppercase tracking-wide text-amber-700">
{FIELD_LABEL[column.field] ?? column.field}
<span className="ml-1.5 font-normal normal-case tracking-normal text-amber-600">
from your &ldquo;{column.header}&rdquo; column
</span>
</p>
<div className="mt-1 space-y-1">
{entries.map((entry) => {
const classified = classifications[column.field]?.[entry.description] ?? "";
return (
<div
key={entry.description}
className="flex items-center gap-2 text-xs text-amber-900"
>
<span className="min-w-0 flex-1 truncate" title={entry.raw}>
{entry.raw}
</span>
<span className="text-amber-400"></span>
<select
value={classified}
onChange={(e) =>
editClassification.mutate({
field: column.field,
description: entry.description,
value: e.target.value,
})
}
disabled={editClassification.isPending}
className="max-w-[14rem] rounded border border-amber-300 bg-white px-1.5 py-0.5 text-[11px] text-amber-900"
>
<option value="" disabled>
{classified ? classified : "not classified"}
</option>
{options.map((opt) => (
<option key={opt} value={opt}>
{opt}
</option>
))}
</select>
</div>
);
})}
</div>
</div>
);
})}
</div>
<p className="mt-2 text-[11px] text-amber-700">
Correcting a classification updates that description for{" "}
<span className="font-medium">every</span> row across the portfolio, not
just this address.
</p>
{editClassification.error && (
<p className="mt-1 text-xs text-red-600">{editClassification.error.message}</p>
)}
<div className="mt-3 flex items-center gap-3">
<button
onClick={() => confirm.mutate()}
disabled={confirm.isPending}
className={`inline-flex items-center gap-1.5 rounded-lg px-4 py-1.5 text-xs font-bold text-white transition-opacity ${
confirm.isPending ? "bg-amber-300 cursor-not-allowed" : "bg-amber-600 hover:opacity-90"
}`}
>
{confirm.isPending ? "Saving…" : verified ? "Re-confirm" : "These look right"}
</button>
{verified && !confirm.isPending && (
<span className="inline-flex items-center gap-1 text-xs font-semibold text-green-600">
<CheckCircleIcon className="h-4 w-4" />
Classification verified
</span>
)}
</div>
{confirm.error && (
<p className="mt-2 text-xs text-red-600">{confirm.error.message}</p>
)}
</div>
);
}
// Interactive building-part ordering for ONE entry-count's sample (ADR-0004,
// amended 2026-06-05 — one panel per distinct count). The user labels each file
// position with a building part (one Main building + Extensions); the labels
// must form a permutation. Confirming persists this count's ordering (merged
// server-side with the other counts'); Finalise unlocks once every count is
// confirmed.
function MultiEntryOrderingPanel({
sample,
ordering,
classifications,
stepLabel,
portfolioId,
uploadId,
}: {
sample: MultiEntrySample;
ordering: MultiEntryOrdering | null;
classifications: SampleClassifications;
stepLabel?: string;
portfolioId: string;
uploadId: string;
}) {
const confirm = useConfirmMultiEntryOrdering(portfolioId, uploadId);
const count = sample.count;
// Only the multi-valued columns are sliced into building parts; single-value
// columns are whole-dwelling facts (verified in Step 1, not ordered here).
const orderColumns = sample.columns.filter((column) => column.entries.length > 1);
// assignment[filePosition] = building-part slot. Seed from a stored ordering
// (slot -> position, so invert) or default to identity (main building first).
const [assignment, setAssignment] = useState<number[]>(() => {
const stored = ordering?.permutations?.[String(count)];
if (stored && stored.length === count) {
const seeded = new Array<number>(count);
stored.forEach((position, slot) => {
seeded[position] = slot;
});
return seeded;
}
return Array.from({ length: count }, (_, i) => i);
});
// Per-panel confirmation reflects whether THIS count's permutation is stored,
// not the global all-counts-confirmed flag — so each panel gives its own
// feedback as the user works through them.
const confirmed = Array.isArray(ordering?.permutations?.[String(count)]);
const valid = isPermutation(assignment);
const setSlot = (position: number, slot: number) =>
setAssignment((prev) => prev.map((s, i) => (i === position ? slot : s)));
const onConfirm = () => {
if (!valid) return;
confirm.mutate({
permutations: { [String(count)]: assignmentToPermutation(assignment) },
});
};
return (
<div className="rounded-lg border border-amber-200 bg-amber-50 px-4 py-3">
<p className="text-sm font-semibold text-amber-900">
{stepLabel ? `${stepLabel}` : ""}Confirm building-part order
</p>
<p className="mt-0.5 text-xs text-amber-800">
{sample.address ? <span className="font-medium">{sample.address}</span> : "An address"}{" "}
has {count} building parts. Tell us which entry is the main building and
which are extensions we&apos;ll apply the same order to every{" "}
{count}-part row in this file.
</p>
<div className="mt-3 overflow-x-auto">
<table className="w-full border-collapse text-xs">
<thead>
<tr className="text-left text-amber-700">
<th className="py-1 pr-3 font-medium">Entry</th>
{orderColumns.map((column) => (
<th key={column.field} className="py-1 pr-3 font-medium">
{FIELD_LABEL[column.field] ?? column.header}
<span className="block text-[10px] font-normal text-amber-500">
your &ldquo;{column.header}&rdquo;
</span>
</th>
))}
<th className="py-1 pr-3 font-medium">Building part</th>
</tr>
</thead>
<tbody>
{Array.from({ length: count }).map((_, position) => (
<tr key={position} className="border-t border-amber-100 text-amber-900">
<td className="py-1 pr-3 text-amber-600">{position + 1}</td>
{orderColumns.map((column) => {
const entry = column.entries[position];
const classified = entry
? classifications[column.field]?.[entry.description] ?? ""
: "";
return (
<td key={column.field} className="py-1 pr-3 align-top">
<div>{entry?.raw ?? "—"}</div>
{/* Read-only classification annotation; edit it in Step 1. */}
{entry && (
<div className="mt-0.5 text-[11px] text-amber-600">
{classified || "not classified"}
</div>
)}
</td>
);
})}
<td className="py-1 pr-3">
<select
value={assignment[position]}
onChange={(e) => setSlot(position, Number(e.target.value))}
className="rounded border border-amber-300 bg-white px-2 py-1 text-xs text-amber-900"
>
{Array.from({ length: count }).map((_, slot) => (
<option key={slot} value={slot}>
{partLabel(slot)}
</option>
))}
</select>
</td>
</tr>
))}
</tbody>
</table>
</div>
{!valid && (
<p className="mt-2 text-xs text-red-600">
Each part (Main building, Extension 1, ) must be used exactly once.
</p>
)}
<div className="mt-3 flex items-center gap-3">
<button
onClick={onConfirm}
disabled={!valid || confirm.isPending}
className={`inline-flex items-center gap-1.5 rounded-lg px-4 py-1.5 text-xs font-bold text-white transition-opacity ${
!valid || confirm.isPending
? "bg-amber-300 cursor-not-allowed"
: "bg-amber-600 hover:opacity-90"
}`}
>
{confirm.isPending ? "Saving…" : confirmed ? "Update order" : "Confirm order"}
</button>
{confirmed && !confirm.isPending && (
<span className="inline-flex items-center gap-1 text-xs font-semibold text-green-600">
<CheckCircleIcon className="h-4 w-4" />
Order confirmed
</span>
)}
</div>
{confirm.error && (
<p className="mt-2 text-xs text-red-600">{confirm.error.message}</p>
)}
</div>
);
}
// Unresolved-classification gate (ADR-0006). Lists every description still
// classified `Unknown` portfolio-wide and lets the user map each to a real value
// via the same per-description override path as Step 1 (it applies portfolio-
// wide). Finalise stays blocked until this list is empty — `Unknown` is never a
// final value, and an unresolved one would fail the import loudly.
function UnresolvedClassificationsPanel({
unknown,
portfolioId,
uploadId,
}: {
unknown: Record<string, string[]>;
portfolioId: string;
uploadId: string;
}) {
const editClassification = useEditClassification(portfolioId, uploadId);
const total = Object.values(unknown).reduce((n, d) => n + d.length, 0);
return (
<div className="rounded-lg border border-red-200 bg-red-50 px-4 py-3">
<p className="text-sm font-semibold text-red-900">
Resolve unclassified descriptions ({total})
</p>
<p className="mt-0.5 text-xs text-red-800">
We couldn&apos;t classify these automatically. Map each to a category
before finalising an unresolved value would fail the import. Edits apply
to <span className="font-medium">every</span> row across the portfolio.
</p>
<div className="mt-3 space-y-3">
{Object.entries(unknown).map(([field, descriptions]) => {
const options = (CATEGORY_VALUES[field] ?? []).filter((o) => o !== "Unknown");
return (
<div key={field}>
<p className="text-[11px] font-semibold uppercase tracking-wide text-red-700">
{FIELD_LABEL[field] ?? field}
</p>
<div className="mt-1 space-y-1">
{descriptions.map((description) => (
<div
key={description}
className="flex items-center gap-2 text-xs text-red-900"
>
<span className="min-w-0 flex-1 truncate" title={description}>
{description}
</span>
<span className="text-red-400"></span>
<select
defaultValue=""
onChange={(e) =>
e.target.value &&
editClassification.mutate({
field,
description,
value: e.target.value,
})
}
disabled={editClassification.isPending}
className="max-w-[14rem] rounded border border-red-300 bg-white px-1.5 py-0.5 text-[11px] text-red-900"
>
<option value="" disabled>
Choose a category
</option>
{options.map((opt) => (
<option key={opt} value={opt}>
{opt}
</option>
))}
</select>
</div>
))}
</div>
</div>
);
})}
</div>
{editClassification.error && (
<p className="mt-1 text-xs text-red-600">{editClassification.error.message}</p>
)}
</div>
);
}
function StageButton({
label,
activeLabel,
isPending,
disabled = false,
disabledReason,
onClick,
}: {
label: string;
activeLabel: string;
isPending: boolean;
disabled?: boolean;
disabledReason?: string;
onClick: () => void;
}) {
const blocked = isPending || disabled;
return (
<button
onClick={onClick}
disabled={isPending}
disabled={blocked}
title={disabled && !isPending ? disabledReason : undefined}
className={`inline-flex items-center gap-2 self-start px-5 py-2 rounded-xl bg-gradient-to-br from-[#14163d] to-[#15173e] text-white text-sm font-bold transition-opacity ${
isPending ? "opacity-50 cursor-not-allowed" : "hover:opacity-90"
blocked ? "opacity-50 cursor-not-allowed" : "hover:opacity-90"
}`}
>
{isPending ? (

View file

@ -10,38 +10,14 @@ import {
ArrowsRightLeftIcon,
} from "@heroicons/react/24/outline";
import { useSetColumnMapping } from "@/lib/bulkUpload/client";
const INTERNAL_FIELDS = [
{ value: "address_1", label: "Address 1", required: true },
{ value: "address_2", label: "Address 2", required: false },
{ value: "address_3", label: "Address 3", required: false },
{ value: "postcode", label: "Postcode", required: true },
{ value: "internal_reference", label: "Internal Reference (Optional)", required: false },
{ value: "skip", label: "Skip this column", required: false },
];
const REQUIRED_VALUES = ["address_1", "postcode"];
function autoDetect(header: string): string {
const h = header.toLowerCase().replace(/[\s_\-]/g, "");
if (/^(address|addr)(line)?(1|one)?$/.test(h)) return "address_1";
if (/^(address|addr)(line)?(2|two)|^street$/.test(h)) return "address_2";
if (/^(address|addr)(line)?(3|three)|^locality$|^town$|^city$/.test(h)) return "address_3";
if (/^post(al)?code$|^postcode$|^pcode$/.test(h)) return "postcode";
if (/^(internal)?ref(erence)?$|^id$/.test(h)) return "internal_reference";
return "skip";
}
function buildInitialMapping(
headers: string[],
existing?: Record<string, string>
): Record<string, string> {
const mapping: Record<string, string> = {};
for (const h of headers) {
mapping[h] = existing?.[h] ?? autoDetect(h);
}
return mapping;
}
import {
ADDRESS_FIELDS,
CLASSIFIER_FIELDS,
NOT_PROVIDED,
buildInitialMapping,
validateColumnMapping,
type InternalField,
} from "@/lib/bulkUpload/columnFields";
interface Props {
portfolioId: string;
@ -59,19 +35,24 @@ export default function MapColumnsClient({
existingMapping,
}: Props) {
const router = useRouter();
// mapping: internal field → source CSV header. Unmapped fields are absent.
const [mapping, setMapping] = useState<Record<string, string>>(
buildInitialMapping(sourceHeaders, existingMapping)
);
const setMappingMutation = useSetColumnMapping(portfolioId, uploadId);
const mappedValues = Object.values(mapping).filter((v) => v !== "skip");
const missingRequired = REQUIRED_VALUES.filter((r) => !mappedValues.includes(r));
const validationError = validateColumnMapping(mapping);
const submitting = setMappingMutation.isPending;
const error = setMappingMutation.error?.message ?? null;
const canSubmit = missingRequired.length === 0 && !submitting;
const requestError = setMappingMutation.error?.message ?? null;
const canSubmit = validationError === null && !submitting;
function setField(header: string, value: string) {
setMapping((prev) => ({ ...prev, [header]: value }));
function setField(field: string, header: string) {
setMapping((prev) => {
const next = { ...prev };
if (header === NOT_PROVIDED) delete next[field];
else next[field] = header;
return next;
});
}
function handleSubmit() {
@ -86,6 +67,86 @@ export default function MapColumnsClient({
);
}
function renderRow(field: InternalField) {
const value = mapping[field.value] ?? NOT_PROVIDED;
const isMapped = value !== NOT_PROVIDED;
return (
<div
key={field.value}
className="grid grid-cols-12 items-center px-6 py-4 hover:bg-gray-50/50 transition-colors"
>
{/* Internal field */}
<div className="col-span-4 flex items-center gap-3">
<div className="w-8 h-8 rounded-lg bg-gray-100 flex items-center justify-center shrink-0">
<TableCellsIcon className="h-4 w-4 text-gray-400" />
</div>
<div>
<p className="text-sm font-semibold text-gray-900">
{field.label}
{field.required && <span className="text-amber-600"> *</span>}
</p>
<p className="text-xs text-gray-400">
{field.kind === "classifier" ? "Landlord description" : "Internal field"}
</p>
</div>
</div>
{/* Arrow */}
<div className="col-span-1 flex justify-center">
<ArrowsRightLeftIcon className="h-4 w-4 text-gray-300" />
</div>
{/* Header picker */}
<div className="col-span-5">
<select
value={value}
onChange={(e) => setField(field.value, e.target.value)}
className="w-full text-sm border border-gray-200 rounded-lg px-3 py-2 bg-white text-gray-800 focus:outline-none focus:ring-2 focus:ring-[#14163d]/20 focus:border-[#14163d]"
>
<option value={NOT_PROVIDED}>Not provided</option>
{sourceHeaders.map((header, index) => (
<option key={`${header}-${index}`} value={header}>
{header}
</option>
))}
</select>
</div>
{/* Status badge */}
<div className="col-span-2 flex justify-end">
<span
className={`inline-flex items-center gap-1 px-2.5 py-1 rounded-full text-xs font-semibold ${
isMapped ? "bg-amber-50 text-amber-700" : "bg-gray-100 text-gray-400"
}`}
>
<span className="w-1.5 h-1.5 rounded-full bg-current opacity-70" />
{isMapped ? "Mapped" : "Not provided"}
</span>
</div>
</div>
);
}
function renderSection(title: string, subtitle: string, fields: InternalField[]) {
return (
<div className="bg-white border border-gray-100 rounded-2xl overflow-hidden shadow-sm mb-6">
<div className="px-6 py-3 bg-gray-50 border-b border-gray-100">
<p className="text-xs font-semibold text-gray-500 uppercase tracking-wider">
{title}
</p>
<p className="text-xs text-gray-400 mt-0.5">{subtitle}</p>
</div>
{sourceHeaders.length === 0 ? (
<div className="px-6 py-12 text-center text-sm text-gray-400">
No headers found in this file.
</div>
) : (
<div className="divide-y divide-gray-50">{fields.map(renderRow)}</div>
)}
</div>
);
}
return (
<div className="max-w-4xl mx-auto px-6 py-10">
{/* Breadcrumb + step */}
@ -116,102 +177,27 @@ export default function MapColumnsClient({
Column Remapper
</h1>
<p className="text-sm text-gray-500 max-w-lg">
Align your spreadsheet headers with our internal property data structure to
ensure accurate address processing.
Tell us which spreadsheet column feeds each field. Address fields drive
matching; landlord-description fields are classified into property facts.
</p>
</div>
{/* Table */}
<div className="bg-white border border-gray-100 rounded-2xl overflow-hidden shadow-sm mb-6">
{/* Column headers */}
<div className="grid grid-cols-12 items-center px-6 py-3 bg-gray-50 border-b border-gray-100">
<span className="col-span-4 text-xs font-semibold text-gray-400 uppercase tracking-wider">
Spreadsheet Header
</span>
<span className="col-span-1" />
<span className="col-span-5 text-xs font-semibold text-gray-400 uppercase tracking-wider">
Internal Field Mapping
</span>
<span className="col-span-2 text-xs font-semibold text-gray-400 uppercase tracking-wider text-right">
Status
</span>
</div>
{sourceHeaders.length === 0 ? (
<div className="px-6 py-12 text-center text-sm text-gray-400">
No headers found in this file.
</div>
) : (
<div className="divide-y divide-gray-50">
{sourceHeaders.map((header) => {
const value = mapping[header] ?? "skip";
const isMapped = value !== "skip";
return (
<div
key={header}
className="grid grid-cols-12 items-center px-6 py-4 hover:bg-gray-50/50 transition-colors"
>
{/* Source header */}
<div className="col-span-4 flex items-center gap-3">
<div className="w-8 h-8 rounded-lg bg-gray-100 flex items-center justify-center shrink-0">
<TableCellsIcon className="h-4 w-4 text-gray-400" />
</div>
<div>
<p className="text-sm font-semibold text-gray-900">{header}</p>
<p className="text-xs text-gray-400">Source column</p>
</div>
</div>
{/* Arrow */}
<div className="col-span-1 flex justify-center">
<ArrowsRightLeftIcon className="h-4 w-4 text-gray-300" />
</div>
{/* Dropdown */}
<div className="col-span-5">
<select
value={value}
onChange={(e) => setField(header, e.target.value)}
className="w-full text-sm border border-gray-200 rounded-lg px-3 py-2 bg-white text-gray-800 focus:outline-none focus:ring-2 focus:ring-[#14163d]/20 focus:border-[#14163d]"
>
{INTERNAL_FIELDS.map((f) => (
<option key={f.value} value={f.value}>
{f.label}
</option>
))}
</select>
</div>
{/* Status badge */}
<div className="col-span-2 flex justify-end">
<span
className={`inline-flex items-center gap-1 px-2.5 py-1 rounded-full text-xs font-semibold ${
isMapped
? "bg-amber-50 text-amber-700"
: "bg-gray-100 text-gray-400"
}`}
>
<span className="w-1.5 h-1.5 rounded-full bg-current opacity-70" />
{isMapped ? "Mapped" : "Skipped"}
</span>
</div>
</div>
);
})}
</div>
)}
</div>
{/* Validation error */}
{missingRequired.length > 0 && (
<p className="text-xs text-amber-600 mb-4">
Required fields not yet mapped:{" "}
{missingRequired
.map((r) => INTERNAL_FIELDS.find((f) => f.value === r)?.label)
.join(", ")}
</p>
{renderSection(
"Address fields",
"Used for address matching. A column can feed only one address field.",
ADDRESS_FIELDS
)}
{error && <p className="text-xs text-red-500 mb-4">{error}</p>}
{renderSection(
"Landlord description fields (optional)",
"Classified into property facts. Several fields may share one column.",
CLASSIFIER_FIELDS
)}
{/* Validation / request error */}
{validationError && (
<p className="text-xs text-amber-600 mb-4">{validationError}</p>
)}
{requestError && <p className="text-xs text-red-500 mb-4">{requestError}</p>}
{/* Footer */}
<div className="flex items-center justify-between">
@ -249,8 +235,10 @@ export default function MapColumnsClient({
Pro Tip
</p>
<p className="text-sm text-gray-500 italic">
&ldquo;Ensure your source file doesn&apos;t have blank headers. Any column mapped to
&ldquo;Skip&rdquo; will be ignored during import.&rdquo;
&ldquo;Fields left as &ldquo;Not provided&rdquo; are ignored. The same
column can feed several landlord-description fields e.g. one
&ldquo;Property Type&rdquo; column can drive both Property Type and Built
Form.&rdquo;
</p>
</div>
</div>

View file

@ -67,6 +67,14 @@ const STATUS_CONFIG = {
body: "Matches ready, writing into your portfolio.",
cta: false,
},
finalising: {
icon: ArrowPathIcon,
iconBg: "bg-blue-50",
iconColor: "text-blue-500",
title: "Uploading to ARA",
body: "Creating your properties from the matched addresses. This can take a little while for large files.",
cta: false,
},
complete: {
icon: CheckCircleIcon,
iconBg: "bg-green-50",
@ -167,6 +175,7 @@ export default async function BulkUploadDetailPage(props: {
{(statusKey === "processing" ||
statusKey === "combining" ||
statusKey === "awaiting_review" ||
statusKey === "finalising" ||
statusKey === "complete" ||
statusKey === "failed") &&
upload.taskId && (
@ -174,6 +183,7 @@ export default async function BulkUploadDetailPage(props: {
portfolioSlug={slug}
portfolioId={upload.portfolioId}
uploadId={uploadId}
serverStatus={upload.status}
isDomnaUser={isDomnaUser}
/>
)}

View file

@ -14,6 +14,7 @@ import {
const STATUS_LABELS: Record<string, { label: string; classes: string }> = {
ready_for_processing: { label: "Ready", classes: "bg-amber-100 text-amber-700" },
processing: { label: "Processing", classes: "bg-blue-100 text-blue-700" },
finalising: { label: "Uploading to ARA", classes: "bg-blue-100 text-blue-700" },
complete: { label: "Complete", classes: "bg-green-100 text-green-700" },
failed: { label: "Failed", classes: "bg-red-100 text-red-700" },
};

View file

@ -0,0 +1,101 @@
import { getServerSession } from "next-auth";
import { redirect } from "next/navigation";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import {
getLandlordOverrides,
type OverrideRow,
type LandlordOverrideCategory,
} from "@/lib/landlordOverrides/server";
import { CLASSIFIER_FIELDS } from "@/lib/bulkUpload/columnFields";
export default async function LandlordOverridesPage(props: {
params: Promise<{ slug: string }>;
}) {
const { slug } = await props.params;
const session = await getServerSession(AuthOptions);
if (!session) redirect("/login");
const results = await getLandlordOverrides(slug);
const total = Object.values(results).reduce((n, rows) => n + rows.length, 0);
return (
<div className="max-w-4xl mx-auto px-6 py-10">
<div className="mb-8">
<h1 className="text-3xl font-extrabold text-gray-900 tracking-tight mb-1">
Landlord overrides
</h1>
<p className="text-sm text-gray-500 max-w-lg">
Property facts classified from your bulk-upload descriptions. Read-only
editing comes later.
</p>
</div>
{total === 0 ? (
<div className="bg-white border border-gray-100 rounded-2xl px-6 py-12 text-center text-sm text-gray-400 shadow-sm">
No classified values yet. They appear here once a bulk upload with
landlord-description columns has been processed.
</div>
) : (
CLASSIFIER_FIELDS.map((field) => (
<OverrideSection
key={field.value}
title={field.label}
rows={results[field.value as LandlordOverrideCategory]}
/>
))
)}
</div>
);
}
function OverrideSection({ title, rows }: { title: string; rows: OverrideRow[] }) {
return (
<div className="bg-white border border-gray-100 rounded-2xl overflow-hidden shadow-sm mb-6">
<div className="flex items-center justify-between px-6 py-3 bg-gray-50 border-b border-gray-100">
<p className="text-xs font-semibold text-gray-500 uppercase tracking-wider">
{title}
</p>
<span className="text-xs text-gray-400">
{rows.length} {rows.length === 1 ? "value" : "values"}
</span>
</div>
{rows.length === 0 ? (
<div className="px-6 py-8 text-center text-sm text-gray-400">
No values for this category.
</div>
) : (
<div className="divide-y divide-gray-50">
{rows.map((row, i) => (
<div key={i} className="grid grid-cols-12 items-center px-6 py-3 gap-2">
<p
className="col-span-6 text-sm text-gray-700 truncate"
title={row.description}
>
{row.description}
</p>
<p className="col-span-4 text-sm font-semibold text-gray-900">
{row.value}
</p>
<div className="col-span-2 flex justify-end">
<SourceBadge source={row.source} />
</div>
</div>
))}
</div>
)}
</div>
);
}
function SourceBadge({ source }: { source: string }) {
const isUser = source === "user";
return (
<span
className={`inline-flex items-center px-2.5 py-1 rounded-full text-xs font-semibold ${
isUser ? "bg-emerald-50 text-emerald-700" : "bg-indigo-50 text-indigo-700"
}`}
>
{isUser ? "user" : "classifier"}
</span>
);
}

View file

@ -96,6 +96,100 @@ export function useSetColumnMapping(portfolioId: string, uploadId: string) {
});
}
// field -> description -> resolved enum, for the multi-entry sample (issue #298).
export type SampleClassifications = Record<string, Record<string, string>>;
export function useEditClassification(portfolioId: string, uploadId: string) {
const queryClient = useQueryClient();
return useMutation<void, Error, { field: string; description: string; value: string }>({
mutationFn: async (input) => {
const res = await fetch(
`/api/portfolio/${portfolioId}/bulk-uploads/${uploadId}/classifications`,
{
method: "PATCH",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(input),
},
);
if (!res.ok) throw await parseError(res, "Failed to save classification.");
},
onSuccess: () => {
queryClient.invalidateQueries({
queryKey: [...bulkUploadKeys.progress(uploadId), "classifications"],
});
},
});
}
// Sample classifications for the review panels PLUS the still-`Unknown`
// descriptions that gate Finalise (ADR-0006).
export interface ClassificationsView {
classifications: SampleClassifications;
unknown: Record<string, string[]>;
}
export function useSampleClassifications(
portfolioId: string,
uploadId: string,
enabled: boolean,
) {
return useQuery<ClassificationsView, Error>({
queryKey: [...bulkUploadKeys.progress(uploadId), "classifications"],
enabled,
queryFn: async () => {
const res = await fetch(
`/api/portfolio/${portfolioId}/bulk-uploads/${uploadId}/classifications`,
);
if (!res.ok) throw await parseError(res, "Failed to load classifications.");
const body = await res.json();
return {
classifications: (body.classifications ?? {}) as SampleClassifications,
unknown: (body.unknown ?? {}) as Record<string, string[]>,
};
},
});
}
export function useConfirmMultiEntryOrdering(portfolioId: string, uploadId: string) {
const queryClient = useQueryClient();
return useMutation<BulkUpload, Error, { permutations: Record<string, number[]> }>({
mutationFn: async (input) => {
const res = await fetch(
`/api/portfolio/${portfolioId}/bulk-uploads/${uploadId}/multi-entry-ordering`,
{
method: "PATCH",
headers: { "Content-Type": "application/json" },
body: JSON.stringify(input),
},
);
if (!res.ok) throw await parseError(res, "Failed to save ordering.");
return res.json();
},
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: bulkUploadKeys.progress(uploadId) });
},
});
}
// Records the "Verify classification" acknowledgement (ADR-0004 Step 1),
// unlocking Finalise. Per-row corrections go through useEditClassification.
export function useConfirmVerification(portfolioId: string, uploadId: string) {
const queryClient = useQueryClient();
return useMutation<BulkUpload, Error, void>({
mutationFn: async () => {
const res = await fetch(
`/api/portfolio/${portfolioId}/bulk-uploads/${uploadId}/verify-classification`,
{ method: "PATCH" },
);
if (!res.ok) throw await parseError(res, "Failed to confirm classification.");
return res.json();
},
onSuccess: () => {
queryClient.invalidateQueries({ queryKey: bulkUploadKeys.progress(uploadId) });
},
});
}
export function useStartAddressMatching(portfolioId: string, uploadId: string) {
const queryClient = useQueryClient();
return useMutation<{ taskId: string }, Error, void>({
@ -113,7 +207,11 @@ export function useStartAddressMatching(portfolioId: string, uploadId: string) {
});
}
export function useBulkUploadProgress(portfolioId: string, uploadId: string) {
export function useBulkUploadProgress(
portfolioId: string,
uploadId: string,
options?: { onSuccess?: (data: ProgressView) => void },
) {
return useQuery<ProgressView, Error>({
queryKey: bulkUploadKeys.progress(uploadId),
queryFn: async () => {
@ -127,6 +225,9 @@ export function useBulkUploadProgress(portfolioId: string, uploadId: string) {
const status = data?.upload.status;
return status && isTerminalStatus(status) ? false : 3000;
},
// v4 onSuccess fires after each successful poll; callers use it to react to a
// status transition (e.g. refresh the server page once it goes terminal).
onSuccess: options?.onSuccess,
});
}

View file

@ -0,0 +1,100 @@
// Single source of truth for BulkUpload column mapping.
//
// The mapping is stored as `field → source CSV header` (one entry per mapped
// internal field). One source header may feed several CLASSIFIER fields (e.g.
// "Property Type" → both property_type and built_form_type) but at most one
// ADDRESS field — see docs/adr/0003-classifier-triggers-as-address-subtask.md
// and docs/wip/landlord-override-frontend-plan.md (Q2.2, Q3).
//
// Classifier field `value`s mirror the Model service's ClassifiableColumn names
// (property_type / built_form_type / wall_type / roof_type) so the mapping can
// be forwarded to the lambda trigger verbatim.
export type InternalFieldKind = "address" | "classifier";
export interface InternalField {
value: string;
label: string;
kind: InternalFieldKind;
required: boolean;
// Canonical header written into the address-matching CSV (address fields only).
outputHeader?: string;
}
export const INTERNAL_FIELDS: InternalField[] = [
{ value: "address_1", label: "Address 1", kind: "address", required: true, outputHeader: "Address 1" },
{ value: "address_2", label: "Address 2", kind: "address", required: false, outputHeader: "Address 2" },
{ value: "address_3", label: "Address 3", kind: "address", required: false, outputHeader: "Address 3" },
{ value: "postcode", label: "Postcode", kind: "address", required: true, outputHeader: "postcode" },
{ value: "internal_reference", label: "Internal Reference", kind: "address", required: false, outputHeader: "Internal Reference" },
{ value: "property_type", label: "Property Type", kind: "classifier", required: false },
{ value: "built_form_type", label: "Built Form", kind: "classifier", required: false },
{ value: "wall_type", label: "Wall Type", kind: "classifier", required: false },
{ value: "roof_type", label: "Roof Type", kind: "classifier", required: false },
];
export const ADDRESS_FIELDS = INTERNAL_FIELDS.filter((f) => f.kind === "address");
export const CLASSIFIER_FIELDS = INTERNAL_FIELDS.filter((f) => f.kind === "classifier");
export const CLASSIFIER_FIELD_VALUES = CLASSIFIER_FIELDS.map((f) => f.value);
export const REQUIRED_FIELD_VALUES = INTERNAL_FIELDS.filter((f) => f.required).map((f) => f.value);
// Sentinel for an unmapped field in the UI dropdown ("Not provided").
export const NOT_PROVIDED = "";
// header → address field detection. Classifier fields are never auto-detected
// (Q2.1): mapping them is always an explicit user choice.
export function autoDetectField(header: string): string | null {
const h = header.toLowerCase().replace(/[\s_\-]/g, "");
if (/^(address|addr)(line)?(1|one)?$/.test(h)) return "address_1";
if (/^(address|addr)(line)?(2|two)|^street$/.test(h)) return "address_2";
if (/^(address|addr)(line)?(3|three)|^locality$|^town$|^city$/.test(h)) return "address_3";
if (/^post(al)?code$|^postcode$|^pcode$/.test(h)) return "postcode";
if (/^(internal)?ref(erence)?$|^id$/.test(h)) return "internal_reference";
return null;
}
// Build the initial field→header mapping: keep any existing choices, then
// auto-fill address fields from the headers (first matching header wins).
export function buildInitialMapping(
sourceHeaders: string[],
existing?: Record<string, string>,
): Record<string, string> {
const mapping: Record<string, string> = { ...(existing ?? {}) };
for (const header of sourceHeaders) {
const field = autoDetectField(header);
if (!field) continue;
if (mapping[field] === undefined) mapping[field] = header;
}
return mapping;
}
// Validation shared by the client (live) and the server (authoritative).
// Returns the first problem as a message, or null when the mapping is valid.
export function validateColumnMapping(mapping: Record<string, string>): string | null {
for (const field of REQUIRED_FIELD_VALUES) {
if (!mapping[field]) {
const label = INTERNAL_FIELDS.find((f) => f.value === field)?.label ?? field;
return `${label} must be mapped to a column.`;
}
}
const usedAddressHeaders = new Set<string>();
for (const field of ADDRESS_FIELDS) {
const header = mapping[field.value];
if (!header) continue;
if (usedAddressHeaders.has(header)) {
return `Column "${header}" is mapped to more than one address field.`;
}
usedAddressHeaders.add(header);
}
return null;
}
// The classifier subset of the mapping (category → source header) that gets
// forwarded to the lambda trigger. Address fields are intentionally excluded.
export function classifierMapping(mapping: Record<string, string>): Record<string, string> {
const out: Record<string, string> = {};
for (const field of CLASSIFIER_FIELD_VALUES) {
if (mapping[field]) out[field] = mapping[field];
}
return out;
}

View file

@ -0,0 +1,110 @@
import { describe, expect, it } from "vitest";
import { detectMultiEntry, assignmentToPermutation, isPermutation } from "./multiEntry";
// field -> source header, the shape stored on the upload. property_type and
// built_form_type intentionally share a header (the classifier allows it).
const MAPPING = {
address_1: "Addr",
postcode: "PC",
property_type: "Property Type",
wall_type: "Walls",
roof_type: "Roofs",
};
describe("detectMultiEntry", () => {
it("returns an empty summary when no classifier columns are mapped", () => {
const rows = [{ Addr: "1 High St", PC: "AB1 2CD" }];
const summary = detectMultiEntry(rows, { address_1: "Addr", postcode: "PC" });
expect(summary.sample).toBeNull();
expect(summary.largestCount).toBe(0);
});
it("captures a single-part verify sample when classifier columns exist but no row is multi-entry", () => {
const rows = [
{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: EndTerrace", Walls: "Cavity: AsBuilt", Roofs: "Pitched: 200mm" },
];
const summary = detectMultiEntry(rows, MAPPING);
// Not multi-entry, but there IS a sample to verify (ADR-0004 Step 1).
expect(summary.largestCount).toBe(0);
expect(summary.sample).not.toBeNull();
expect(summary.sample!.count).toBe(1);
expect(summary.sample!.address).toBe("1 High St, AB1 2CD");
// All three mapped classifier columns are present, one entry each.
expect(summary.sample!.columns.map((c) => c.field).sort()).toEqual([
"property_type",
"roof_type",
"wall_type",
]);
expect(summary.sample!.columns.every((c) => c.entries.length === 1)).toBe(true);
});
it("picks the largest-count row as the sample and reports it as multi-entry", () => {
const rows = [
{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: EndTerrace", Walls: "Cavity: AsBuilt", Roofs: "Pitched: 200mm" },
{ Addr: "2 Low St", PC: "AB3 4EF", "Property Type": "House: Detached", Walls: "Cavity: AsBuilt, Cavity: Filled", Roofs: "Flat: AsBuilt, Pitched: 200mm" },
];
const summary = detectMultiEntry(rows, MAPPING);
expect(summary.largestCount).toBe(2);
expect(summary.countDistribution).toEqual({ "2": 1 });
expect(summary.sample!.address).toBe("2 Low St, AB3 4EF");
expect(summary.sample!.count).toBe(2);
// multiValuedFields are the ones that actually split.
expect([...summary.multiValuedFields].sort()).toEqual(["roof_type", "wall_type"]);
// The whole-dwelling Property Type column is still carried (for Step 1),
// with a single entry — Step 2 filters it out by entries.length.
const propertyCol = summary.sample!.columns.find((c) => c.field === "property_type");
expect(propertyCol?.entries).toHaveLength(1);
const wallCol = summary.sample!.columns.find((c) => c.field === "wall_type");
expect(wallCol?.entries.map((e) => e.raw)).toEqual(["Cavity: AsBuilt", "Cavity: Filled"]);
});
it("captures one ordering sample per distinct count (ADR-0004 amendment)", () => {
const rows = [
{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: Detached", Walls: "Cavity: AsBuilt", Roofs: "Pitched: 200mm" }, // count 1
{ Addr: "2 Low St", PC: "AB3 4EF", "Property Type": "House: Semi", Walls: "Cavity, Solid", Roofs: "Flat, Pitched" }, // count 2
{ Addr: "3 Mid Rd", PC: "AB5 6GH", "Property Type": "House: Mid", Walls: "Cavity, Solid, Render", Roofs: "Flat, Pitched, Slate" }, // count 3
{ Addr: "4 Side Ln", PC: "AB7 8IJ", "Property Type": "House: Other", Walls: "Brick, Stone", Roofs: "Tile, Slate" }, // count 2 again
];
const summary = detectMultiEntry(rows, MAPPING);
expect(summary.largestCount).toBe(3);
expect(summary.countDistribution).toEqual({ "2": 2, "3": 1 });
// A sample for every count >= 2 — and only those.
expect(Object.keys(summary.samplesByCount ?? {}).sort()).toEqual(["2", "3"]);
expect(summary.samplesByCount!["2"].count).toBe(2);
expect(summary.samplesByCount!["3"].count).toBe(3);
// The count-2 sample is the FIRST count-2 row, not the count-3 one.
expect(summary.samplesByCount!["2"].address).toBe("2 Low St, AB3 4EF");
const wall2 = summary.samplesByCount!["2"].columns.find((c) => c.field === "wall_type");
expect(wall2?.entries.map((e) => e.raw)).toEqual(["Cavity", "Solid"]);
const wall3 = summary.samplesByCount!["3"].columns.find((c) => c.field === "wall_type");
expect(wall3?.entries.map((e) => e.raw)).toEqual(["Cavity", "Solid", "Render"]);
});
it("normalizes descriptions to lower-case (matching the classifier's key)", () => {
const rows = [{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: EndTerrace", Walls: "", Roofs: "" }];
const summary = detectMultiEntry(rows, MAPPING);
const entry = summary.sample!.columns.find((c) => c.field === "property_type")!.entries[0];
expect(entry.raw).toBe("House: EndTerrace");
expect(entry.description).toBe("house: endterrace");
});
});
describe("ordering helpers", () => {
it("isPermutation accepts a bijection and rejects duplicates/out-of-range", () => {
expect(isPermutation([0, 1])).toBe(true);
expect(isPermutation([1, 0, 2])).toBe(true);
expect(isPermutation([0, 0])).toBe(false);
expect(isPermutation([0, 2])).toBe(false);
});
it("assignmentToPermutation inverts assignment[pos]=slot to permutation[slot]=pos", () => {
// file position 1 holds the main building (slot 0), position 0 is extension 1.
expect(assignmentToPermutation([1, 0])).toEqual([1, 0]);
expect(assignmentToPermutation([0, 1, 2])).toEqual([0, 1, 2]);
expect(assignmentToPermutation([2, 0, 1])).toEqual([1, 2, 0]);
});
});

View file

@ -0,0 +1,192 @@
// Multi-entry building-part detection (ADR-0004).
//
// A BulkUpload row can carry several comma-separated entries in a physical-
// element column (e.g. Walls = "Cavity: AsBuilt (1976-1982), Cavity:
// FilledCavity"). Each entry is a Building part (Main building + Extensions).
// This module finds that pattern and captures one sample — the row with the
// MOST building parts — so the user can confirm the ordering downstream.
//
// Pure + I/O-free so it's unit-testable; the start-address-matching route runs
// it over the already-parsed upload rows and persists the result on the upload.
import { ADDRESS_FIELDS, classifierMapping } from "./columnFields";
import type {
MultiEntryEntry,
MultiEntryColumn,
MultiEntrySample,
MultiEntrySummary,
} from "@/app/db/schema/bulk_address_uploads";
// The jsonb shape lives with the column (schema/bulk_address_uploads.ts) so the
// migration is self-contained; re-export here for callers of this module.
export type {
MultiEntryEntry,
MultiEntryColumn,
MultiEntrySample,
MultiEntrySummary,
MultiEntryOrdering,
} from "@/app/db/schema/bulk_address_uploads";
// --- Building-part ordering (ADR-0004) ---
// Label for building-part slot k: slot 0 is the Main building, the rest are
// numbered extensions.
export function partLabel(slot: number): string {
return slot === 0 ? "Main building" : `Extension ${slot}`;
}
// True when `arr` is a permutation of [0, n-1] (each slot used exactly once).
export function isPermutation(arr: number[]): boolean {
const seen = new Set(arr);
return (
seen.size === arr.length &&
arr.every((n) => Number.isInteger(n) && n >= 0 && n < arr.length)
);
}
// The UI collects, per file position, which building-part slot it holds
// (`assignment[pos] = slot`). Storage is keyed the other way —
// `permutation[slot] = pos` — so a consumer can ask "which file position holds
// the main building?". Both are permutations of [0, n-1]; this inverts one to
// the other.
export function assignmentToPermutation(assignment: number[]): number[] {
const permutation = new Array<number>(assignment.length);
assignment.forEach((slot, position) => {
permutation[slot] = position;
});
return permutation;
}
export const EMPTY_MULTI_ENTRY_SUMMARY: MultiEntrySummary = {
multiValuedFields: [],
countDistribution: {},
largestCount: 0,
sample: null,
samplesByCount: {},
};
// Split a cell into building-part entries. Mirrors the classifier's
// split(",") → trim → lower, dropping empty fragments so positions align
// across raw and normalized forms.
export function splitEntries(value: unknown): MultiEntryEntry[] {
return String(value ?? "")
.split(",")
.map((s) => s.trim())
.filter((s) => s.length > 0)
.map((raw) => ({ raw, description: raw.toLowerCase() }));
}
// Compose a display address from the mapped address fields (reference excluded).
function buildAddress(
row: Record<string, unknown>,
columnMapping: Record<string, string>,
): string {
const parts: string[] = [];
for (const field of ADDRESS_FIELDS) {
if (field.value === "internal_reference") continue;
const header = columnMapping[field.value];
if (!header) continue;
const value = String(row[header] ?? "").trim();
if (value) parts.push(value);
}
return parts.join(", ");
}
// Scan the mapped classifier columns and capture one sample address. Only
// classifier columns are considered — they're the physical-element descriptions
// we slice into building parts; address columns are single-valued by nature.
//
// The sample serves both review steps (ADR-0004): the largest-count multi-entry
// row when one exists (Step 2 — Confirm order), otherwise the first row carrying
// any classifier value so Step 1 — Verify classification still has something to
// show. `largestCount >= 2` is the multi-entry signal; `sample != null` means
// "there is something to verify". The sample carries every mapped classifier
// column with a value — Step 1 lists them all; Step 2 renders only the
// multi-valued ones.
export function detectMultiEntry(
rows: Array<Record<string, unknown>>,
columnMapping: Record<string, string>,
): MultiEntrySummary {
const classifierCols = Object.entries(classifierMapping(columnMapping));
if (classifierCols.length === 0) return EMPTY_MULTI_ENTRY_SUMMARY;
const multiValued = new Set<string>();
const countDistribution: Record<string, number> = {};
let largestCount = 0;
let multiEntryRowIndex = -1;
// Fallback sample for Step 1 when no row is multi-entry: the first row that
// carries any classifier value.
let firstClassifiedRowIndex = -1;
// First row index seen at each distinct count ≥ 2 — one ordering sample per
// count (ADR-0004 amendment): each count needs its own confirmed permutation.
const sampleRowIndexByCount: Record<string, number> = {};
rows.forEach((row, index) => {
let rowMax = 0;
let hasValue = false;
for (const [field, header] of classifierCols) {
const n = splitEntries(row[header]).length;
if (n > 0) hasValue = true;
if (n > 1) multiValued.add(field);
if (n > rowMax) rowMax = n;
}
if (hasValue && firstClassifiedRowIndex === -1) firstClassifiedRowIndex = index;
if (rowMax >= 2) {
const key = String(rowMax);
countDistribution[key] = (countDistribution[key] ?? 0) + 1;
if (sampleRowIndexByCount[key] === undefined) sampleRowIndexByCount[key] = index;
// First row at a new maximum becomes the multi-entry (Step 1) sample.
if (rowMax > largestCount) {
largestCount = rowMax;
multiEntryRowIndex = index;
}
}
});
const sampleRowIndex =
multiEntryRowIndex !== -1 ? multiEntryRowIndex : firstClassifiedRowIndex;
if (sampleRowIndex === -1) {
return {
multiValuedFields: [...multiValued],
countDistribution,
largestCount,
sample: null,
samplesByCount: {},
};
}
// One ordering sample per distinct count, so the UI can render a panel per
// count and the user confirms each independently.
const samplesByCount: Record<string, MultiEntrySample> = {};
for (const [count, rowIndex] of Object.entries(sampleRowIndexByCount)) {
samplesByCount[count] = sampleFromRow(rows[rowIndex], columnMapping, classifierCols, Number(count));
}
return {
multiValuedFields: [...multiValued],
countDistribution,
largestCount,
sample: sampleFromRow(
rows[sampleRowIndex],
columnMapping,
classifierCols,
largestCount >= 2 ? largestCount : 1,
),
samplesByCount,
};
}
// Build the sample for one row: its display address plus every mapped classifier
// column carrying a value. Step 1 lists all columns; Step 2's order table filters
// to the multi-valued ones (single-value columns are whole-dwelling facts).
function sampleFromRow(
row: Record<string, unknown>,
columnMapping: Record<string, string>,
classifierCols: Array<[string, string]>,
count: number,
): MultiEntrySample {
const columns: MultiEntryColumn[] = classifierCols
.map(([field, header]) => ({ field, header, entries: splitEntries(row[header]) }))
.filter((column) => column.entries.length > 0);
return { address: buildAddress(row, columnMapping), count, columns };
}

View file

@ -0,0 +1,23 @@
// Shared S3 key conventions + the synthetic join-column name for bulk-upload
// artifacts. The finaliser join (ADR-0006) depends on the classifier CSV key
// being built *identically* in two places — where the CSV is written
// (start-address-matching) and where the finaliser is dispatched
// (dispatchFinaliser) — and on the `source_row_id` column appearing in both the
// address CSV and the classifier CSV. Keeping the convention here is the single
// source of truth that stops those two callers drifting.
export const BULK_UPLOAD_INPUT_PREFIX = "bulk_onboarding_inputs";
export function addressCsvKey(portfolioId: string, uploadId: string): string {
return `${BULK_UPLOAD_INPUT_PREFIX}/${portfolioId}/${uploadId}.csv`;
}
export function classifierCsvKey(portfolioId: string, uploadId: string): string {
return `${BULK_UPLOAD_INPUT_PREFIX}/${portfolioId}/${uploadId}-classifier.csv`;
}
// The synthetic per-row UUID column. Minted at start-address-matching and
// emitted into both CSVs so the finaliser can join a row's identity (combiner
// output) to its raw descriptions (classifier CSV). The Model finaliser reads
// this exact header — keep the two in sync.
export const SOURCE_ROW_ID_COLUMN = "source_row_id";

View file

@ -1,9 +1,25 @@
import { db } from "@/app/db/db";
import { bulkAddressUploads } from "@/app/db/schema/bulk_address_uploads";
import {
landlordPropertyTypeOverrides,
landlordBuiltFormTypeOverrides,
landlordWallTypeOverrides,
landlordRoofTypeOverrides,
PropertyTypeValues,
BuiltFormTypeValues,
WallTypeValues,
RoofTypeValues,
} from "@/app/db/schema/landlord_overrides";
import { tasks } from "@/app/db/schema/tasks/tasks";
import { subTasks } from "@/app/db/schema/tasks/subtask";
import { count, desc, eq, sql } from "drizzle-orm";
import { and, count, desc, eq, inArray, sql } from "drizzle-orm";
import type { BulkUpload, BulkUploadStatus, ProgressView, TaskSummary } from "./types";
import { validateColumnMapping, classifierMapping } from "./columnFields";
import { classifierCsvKey } from "./s3Keys";
import { retrofitDataS3Bucket } from "@/app/utils/s3";
import { SUBTASK_SERVICE } from "./types";
import type { MultiEntrySummary } from "./multiEntry";
import { isPermutation } from "./multiEntry";
const REMAP_ALLOWED: ReadonlySet<BulkUploadStatus> = new Set([
"ready_for_processing",
@ -78,6 +94,12 @@ async function loadTaskSummary(taskId: string): Promise<TaskSummary | null> {
totalSubtasks: count(subTasks.id),
completedSubtasks: sql<number>`count(case when lower(${subTasks.status}) in ('completed', 'complete') then 1 end)::int`,
failedSubtasks: sql<number>`count(case when lower(${subTasks.status}) in ('failed', 'failure', 'error') then 1 end)::int`,
addressTotal: sql<number>`count(case when (${subTasks.service} = 'address2uprn' or ${subTasks.service} is null) and ${subTasks.id} is not null then 1 end)::int`,
addressCompleted: sql<number>`count(case when (${subTasks.service} = 'address2uprn' or ${subTasks.service} is null) and lower(${subTasks.status}) in ('completed', 'complete') then 1 end)::int`,
addressFailed: sql<number>`count(case when (${subTasks.service} = 'address2uprn' or ${subTasks.service} is null) and lower(${subTasks.status}) in ('failed', 'failure', 'error') then 1 end)::int`,
classifierTotal: sql<number>`count(case when ${subTasks.service} = 'landlord_description_overrides' then 1 end)::int`,
classifierCompleted: sql<number>`count(case when ${subTasks.service} = 'landlord_description_overrides' and lower(${subTasks.status}) in ('completed', 'complete') then 1 end)::int`,
classifierFailed: sql<number>`count(case when ${subTasks.service} = 'landlord_description_overrides' and lower(${subTasks.status}) in ('failed', 'failure', 'error') then 1 end)::int`,
})
.from(tasks)
.leftJoin(subTasks, eq(subTasks.taskId, tasks.id))
@ -94,11 +116,300 @@ export async function getProgressView(uploadId: string): Promise<ProgressView |
return { upload, task };
}
function validateMapping(mapping: Record<string, string>): string | null {
const values = Object.values(mapping);
if (!values.includes("address_1")) return "Mapping must include address_1.";
if (!values.includes("postcode")) return "Mapping must include postcode.";
return null;
// Persist the multi-entry building-part detection (ADR-0004). Computed once at
// start-address-matching from the already-parsed rows; read back on the
// awaiting_review surface. Only this column is touched, so the later
// status/taskId update leaves it intact.
export async function saveMultiEntrySummary(
uploadId: string,
summary: MultiEntrySummary,
): Promise<void> {
await db
.update(bulkAddressUploads)
.set({ multiEntrySummary: summary })
.where(eq(bulkAddressUploads.id, uploadId));
}
// Classifier field -> resolved enum, keyed by the normalized description that
// the classifier persisted. Empty inner maps / absent fields mean "not
// classified (yet)". Read-only (ADR-0004, issue #298).
export type SampleClassifications = Record<string, Record<string, string>>;
// Look up the classifier's resolved enums for one field's descriptions. One
// branch per category keeps drizzle's per-table enum typing intact.
async function lookupOverrides(
field: string,
portfolioId: bigint,
descriptions: string[],
): Promise<Array<{ description: string; value: string }> | null> {
switch (field) {
case "property_type":
return db
.select({ description: landlordPropertyTypeOverrides.description, value: landlordPropertyTypeOverrides.value })
.from(landlordPropertyTypeOverrides)
.where(and(eq(landlordPropertyTypeOverrides.portfolioId, portfolioId), inArray(landlordPropertyTypeOverrides.description, descriptions)));
case "built_form_type":
return db
.select({ description: landlordBuiltFormTypeOverrides.description, value: landlordBuiltFormTypeOverrides.value })
.from(landlordBuiltFormTypeOverrides)
.where(and(eq(landlordBuiltFormTypeOverrides.portfolioId, portfolioId), inArray(landlordBuiltFormTypeOverrides.description, descriptions)));
case "wall_type":
return db
.select({ description: landlordWallTypeOverrides.description, value: landlordWallTypeOverrides.value })
.from(landlordWallTypeOverrides)
.where(and(eq(landlordWallTypeOverrides.portfolioId, portfolioId), inArray(landlordWallTypeOverrides.description, descriptions)));
case "roof_type":
return db
.select({ description: landlordRoofTypeOverrides.description, value: landlordRoofTypeOverrides.value })
.from(landlordRoofTypeOverrides)
.where(and(eq(landlordRoofTypeOverrides.portfolioId, portfolioId), inArray(landlordRoofTypeOverrides.description, descriptions)));
default:
return null;
}
}
// The classifier's enums for the review samples' entries, joined by the
// normalized description (exact match — the summary stored it the way the
// classifier persists it, so no re-normalization here). Read-only. Covers the
// Step 1 verify sample AND every per-count ordering sample, since smaller-count
// panels may show descriptions the largest-count sample doesn't (ADR-0004
// amendment).
export async function getSampleClassifications(
uploadId: string,
): Promise<SampleClassifications> {
const upload = await loadById(uploadId);
const summary = upload?.multiEntrySummary;
if (!upload || !summary || !summary.sample) return {};
// Gather distinct descriptions per field across all samples.
const allSamples = [summary.sample, ...Object.values(summary.samplesByCount ?? {})];
const descriptionsByField: Record<string, Set<string>> = {};
for (const sample of allSamples) {
for (const column of sample.columns) {
const set = (descriptionsByField[column.field] ??= new Set<string>());
for (const e of column.entries) set.add(e.description);
}
}
const portfolioId = BigInt(upload.portfolioId);
const result: SampleClassifications = {};
for (const [field, descSet] of Object.entries(descriptionsByField)) {
const descriptions = [...descSet];
if (descriptions.length === 0) continue;
const rows = await lookupOverrides(field, portfolioId, descriptions);
if (!rows) continue;
result[field] = Object.fromEntries(rows.map((r) => [r.description, r.value]));
}
return result;
}
// Descriptions still classified `Unknown` per field, portfolio-wide (ADR-0006).
// `Unknown` is the classifier's "couldn't decide" marker; v2 treats it as
// never-final, so the Finalise gate blocks until the user maps every one to a
// real value (and the finaliser fails loudly if any slips through). Portfolio-
// wide is the right scope under the one-real-upload assumption (ADR-0006).
export type UnknownOverrides = Record<string, string[]>;
const UNKNOWN_VALUE = "Unknown";
async function unknownForField(field: string, portfolioId: bigint): Promise<string[]> {
switch (field) {
case "property_type":
return (
await db
.select({ description: landlordPropertyTypeOverrides.description })
.from(landlordPropertyTypeOverrides)
.where(and(eq(landlordPropertyTypeOverrides.portfolioId, portfolioId), eq(landlordPropertyTypeOverrides.value, UNKNOWN_VALUE)))
).map((r) => r.description);
case "built_form_type":
return (
await db
.select({ description: landlordBuiltFormTypeOverrides.description })
.from(landlordBuiltFormTypeOverrides)
.where(and(eq(landlordBuiltFormTypeOverrides.portfolioId, portfolioId), eq(landlordBuiltFormTypeOverrides.value, UNKNOWN_VALUE)))
).map((r) => r.description);
case "wall_type":
return (
await db
.select({ description: landlordWallTypeOverrides.description })
.from(landlordWallTypeOverrides)
.where(and(eq(landlordWallTypeOverrides.portfolioId, portfolioId), eq(landlordWallTypeOverrides.value, UNKNOWN_VALUE)))
).map((r) => r.description);
case "roof_type":
return (
await db
.select({ description: landlordRoofTypeOverrides.description })
.from(landlordRoofTypeOverrides)
.where(and(eq(landlordRoofTypeOverrides.portfolioId, portfolioId), eq(landlordRoofTypeOverrides.value, UNKNOWN_VALUE)))
).map((r) => r.description);
default:
return [];
}
}
export async function getUnknownOverrides(portfolioId: string): Promise<UnknownOverrides> {
const pid = BigInt(portfolioId);
const result: UnknownOverrides = {};
for (const field of ["property_type", "built_form_type", "wall_type", "roof_type"]) {
const descriptions = await unknownForField(field, pid);
if (descriptions.length > 0) result[field] = descriptions;
}
return result;
}
// Valid enum values per classifier category, for validating a user edit.
const CATEGORY_VALUES: Record<string, readonly string[]> = {
property_type: PropertyTypeValues,
built_form_type: BuiltFormTypeValues,
wall_type: WallTypeValues,
roof_type: RoofTypeValues,
};
// Upsert one user override (source='user') into the right category table. One
// branch per table keeps drizzle's per-table typing intact; the unique
// (portfolio_id, description) drives the conflict. Sets source='user' so the
// classifier's `ON CONFLICT … WHERE source='classifier'` never re-clobbers it.
async function upsertUserOverride(
field: string,
portfolioId: bigint,
description: string,
value: string,
): Promise<void> {
const now = new Date();
switch (field) {
case "property_type":
await db.insert(landlordPropertyTypeOverrides)
.values({ portfolioId, description, value, source: "user" })
.onConflictDoUpdate({
target: [landlordPropertyTypeOverrides.portfolioId, landlordPropertyTypeOverrides.description],
set: { value, source: "user", updatedAt: now },
});
return;
case "built_form_type":
await db.insert(landlordBuiltFormTypeOverrides)
.values({ portfolioId, description, value, source: "user" })
.onConflictDoUpdate({
target: [landlordBuiltFormTypeOverrides.portfolioId, landlordBuiltFormTypeOverrides.description],
set: { value, source: "user", updatedAt: now },
});
return;
case "wall_type":
await db.insert(landlordWallTypeOverrides)
.values({ portfolioId, description, value, source: "user" })
.onConflictDoUpdate({
target: [landlordWallTypeOverrides.portfolioId, landlordWallTypeOverrides.description],
set: { value, source: "user", updatedAt: now },
});
return;
case "roof_type":
await db.insert(landlordRoofTypeOverrides)
.values({ portfolioId, description, value, source: "user" })
.onConflictDoUpdate({
target: [landlordRoofTypeOverrides.portfolioId, landlordRoofTypeOverrides.description],
set: { value, source: "user", updatedAt: now },
});
return;
}
}
export type SetClassificationOutcome =
| { kind: "ok" }
| { kind: "invalid"; reason: string };
// Correct one classification, persisted as a user override (ADR-0004, issue
// #299). Keyed by (portfolio, description), so it changes the mapping for every
// row with that description portfolio-wide. The description is normalized to
// match the classifier's stored key.
export async function setClassificationOverride(
portfolioId: string,
field: string,
description: string,
value: string,
): Promise<SetClassificationOutcome> {
const allowed = CATEGORY_VALUES[field];
if (!allowed) return { kind: "invalid", reason: `Unknown category '${field}'` };
if (!allowed.includes(value))
return { kind: "invalid", reason: `'${value}' is not a valid ${field}` };
const normalized = description.trim().toLowerCase();
if (!normalized) return { kind: "invalid", reason: "Empty description" };
await upsertUserOverride(field, BigInt(portfolioId), normalized, value);
return { kind: "ok" };
}
export type SetOrderingOutcome =
| { kind: "ok"; upload: BulkUpload }
| { kind: "not_found" }
| { kind: "wrong_state"; current: string }
| { kind: "not_multi_entry" }
| { kind: "invalid_ordering"; reason: string };
// Persist the user-confirmed building-part ordering (ADR-0004, amended
// 2026-06-05). Allowed only at awaiting_review and only when the upload is
// multi-entry. Each distinct count ≥ 2 needs its own permutation; the UI confirms
// one count at a time, so we MERGE the supplied permutations into any already
// stored, validate each is a bijection, and only mark `confirmed` once EVERY
// detected count has a permutation (which gates Finalise).
export async function setMultiEntryOrdering(
uploadId: string,
permutations: Record<string, number[]>,
): Promise<SetOrderingOutcome> {
const upload = await loadById(uploadId);
if (!upload) return { kind: "not_found" };
if (upload.status !== "awaiting_review")
return { kind: "wrong_state", current: upload.status };
const summary = upload.multiEntrySummary;
// A sample now exists for non-multi-entry uploads too (Step 1's verify
// sample), so "is multi-entry" is largestCount >= 2, not "has a sample".
if (!summary || summary.largestCount < 2)
return { kind: "not_multi_entry" };
for (const [count, permutation] of Object.entries(permutations)) {
if (permutation.length !== Number(count) || !isPermutation(permutation))
return { kind: "invalid_ordering", reason: `Ordering for ${count} parts is not a valid arrangement.` };
}
// Merge with any counts confirmed earlier, then decide whether every detected
// count (the keys of countDistribution, all ≥ 2) now has a permutation.
const merged = { ...(upload.multiEntryOrdering?.permutations ?? {}), ...permutations };
const requiredCounts = Object.keys(summary.countDistribution);
const confirmed = requiredCounts.every(
(c) => Array.isArray(merged[c]) && merged[c].length === Number(c),
);
const [updated] = await db
.update(bulkAddressUploads)
.set({ multiEntryOrdering: { permutations: merged, confirmed } })
.where(eq(bulkAddressUploads.id, uploadId))
.returning();
if (!updated) return { kind: "not_found" };
return { kind: "ok", upload: updated };
}
export type SetVerifyAckOutcome =
| { kind: "ok"; upload: BulkUpload }
| { kind: "not_found" }
| { kind: "wrong_state"; current: string };
// Record the user's "Verify classification" acknowledgement (ADR-0004 Step 1).
// Allowed only at awaiting_review. Gates Finalise whenever the upload has
// classifier columns — independent of multi-entry, hence its own column rather
// than a flag on multiEntryOrdering.
export async function setVerifyAck(uploadId: string): Promise<SetVerifyAckOutcome> {
const upload = await loadById(uploadId);
if (!upload) return { kind: "not_found" };
if (upload.status !== "awaiting_review")
return { kind: "wrong_state", current: upload.status };
const [updated] = await db
.update(bulkAddressUploads)
.set({ verifyAck: true })
.where(eq(bulkAddressUploads.id, uploadId))
.returning();
if (!updated) return { kind: "not_found" };
return { kind: "ok", upload: updated };
}
export type SetMappingOutcome =
@ -116,7 +427,7 @@ export async function setColumnMapping(
if (!REMAP_ALLOWED.has(upload.status as BulkUploadStatus))
return { kind: "invalid_status", current: upload.status };
const reason = validateMapping(mapping);
const reason = validateColumnMapping(mapping);
if (reason) return { kind: "invalid_mapping", reason };
const [updated] = await db
@ -174,6 +485,7 @@ export async function triggerAddressMatching(args: {
.values({
taskId: task.id,
status: "waiting",
service: SUBTASK_SERVICE.address,
inputs: JSON.stringify({ bulk_upload_id: args.uploadId }),
})
.returning();
@ -209,9 +521,69 @@ export async function triggerAddressMatching(args: {
return { kind: "ok", taskId: task.id };
}
// Co-fires the landlord classifier as a subtask under the address task. Reads a
// dedicated classifier CSV (the classifier columns converted from the upload by
// the start-address-matching route — the address-matching CSV strips the
// description columns), so the lambda always parses a real CSV even for
// .xlsx/.xls uploads. Non-blocking: a trigger failure marks only the classifier
// subtask, so address matching is unaffected. See ADR-0003.
export async function triggerClassifier(args: {
taskId: string;
uploadId: string;
s3Uri: string;
sessionToken: string | undefined;
}): Promise<void> {
const upload = await loadById(args.uploadId);
if (!upload || !upload.columnMapping) return;
const columnMapping = classifierMapping(upload.columnMapping);
if (Object.keys(columnMapping).length === 0) return;
const [subTask] = await db
.insert(subTasks)
.values({
taskId: args.taskId,
status: "waiting",
service: SUBTASK_SERVICE.classifier,
inputs: JSON.stringify({ bulk_upload_id: args.uploadId }),
})
.returning();
const payload = {
task_id: args.taskId,
sub_task_id: subTask.id,
s3_uri: args.s3Uri,
portfolio_id: Number(upload.portfolioId),
column_mapping: columnMapping,
};
const trigger = await triggerFastApiPipeline({
endpoint: "/v1/bulk-uploads/trigger-landlord-overrides",
payload,
sessionToken: args.sessionToken,
});
if (!trigger.ok) {
await db
.update(subTasks)
.set({
status: "failed",
outputs: JSON.stringify({ error: trigger.message }),
})
.where(eq(subTasks.id, subTask.id));
return;
}
await db
.update(subTasks)
.set({ status: "in progress", inputs: JSON.stringify(payload) })
.where(eq(subTasks.id, subTask.id));
}
export type CombineRetriggerOutcome =
| { kind: "triggered"; taskId: string; subTaskId: string }
| { kind: "already_combined" }
| { kind: "already_dispatched" }
| { kind: "not_found" }
| { kind: "missing_task" }
| { kind: "trigger_failed"; status: number; message: string };
@ -225,6 +597,24 @@ export async function requestCombineRetrigger(args: {
if (!upload.taskId) return { kind: "missing_task" };
if (upload.combinedOutputS3Uri) return { kind: "already_combined" };
// CAS: atomically claim `processing → combining` — the double-dispatch guard
// (mirrors dispatchFinaliser's awaiting_review → finalising, ADR-0005). Of two
// rapid "Run Combiner" clicks exactly one flips the row; the loser updates 0
// rows and bails, so only one combiner subtask is ever dispatched. It also
// closes the window where status is still `processing` because the backend
// hasn't written `combining` yet.
const claimed = await db
.update(bulkAddressUploads)
.set({ status: "combining" })
.where(
and(
eq(bulkAddressUploads.id, args.uploadId),
eq(bulkAddressUploads.status, "processing"),
),
)
.returning();
if (claimed.length === 0) return { kind: "already_dispatched" };
const [subTask] = await db
.insert(subTasks)
.values({ taskId: upload.taskId, status: "waiting" })
@ -237,8 +627,23 @@ export async function requestCombineRetrigger(args: {
payload,
sessionToken: args.sessionToken,
});
if (!trigger.ok)
if (!trigger.ok) {
// Roll the claim back so the user can retry, and fail the subtask.
await Promise.all([
db
.update(bulkAddressUploads)
.set({ status: "processing" })
.where(eq(bulkAddressUploads.id, args.uploadId)),
db
.update(subTasks)
.set({
status: "failed",
outputs: JSON.stringify({ error: trigger.message }),
})
.where(eq(subTasks.id, subTask.id)),
]);
return { kind: "trigger_failed", status: trigger.status, message: trigger.message };
}
await db
.update(subTasks)
@ -265,9 +670,125 @@ export async function loadForFinalize(uploadId: string): Promise<LoadForFinalize
return { kind: "ready", upload };
}
export async function markFinalized(uploadId: string): Promise<void> {
await db
export type DispatchFinaliserOutcome =
| { kind: "ok"; taskId: string; subTaskId: string }
| { kind: "not_found" }
| { kind: "already_finalized" }
| { kind: "not_yet_combined" }
| { kind: "wrong_state"; current: string }
| { kind: "missing_task" }
| { kind: "trigger_failed"; status: number; message: string };
// Dispatch the async bulk_upload_finaliser (ADR-0005). Replaces the old
// synchronous property insert + markFinalized. Order matters:
// 1. loadForFinalize — rich guards (combined output present, awaiting_review).
// 2. CAS claim `awaiting_review → finalising` — the double-dispatch guard:
// of two simultaneous clicks exactly one updates a row; the loser gets 409.
// 3. create the finaliser subtask under the upload's existing Task + POST the
// trigger. On trigger failure, revert the status so the user can retry and
// mark the subtask failed. The backend writes the terminal complete/failed.
export async function dispatchFinaliser(args: {
uploadId: string;
sessionToken: string | undefined;
}): Promise<DispatchFinaliserOutcome> {
const guarded = await loadForFinalize(args.uploadId);
switch (guarded.kind) {
case "not_found":
return { kind: "not_found" };
case "already_finalized":
return { kind: "already_finalized" };
case "not_yet_combined":
return { kind: "not_yet_combined" };
case "wrong_state":
return { kind: "wrong_state", current: guarded.current };
}
const upload = guarded.upload;
if (!upload.taskId) return { kind: "missing_task" };
// CAS: atomically claim the dispatch. Only the request that flips
// awaiting_review → finalising proceeds; a concurrent one updates 0 rows.
const claimed = await db
.update(bulkAddressUploads)
.set({ status: "complete" })
.where(eq(bulkAddressUploads.id, uploadId));
.set({ status: "finalising" })
.where(
and(
eq(bulkAddressUploads.id, args.uploadId),
eq(bulkAddressUploads.status, "awaiting_review"),
),
)
.returning();
if (claimed.length === 0) {
const current = await loadById(args.uploadId);
if (current?.status === "complete") return { kind: "already_finalized" };
return { kind: "wrong_state", current: current?.status ?? "unknown" };
}
const [subTask] = await db
.insert(subTasks)
.values({
taskId: upload.taskId,
status: "waiting",
service: SUBTASK_SERVICE.finaliser,
inputs: JSON.stringify({ bulk_upload_id: args.uploadId }),
})
.returning();
// v2 (ADR-0006): the finaliser also writes property_overrides for UPRN-matched
// rows, which needs the classifier CSV (raw descriptions, joined to the
// combiner output by `source_row_id`) and the confirmed building-part ordering.
// Both are derivable here — we already hold the upload row, and dispatch runs
// after the user confirms ordering, so the value is final.
// - classifier_s3_uri: null when no classifier columns were mapped (no
// classifier CSV was written; the finaliser then writes no overrides).
// - multi_entry_ordering: permutations keyed by entry-count; {} when the
// upload is not multi-entry (every cell is a single building part → part 0).
const classifierMap = classifierMapping(upload.columnMapping ?? {});
const classifierS3Uri =
Object.keys(classifierMap).length > 0
? `s3://${retrofitDataS3Bucket()}/${classifierCsvKey(upload.portfolioId, args.uploadId)}`
: null;
const payload = {
task_id: upload.taskId,
sub_task_id: subTask.id,
s3_uri: upload.combinedOutputS3Uri,
portfolio_id: Number(upload.portfolioId),
bulk_upload_id: args.uploadId,
classifier_s3_uri: classifierS3Uri,
multi_entry_ordering: upload.multiEntryOrdering?.permutations ?? {},
// classifier category → source CSV header, so the finaliser knows which
// classifier-CSV column feeds each override_component (ADR-0006).
column_mapping: classifierMap,
};
const trigger = await triggerFastApiPipeline({
endpoint: "/v1/bulk-uploads/trigger-finaliser",
payload,
sessionToken: args.sessionToken,
});
if (!trigger.ok) {
// Roll the claim back so the user can retry, and fail the subtask.
await Promise.all([
db
.update(bulkAddressUploads)
.set({ status: "awaiting_review" })
.where(eq(bulkAddressUploads.id, args.uploadId)),
db
.update(subTasks)
.set({
status: "failed",
outputs: JSON.stringify({ error: trigger.message }),
})
.where(eq(subTasks.id, subTask.id)),
]);
return { kind: "trigger_failed", status: trigger.status, message: trigger.message };
}
await db
.update(subTasks)
.set({ status: "in progress", inputs: JSON.stringify(payload) })
.where(eq(subTasks.id, subTask.id));
return { kind: "ok", taskId: upload.taskId, subTaskId: subTask.id };
}

View file

@ -6,6 +6,10 @@ export const BULK_UPLOAD_STATUSES = [
"processing",
"combining",
"awaiting_review",
// In-flight state of the async finaliser (ADR-0005); mirrors `combining`. The
// status column is free text, so no enum migration is needed. UI renders this
// as "Uploading to ARA" — see STATUS_LABELS.
"finalising",
"complete",
"failed",
] as const;
@ -14,6 +18,32 @@ export type BulkUploadStatus = (typeof BULK_UPLOAD_STATUSES)[number];
export type BulkUpload = typeof bulkAddressUploads.$inferSelect;
// sub_task.service values. NULL is treated as address (legacy rows + the
// backend-spawned postcode-split children, which don't set it). See ADR-0003.
export const SUBTASK_SERVICE = {
address: "address2uprn",
classifier: "landlord_description_overrides",
finaliser: "bulk_upload_finaliser",
} as const;
// User-facing label for a BulkUpload status. The persisted enum value stays
// canonical (`finalising`); the product surface for that state is "Uploading to
// ARA" (ADR-0005 — a display-layer label, never the enum name).
export const STATUS_LABELS: Record<string, string> = {
ready_for_processing: "Ready for processing",
mapping_complete: "Mapping complete",
processing: "Processing",
combining: "Combining",
awaiting_review: "Awaiting review",
finalising: "Uploading to ARA",
complete: "Complete",
failed: "Failed",
};
export function statusLabel(status: string): string {
return STATUS_LABELS[status] ?? status;
}
export type TaskSummary = {
id: string;
taskSource: string;
@ -25,6 +55,14 @@ export type TaskSummary = {
totalSubtasks: number;
completedSubtasks: number;
failedSubtasks: number;
// Per-pipeline breakdown so onboarding progress can separate address
// matching from landlord classification (ADR-0003).
addressTotal: number;
addressCompleted: number;
addressFailed: number;
classifierTotal: number;
classifierCompleted: number;
classifierFailed: number;
};
export type ProgressView = {

View file

@ -0,0 +1,79 @@
import { db } from "@/app/db/db";
import {
landlordPropertyTypeOverrides,
landlordBuiltFormTypeOverrides,
landlordWallTypeOverrides,
landlordRoofTypeOverrides,
} from "@/app/db/schema/landlord_overrides";
import { asc, eq } from "drizzle-orm";
export interface OverrideRow {
description: string;
value: string;
source: string;
}
export type LandlordOverrideCategory =
| "property_type"
| "built_form_type"
| "wall_type"
| "roof_type";
export type LandlordOverrideResults = Record<LandlordOverrideCategory, OverrideRow[]>;
const EMPTY: LandlordOverrideResults = {
property_type: [],
built_form_type: [],
wall_type: [],
roof_type: [],
};
// Reads the four landlord_*_overrides tables for a portfolio. The portfolio id
// is a bigint FK; the bulk-upload flow carries it as a numeric string.
export async function getLandlordOverrides(
portfolioId: string
): Promise<LandlordOverrideResults> {
if (!/^\d+$/.test(portfolioId)) return EMPTY;
const pid = BigInt(portfolioId);
const [property_type, built_form_type, wall_type, roof_type] = await Promise.all([
db
.select({
description: landlordPropertyTypeOverrides.description,
value: landlordPropertyTypeOverrides.value,
source: landlordPropertyTypeOverrides.source,
})
.from(landlordPropertyTypeOverrides)
.where(eq(landlordPropertyTypeOverrides.portfolioId, pid))
.orderBy(asc(landlordPropertyTypeOverrides.description)),
db
.select({
description: landlordBuiltFormTypeOverrides.description,
value: landlordBuiltFormTypeOverrides.value,
source: landlordBuiltFormTypeOverrides.source,
})
.from(landlordBuiltFormTypeOverrides)
.where(eq(landlordBuiltFormTypeOverrides.portfolioId, pid))
.orderBy(asc(landlordBuiltFormTypeOverrides.description)),
db
.select({
description: landlordWallTypeOverrides.description,
value: landlordWallTypeOverrides.value,
source: landlordWallTypeOverrides.source,
})
.from(landlordWallTypeOverrides)
.where(eq(landlordWallTypeOverrides.portfolioId, pid))
.orderBy(asc(landlordWallTypeOverrides.description)),
db
.select({
description: landlordRoofTypeOverrides.description,
value: landlordRoofTypeOverrides.value,
source: landlordRoofTypeOverrides.source,
})
.from(landlordRoofTypeOverrides)
.where(eq(landlordRoofTypeOverrides.portfolioId, pid))
.orderBy(asc(landlordRoofTypeOverrides.description)),
]);
return { property_type, built_form_type, wall_type, roof_type };
}