From 77cb7d9c0572df51da0498e06fdaf80685d60645 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 3 Jun 2026 11:41:48 +0000 Subject: [PATCH] save current progress --- .claude/settings.json | 10 +- docs/design/bulk-upload-finaliser.md | 163 ++++++++++++++++++ .../[uploadId]/OnboardingProgress.tsx | 19 +- 3 files changed, 189 insertions(+), 3 deletions(-) create mode 100644 docs/design/bulk-upload-finaliser.md diff --git a/.claude/settings.json b/.claude/settings.json index e5143ec..974af6a 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -41,7 +41,15 @@ "Bash(npm run *)", "Bash(grep '\\\\.sql$')", "Bash(git status *)", - "Bash(git checkout *)" + "Bash(git checkout *)", + "Bash(git stash *)", + "Bash(git pull *)", + "Bash(git config *)", + "Bash(GIT_LITERAL_PATHSPECS=1 git add src/app/db/schema/bulk_address_uploads.ts src/lib/bulkUpload/multiEntry.ts src/lib/bulkUpload/server.ts src/lib/bulkUpload/client.ts src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/multi-entry-ordering/route.ts 'src/app/portfolio/[slug]/\\(portfolio\\)/bulk-upload/[uploadId]/OnboardingProgress.tsx' docs/wip/multi-entry-ordering-plan.md)", + "Bash(GIT_LITERAL_PATHSPECS=1 npx eslint src/lib/bulkUpload/server.ts src/lib/bulkUpload/client.ts \"src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/classifications/route.ts\" \"src/app/portfolio/[slug]/\\(portfolio\\)/bulk-upload/[uploadId]/OnboardingProgress.tsx\")", + "Bash(python3 -c \"from backend.app.config import Settings; print\\('COMBINER_SQS_URL =', Settings\\(\\).COMBINER_SQS_URL\\)\")", + "Bash(find /workspaces -name \"*.py\" -path \"*/domain/*\" -o -name \"subtasks.py\" 2>/dev/null | head -20)", + "Read(//workspaces/**)" ], "deny": [ "Bash(npx drizzle-kit generate)", diff --git a/docs/design/bulk-upload-finaliser.md b/docs/design/bulk-upload-finaliser.md new file mode 100644 index 0000000..b56c7b7 --- /dev/null +++ b/docs/design/bulk-upload-finaliser.md @@ -0,0 +1,163 @@ +# Design WIP: `bulk_upload_finaliser` + `property_overrides` + +> **Status:** In progress (grilling session paused 2026-06-03). Not an ADR yet. +> Resume from **Open question Q6** below. When decisions stabilise this should +> graduate into a new ADR in `docs/adr/` (frontend) and likely a companion ADR +> in the Model repo, plus a CONTEXT.md update (see "Docs to update"). + +## Goal + +Two linked pieces of work: + +1. **New backend application `bulk_upload_finaliser`** (lives in + `/workspaces/home/github/Model/applications/`, DDD-aligned — study + `/workspaces/home/github/Model/domain`). It reads the address-matching / + combiner output **and** the `landlord_*_overrides` vocabulary tables, then + writes Postgres correctly: the `property` rows (UPRN + address, as the + frontend does today) and — later — the new `property_overrides` rows. + Motivation: a property list can be ~40,000 rows, too big for a synchronous + Next.js HTTP handler. + +2. **New `property_overrides` table** — the per-Property fact layer that + **ADR-0004 explicitly deferred** ("the per-Property building-part fact layer + that consumes `multiEntryOrdering` and writes main/extension facts at + finalise"). One row per `(property, building_part, component)` carrying the + resolved enum value + provenance. + +**v1 scope:** the finaliser only needs to write `property` (UPRN + address), +matching today's frontend `/finalize`. The `property_overrides` *table* is +designed now; populating it is follow-up work. (Confirm scope — see Q-scope.) + +## Where this sits in the existing pipeline + +``` +BulkUpload → address matching → combiner → awaiting_review → [Finalise] + │ + (new) bulk_upload_finaliser ──────────┘ + reads: combiner output (S3) + landlord_*_overrides + writes: property (+ later property_overrides) + │ + downstream: Ingestion (EPC/solar fetch) + → PropertyBaseline (stage 2, + re-score-on-override seam, + Model ADR-0011/0012) +``` + +`Finalise` (the user action + state-machine gate) stays in Next.js; the new +application is the **worker it dispatches**. Downstream `PropertyBaseline` +already has an override-aware "re-score" seam — `property_overrides` will feed it. + +## Decisions locked + +| # | Decision | +|---|----------| +| Name | Application is **`bulk_upload_finaliser`**, in `Model/applications/`. `Finalise` stays the Next.js action that triggers it. | +| DDD | Follow the DDD structure under `Model/domain`. Domain terms discovered as needed. | +| Schema ownership | **Drizzle (frontend) owns migrations** for both `property` and the new `property_overrides`. | +| Backend access | Backend gets a **`PropertyOverrideRow` SQLModel** (mirror, like `landlord_wall_type_override_table.py`) + a **repository** (see `Model/infrastructure/postgres` + `Model/repositories` for examples). `PropertyRow` must drop its "backend never inserts" invariant and gain insertable columns. | +| Next.js `/finalize` | **Delete it** — fully replaced by the Lambda. | +| `property_overrides` shape | **Single polymorphic table**, not per-component tables. Accepts losing DB-level pgEnum typing on `value`. | +| `value` | `text` — a **denormalised snapshot copy** of the resolved enum value from `landlord_*_overrides` at materialise time (lets us see the value per-property even if vocabulary later changes). | +| `building_part` | **`smallint NOT NULL`**, explicit index: `0 = main building, 1 = extension 1, 2 = extension 2, …` (matches ADR-0004 `multiEntryOrdering.permutations` indexing). | +| Whole-dwelling components | **No special case.** `property_type`/`built_form` are *per-part-capable* too (an extension — conservatory, summer house — can be a different built form / property type). Today's files only supply them once, so they'll usually be written at `building_part = 0` only, but the schema allows per-part with no future migration. | + +## `property_overrides` — columns so far + +Roughly (subject to remaining open questions): + +``` +property_overrides + id uuid pk (default random) -- match landlord_* tables + property_id bigint NOT NULL FK → property.id (FE-owned table) + portfolio_id bigint NOT NULL FK → portfolio.id + building_part smallint NOT NULL -- 0 = main, 1 = ext 1, 2 = ext 2, … + component NOT NULL -- Q6: pgEnum vs text; value set + value text NOT NULL -- snapshot copy of landlord_* resolved enum + source override_source NOT NULL -- 'classifier' | 'user' (reuse existing pgEnum) + description text? -- Q7: store raw landlord description for provenance? + created_at timestamptz NOT NULL default now() + updated_at timestamptz NOT NULL default now() + -- UNIQUE (property_id, component, building_part) -- Q8: confirm +``` + +## Open questions (resume here) + +- **Q6 — `component` discriminator (WE STOPPED HERE).** pgEnum vs text, and the + value set. *Recommendation:* pgEnum `property_component` (or + `override_component`) with the established category names + **`wall_type`, `roof_type`, `property_type`, `built_form_type`** (same keys as + `column_mapping` / `ClassifiableColumn.name`, so finaliser maps category → + component with no translation). pgEnum over text: small closed set, typos + caught at write time (matters more now `value` is free text); new component = + one-line `ALTER TYPE … ADD VALUE`. + +- **Q7 — store raw `description` per override row?** For provenance / re-resolution + / debugging vs redundancy (the description already lives in `landlord_*_overrides`). + *Lean:* store it — cheap, and it pins what text produced this snapshot. + +- **Q8 — uniqueness + FKs.** Confirm `UNIQUE (property_id, component, building_part)`. + `property_id` FK → FE-owned `property.id`; `portfolio_id` as `bigint` (mirror + the `landlord_*` note: FK enforced by Drizzle migration, not the SQLModel). + +- **Q9 — `source` semantics.** Reuse the existing `override_source` pgEnum + (`classifier`/`user`). At materialise time the finaliser copies the source from + the `landlord_*_overrides` row it resolved from. Confirm there's no *per-property* + override concept yet (today overrides are edited at the **vocabulary/portfolio** + level per ADR-0004; property_overrides just snapshots the outcome). + +- **Q-scope — v1 scope.** Confirm v1 = `property` (UPRN + address) only; + `property_overrides` table created but **not populated** until follow-up. + +### Application-flow questions not yet reached + +- **Trigger + orchestration.** How does Finalise dispatch the finaliser? Likely + the existing `TaskOrchestrator` / `subtask_handler` pattern (see ADR-0003, + `landlord_description_overrides` handler) — Next.js `/finalize` triggers a + subtask instead of inserting. Need the trigger body shape (cf. + `LandlordDescriptionOverridesTriggerBody`: `task_id`, `sub_task_id`, `s3_uri`, + `portfolio_id`, …). + +- **State machine / who writes `complete`.** Today Next.js writes `complete` + synchronously after the insert. If Finalise becomes async, **FastAPI/Lambda + writes the terminal status** (mirroring how it already owns + `combining`/`awaiting_review`). This is a CONTEXT.md "Two writers" change. + +- **Input — does the combiner output carry the raw description cells?** v1 only + needs address/UPRN columns (confirmed present: `address2uprn_uprn`, + `address2uprn_address`, `address2uprn_lexiscore`, `Internal Reference`, + address/postcode). For `property_overrides` population (later) the finaliser + also needs the raw `Walls`/`Roofs`/`Property Type` cells **plus** + `multiEntryOrdering` to split per building part — confirm these survive into + the combiner output, or that the finaliser reads them from another source. + +- **Idempotency / re-run.** Today's insert uses + `onConflictDoNothing((portfolio_id, uprn) where uprn is not null)`. Define the + finaliser's re-run behaviour for both tables (esp. that snapshot `value`s won't + refresh on re-run after a vocabulary edit unless we deliberately re-materialise). + +## Key code references (from exploration) + +**Frontend (`assessment-model`):** +- [finalize/route.ts](../../src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/finalize/route.ts) — today's synchronous property insert (to be deleted). +- [property.ts](../../src/app/db/schema/property.ts) — `property` table (`property_type`, `built_form` columns exist; `uq_property_portfolio_uprn`). +- [landlord_overrides.ts](../../src/app/db/schema/landlord_overrides.ts) — the four per-component override tables + all pgEnums (`wallTypeEnum`, `roofTypeEnum`, `propertyTypeEnum`, `builtFormTypeEnum`, `overrideSourceEnum`). +- [bulk_address_uploads.ts](../../src/app/db/schema/bulk_address_uploads.ts) — `multiEntryOrdering` (permutations, `0=main`), `multiEntrySummary`, `verifyAck`, `combinedOutputS3Uri`. +- [ADR-0004](../adr/0004-multi-entry-building-part-ordering.md) — defers exactly this fact layer; the building-part ordering model. +- [ADR-0002](../adr/0002-landlord-override-vocabulary.md) — vocabulary layer. + +**Backend (`Model`):** +- `applications/landlord_description_overrides/handler.py` — the worker pattern to mirror (`subtask_handler`, `TaskOrchestrator`, trigger body, `commit_scope`). +- `infrastructure/postgres/landlord_wall_type_override_table.py` — SQLModel mirror pattern for the new `PropertyOverrideRow`. +- `infrastructure/postgres/landlord_override_enums.py` — shared `override_source` SAEnum pattern. +- `infrastructure/postgres/property_table.py` — `PropertyRow` defensive view ("backend never inserts" — to change). +- `repositories/landlord_overrides/landlord_override_repository.py` — repository pattern for the new override repo. +- `orchestration/landlord_description_overrides_orchestrator.py` — orchestrator pattern; note it splits cells into an orderless set (discards part order — recovered via `multiEntryOrdering`). +- Downstream: `orchestration/property_baseline_orchestrator.py` (re-score-on-override seam), `orchestration/ingestion_orchestrator.py`. + +## Docs to update (when this lands) + +- **CONTEXT.md**: `Property Type` / `built_form` are **per-part-capable**, not + whole-dwelling. Add the per-Property fact layer (`property_overrides`) to the + glossary + relationships. Possibly a `building_part` index definition. +- **New ADR** (frontend) for `property_overrides` + finaliser; companion Model ADR + for the cross-repo write, citing ADR-0003/0004. diff --git a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx index 3bdfea4..3e64cca 100644 --- a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx +++ b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx @@ -27,6 +27,7 @@ import { WallTypeValues, RoofTypeValues, } from "@/app/db/schema/landlord_overrides"; +import { CLASSIFIER_FIELDS } from "@/lib/bulkUpload/columnFields"; // Valid enum options per classifier category, for the editable dropdowns (#299). const CATEGORY_VALUES: Record = { @@ -36,6 +37,14 @@ const CATEGORY_VALUES: Record = { roof_type: RoofTypeValues, }; +// Our category label per classifier field (e.g. built_form_type → "Built Form"). +// Distinguishes the categories when several read from the same source column — +// Property Type and Built Form both come from one column, so labelling by the +// customer's header alone is ambiguous. +const FIELD_LABEL: Record = Object.fromEntries( + CLASSIFIER_FIELDS.map((f) => [f.value, f.label]), +); + interface Props { portfolioSlug: string; portfolioId: string; @@ -290,7 +299,10 @@ function VerifyClassificationPanel({ return (

- {column.header} + {FIELD_LABEL[column.field] ?? column.field} + + — from your “{column.header}” column +

{entries.map((entry) => { @@ -438,7 +450,10 @@ function MultiEntryOrderingPanel({ Entry {orderColumns.map((column) => ( - {column.header} + {FIELD_LABEL[column.field] ?? column.header} + + your “{column.header}” + ))} Building part