Merge pull request #317 from Hestia-Homes/main

Additional columns in property export
This commit is contained in:
Daniel Roth 2026-06-15 13:09:13 +01:00 committed by GitHub
commit 4d62adc18b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
90 changed files with 186743 additions and 463 deletions

View file

@ -1,5 +1,28 @@
{
"permissions": {
"allow": [
"Read(//home/vscode/.claude/skills/**)",
"Bash(git fetch *)",
"Bash(git add *)",
"Bash(git commit *)",
"Bash(git merge *)",
"Bash(git pull *)",
"Bash(git push *)",
"Bash(git status *)",
"Bash(git checkout *)",
"Bash(git stash *)",
"Bash(git config *)",
"Bash(git branch *)",
"Bash(git worktree *)",
"Bash(git check-ignore *)",
"Bash(git ls-tree *)",
"Bash(npm install *)",
"Bash(npm run *)",
"Bash(npx drizzle-kit *)",
"Bash(pip install *)",
"Bash(terraform fmt *)",
"Bash(gh label *)"
],
"deny": [
"Bash(npx drizzle-kit generate)",
"Bash(npx drizzle-kit push)"

View file

@ -14,8 +14,7 @@
// the mounted host ~/.config/gh.
"postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.5 --depth 1 && bash /tmp/agentic-toolkit/setup.sh && npm install",
"forwardPorts": [3000],
"appPort": ["3000:3000"],
"forwardPorts": ["frontend:3000", "pgadmin:80"],
"mounts": [
// Optional, just makes getting from Downloads (local env) easier

View file

@ -9,7 +9,10 @@ services:
USER_GID: ${GID:-1000}
command: sleep infinity
ports:
- "3000:3000"
# Host port left unspecified so Docker assigns a free one — lets multiple
# worktrees of this repo run at once without colliding. VS Code's
# forwardPorts (below) forwards container :3000 to your machine.
- "3000"
volumes:
- ..:/workspaces/assessment-model
- ~/.gitconfig:/home/vscode/.gitconfig
@ -29,7 +32,9 @@ services:
image: dpage/pgadmin4
hostname: pgadmin
ports:
- 5556:80
# Dynamic host port (see frontend above). VS Code auto-detects and
# forwards the listening container port when the container comes up.
- "80"
env_file:
- ../.db-env
restart: unless-stopped

3
.gitignore vendored
View file

@ -41,3 +41,6 @@ next-env.d.ts
backlog/**
docs/adr/**
# Personal Claude Code settings (per-developer, not shared)
.claude/settings.local.json

View file

@ -38,13 +38,42 @@ The housing association supplying a Portfolio's BulkUploads. A Landlord knows fa
_Avoid_: customer, client, owner, organisation (Organisation is a separate, broader entity)
**Landlord override**:
A landlord-supplied fact about a property that takes precedence over EPC-derived defaults when computing an assessment. The end-to-end Landlord override journey has two layers — a **VocabularyMapping** layer (this glossary entry below) and a per-Property fact layer (not yet modelled).
A landlord-supplied fact about a property that takes precedence over EPC-derived defaults when computing an assessment. The end-to-end Landlord override journey has two layers — a **VocabularyMapping** layer (this glossary entry below) and a per-Property fact layer (the **Property override**, below).
_Avoid_: customer data, manual override, landlord data
**Property override**:
The per-Property fact layer — one resolved fact per `(Property, Building part, component)`, where component is one of `wall_type`/`roof_type`/`property_type`/`built_form_type`. Holds a **snapshot** of the resolved enum value (a denormalised copy of the VocabularyMapping outcome at finalise time, so two Properties sharing a description can later diverge), plus the original spreadsheet text it resolved from. Materialised by the finaliser **for UPRN-matched Properties only** (v2); the resolved value is never `UNKNOWN` — the Verify step forces every `UNKNOWN` to be mapped before Finalise, and an unresolved description fails the run. See [ADR-0005](./docs/adr/0005-async-bulk-upload-finaliser.md) (table) and [ADR-0006](./docs/adr/0006-property-overrides-join-and-no-uprn-defer.md) (population).
_Avoid_: per-property mapping, property fact, override row
**Source row id**:
A synthetic UUID minted per source-file row at `start-address-matching` and written into **both** the address CSV and the classifier CSV. It is the stable join key that lets the finaliser tie a row's identity (combiner output → `property_id`) to that row's raw descriptions (classifier CSV), since neither file preserves row order and `Internal Reference` is absent from the classifier CSV. See [ADR-0006](./docs/adr/0006-property-overrides-join-and-no-uprn-defer.md).
_Avoid_: row index, internal reference (a separate, optional landlord field)
**VocabularyMapping**:
The translation from a Landlord's free-text description in a BulkUpload column (e.g. `"cavity: filledcavity"`) to a canonical domain enum value (e.g. `WallType.CAVITY`). Produced by a `ColumnClassifier` (today an LLM, tomorrow possibly a lookup table or rules engine) in the Model service. Stored per-Portfolio, one row per `(category, description)`. A row carries provenance (`classifier` or `user`) so user overrides survive re-classification.
_Avoid_: column mapping (that's a separate concept — see `ColumnMapping` above), classification, dictionary
### Building parts
**Building part**:
One physically distinct part of a dwelling described by a single entry within a multi-valued cell. A dwelling is one **Main building** plus zero or more **Extensions**. Per-part descriptions appear as comma-separated entries in physical-element columns (e.g. `Walls`, `Roofs`); whole-dwelling columns (e.g. `Property Type`) carry a single entry and are **not** split per part.
_Avoid_: annexe, unit, section, dwelling part
**Main building**:
The principal building part of a dwelling — exactly one per address. The others are **Extensions**.
**Extension**:
A building part that is not the Main building, numbered **Extension 1 … Extension N-1** for an N-entry address.
_Avoid_: annexe, addition, outbuilding
**Multi-entry**:
The property of a BulkUpload row whose physical-element cells hold **more than one comma-separated entry**, one per **Building part**. Always intra-cell in our data — never multiple rows sharing one address/UPRN. Within a row, the multi-valued columns agree on entry-count, so **position `i` is the same Building part across every multi-valued column**.
_Avoid_: multi-row, multi-record, duplicate address
**Building-part ordering** (a.k.a. **ordering**):
The user's declaration, captured once per file, of which list-position maps to which Building part — because the entry order is a consistent per-file mistake (`"A, B"` could be `[Main, Extension 1]` or `[Extension 1, Main]`). Stored per entry-count as a permutation. See [ADR-0004](./docs/adr/0004-multi-entry-building-part-ordering.md).
_Avoid_: sort order, sequence, column mapping
## Lifecycle
A **BulkUpload** moves through these statuses:
@ -55,15 +84,20 @@ ready_for_processing
→ processing (Address matching triggered; Next.js writes)
→ combining (Combiner stage running; FastAPI writes directly)
→ awaiting_review (Combiner output in S3; FastAPI writes directly)
→ complete (Finalise succeeded; Next.js writes)
→ failed (FastAPI reports in-flight failure — schema only, not yet wired)
→ finalising (Finalise dispatched; Next.js writes via compare-and-swap)
→ complete (Finaliser succeeded; FastAPI/Lambda writes directly)
→ failed (Finaliser failed; FastAPI/Lambda writes directly)
```
`complete` and `failed` are terminal.
`complete` and `failed` are terminal. `finalising` is the in-flight state of the
async finaliser (mirrors `combining`); the UI renders it as "Uploading to ARA". See
[ADR-0005](./docs/adr/0005-async-bulk-upload-finaliser.md).
Re-mapping (PATCHing `columnMapping`) is legal only in `ready_for_processing` and `mapping_complete`. Any later state rejects with 409.
**Two writers**: Next.js owns transitions out of `mapping_complete`, into `processing`, and the terminal Finalise outcomes. FastAPI owns `combining` and `awaiting_review` — writing them direct to the DB during the combiner run. The BulkUpload aggregate observes both.
**Two writers**: Next.js owns transitions out of `mapping_complete`, into `processing`, and the `awaiting_review → finalising` compare-and-swap at Finalise dispatch. FastAPI/Lambda owns `combining`, `awaiting_review`, and the terminal `finalising → complete`/`failed` — writing them direct to the DB during the combiner and finaliser runs. The BulkUpload aggregate observes both. See [ADR-0005](./docs/adr/0005-async-bulk-upload-finaliser.md).
At `awaiting_review`, **Finalise is gated** (not a new status — a precondition on the action): when classifier columns were mapped the user must acknowledge the classification-verification step, and when the file is **Multi-entry** they must confirm the **Building-part ordering**. See [ADR-0004](./docs/adr/0004-multi-entry-building-part-ordering.md).
See [ADR-0001](./docs/adr/0001-bulk-upload-state-machine.md) for the deliberate "not yet" decisions baked into this lifecycle.
@ -73,6 +107,22 @@ See [ADR-0001](./docs/adr/0001-bulk-upload-state-machine.md) for the deliberate
- A **BulkUpload** produces zero or more **Properties** when finalised.
- A **BulkUpload** has at most one **Task** (the orchestration handle for the FastAPI pipeline run); a Task has many **SubTasks** (one per pipeline stage: address matching, combiner).
- A **Portfolio** has many **VocabularyMappings** — one row per `(category, description)` it has ever encountered across all its BulkUploads. See [ADR-0002](./docs/adr/0002-landlord-override-vocabulary.md).
- A **Recommendation** belongs to exactly one **Plan**. Denormalised onto `recommendation.plan_id`; the `plan_recommendations` join table is being retired.
- A **Recommendation** has at most one **Material**. Denormalised onto `recommendation.material_id` (+ `material_quantity`, `material_quantity_unit`, `material_depth`). Historically (pre-~2023) a recommendation could carry multiple materials; ~128 such legacy rows were reconciled to one each on 2026-06-07. The cardinality guard in the backfill enforces this going forward.
### Baseline performance
**Lodged performance**:
The SAP score, EPC band, CO₂ emissions, and primary energy intensity as submitted to the government EPC register. Ground truth from the register; never modified.
_Avoid_: original performance, registered performance
**Effective performance**:
The SAP score (and associated metrics) that the modelling engine actually uses as its baseline. Usually equals Lodged performance, but differs when a Landlord override or data-quality issue makes the lodged certificate unreliable — triggering a Rebaseline.
_Avoid_: current performance, adjusted performance
**Rebaseline**:
The act of substituting a corrected set of performance metrics in place of the Lodged values. Recorded on `property_baseline_performance` with a `rebaseline_reason` enum value: `none`, `pre_sap10`, `physical_state_changed`, or `both`.
_Avoid_: override, adjustment, correction
## Example dialogue
@ -81,6 +131,8 @@ See [ADR-0001](./docs/adr/0001-bulk-upload-state-machine.md) for the deliberate
>
> **Dev:** "And if **Finalise** runs and 30% of rows have no **UPRN**?"
> **Domain expert:** "Those still get imported as **Properties** — just without a UPRN — and the BulkUpload moves to `complete`. Manual cleanup happens later in the property table."
>
> _(Planned change — v3 / [ADR-0006](./docs/adr/0006-property-overrides-join-and-no-uprn-defer.md): no-UPRN rows will move to a separate staging table to be re-matched, so `property` holds only matched rows. v2 does **not** change this yet — and v2 writes **Property overrides** only for the UPRN-matched rows.)_
## Flagged ambiguities

View file

@ -0,0 +1,291 @@
# Drizzle schema handoff — pending EPC migrations
**Task:** Update Drizzle table definitions in `src/app/db/schema/property.ts` to match the
Python SQLModel definitions. Do **not** run `drizzle-kit generate` or any migration
commands — the developer will run generation manually after your changes.
Two sets of changes are covered here:
1. EPC property round-trip fidelity gaps (fixes an active production error)
2. New `property_baseline_performance` table
### Before starting: update the import line
`jsonb` is not currently imported — add it to the existing import (`pgEnum` is already present):
```typescript
import {
bigserial,
text,
timestamp,
pgTable,
real,
pgEnum,
integer,
boolean,
smallint,
bigint,
uniqueIndex,
jsonb, // ← add this
} from "drizzle-orm/pg-core";
```
---
## 1. `epc_property` — new columns
Add these columns to the `epcProperty` table. All are nullable (no `.notNull()`) except `ventilationPresent`, which is declared `.notNull().default(false)`.
```typescript
// Mechanical ventilation
mechanicalVentDuctInsulationLevel: integer("mechanical_vent_duct_insulation_level"),
// Addendum flags
addendumStoneWalls: boolean("addendum_stone_walls"),
addendumSystemBuild: boolean("addendum_system_build"),
addendumNumbers: jsonb("addendum_numbers"),
// Heating counts
heatingNumberBaths: integer("heating_number_baths"),
heatingNumberBathsWwhrs: integer("heating_number_baths_wwhrs"),
heatingElectricShowerCount: integer("heating_electric_shower_count"),
heatingMixerShowerCount: integer("heating_mixer_shower_count"),
// Ventilation detail
ventilationPresent: boolean("ventilation_present").notNull().default(false),
ventilationShelteredSides: integer("ventilation_sheltered_sides"),
ventilationHasSuspendedTimberFloor: boolean("ventilation_has_suspended_timber_floor"),
ventilationSuspendedTimberFloorSealed: boolean("ventilation_suspended_timber_floor_sealed"),
ventilationHasDraughtLobby: boolean("ventilation_has_draught_lobby"),
ventilationAirPermeabilityAp4M3HM2: real("ventilation_air_permeability_ap4_m3_h_m2"),
ventilationMechanicalVentilationKind: text("ventilation_mechanical_ventilation_kind"),
```
---
## 2. `epc_property` — type changes: `text` → `jsonb`
Change the following existing columns from `text(...)` to `jsonb(...)`. Preserve any
`.notNull()` that is currently present (none of these have it, but double-check).
| Property name | Column name |
| ------------------------------- | ---------------------------------- |
| `energyPvConnection` | `energy_pv_connection` |
| `heatingCylinderSize` | `heating_cylinder_size` |
| `heatingImmersionHeatingType` | `heating_immersion_heating_type` |
| `heatingCylinderInsulationType` | `heating_cylinder_insulation_type` |
| `heatingSecondaryHeatingType` | `heating_secondary_heating_type` |
| `heatingShowerOutletType` | `heating_shower_outlet_type` |
Example — before:
```typescript
heatingCylinderSize: text("heating_cylinder_size"),
```
After:
```typescript
heatingCylinderSize: jsonb("heating_cylinder_size"),
```
---
## 3. `epc_main_heating_detail` — type changes: `text` → `jsonb`
These four columns are currently `text(...).notNull()`. Change to `jsonb(...).notNull()`.
| Property name | Column name |
| -------------------- | ---------------------- |
| `mainFuelType` | `main_fuel_type` |
| `heatEmitterType` | `heat_emitter_type` |
| `emitterTemperature` | `emitter_temperature` |
| `mainHeatingControl` | `main_heating_control` |
---
## 4. `epc_building_part` — type changes and new columns
### 4a. Type changes: `text` → `jsonb`
| Property name | Column name | Currently nullable? |
| ----------------------------- | -------------------------------- | ----------------------------------------------------------------------------------------- |
| `wallConstruction` | `wall_construction` | no (`.notNull()`) |
| `wallInsulationType` | `wall_insulation_type` | no (`.notNull()`) |
| `partyWallConstruction` | `party_wall_construction` | **drop `.notNull()`** — Python has this as nullable; the TypeScript `.notNull()` is wrong |
| `flatRoofInsulationThickness` | `flat_roof_insulation_thickness` | yes |
| `roofInsulationLocation` | `roof_insulation_location` | yes |
| `roofInsulationThickness` | `roof_insulation_thickness` | yes |
### 4b. New columns (add, nullable)
```typescript
roofConstructionType: text("roof_construction_type"),
curtainWallAge: text("curtain_wall_age"),
```
---
## 5. `epc_window` — type changes: `text` → `jsonb`
| Property name | Column name | Currently nullable? |
| -------------------------- | ---------------------------- | ---------------------------------------------------------- |
| `glazingGap` | `glazing_gap` | no (`.notNull()`) |
| `orientation` | `orientation` | no (`.notNull()`) |
| `windowType` | `window_type` | no (`.notNull()`) |
| `glazingType` | `glazing_type` | no (`.notNull()`) |
| `windowLocation` | `window_location` | no (`.notNull()`) |
| `windowWallType` | `window_wall_type` | no (`.notNull()`) |
| `draughtProofed` | `draught_proofed` | no (`.notNull()`) — currently `boolean`, change to `jsonb` |
| `permanentShuttersPresent` | `permanent_shutters_present` | no (`.notNull()`) — currently `boolean`, change to `jsonb` |
| `transmissionDataSource` | `transmission_data_source` | yes |
> **Note on `draughtProofed` and `permanentShuttersPresent`:** these are `boolean` in the
> current TypeScript schema but `Union[bool, str]` JSONB in the Python model. Change them
> to `jsonb(...).notNull()` — the TypeScript boolean type was incorrect.
>
> These two columns also require **boolean-specific** USING clauses in the generated migration
> (PostgreSQL will not implicitly cast `boolean` to `jsonb`):
>
> ```sql
> ALTER TABLE "epc_window" ALTER COLUMN "draught_proofed"
> SET DATA TYPE jsonb
> USING to_json("draught_proofed")::jsonb;
>
> ALTER TABLE "epc_window" ALTER COLUMN "permanent_shutters_present"
> SET DATA TYPE jsonb
> USING to_json("permanent_shutters_present")::jsonb;
> ```
>
> `to_json` converts `true`/`false` to JSON booleans (not quoted strings), which is correct
> for the `Union[bool, str]` Python type.
---
## 6. New table: `epc_renewable_heat_incentive`
Add this table to `src/app/db/schema/property.ts`:
```typescript
export const epcRenewableHeatIncentive = pgTable(
"epc_renewable_heat_incentive",
{
id: bigserial("id", { mode: "bigint" }).primaryKey(),
epcPropertyId: bigint("epc_property_id", { mode: "bigint" })
.notNull()
.unique()
.references(() => epcProperty.id),
spaceHeatingKwh: real("space_heating_kwh").notNull(),
waterHeatingKwh: real("water_heating_kwh").notNull(),
impactOfLoftInsulationKwh: real("impact_of_loft_insulation_kwh"),
impactOfCavityInsulationKwh: real("impact_of_cavity_insulation_kwh"),
impactOfSolidWallInsulationKwh: real("impact_of_solid_wall_insulation_kwh"),
},
);
```
---
## 7. New table: `property_baseline_performance`
First, add the enum (before the table definition):
```typescript
export const rebaselineReasonEnum = pgEnum("rebaseline_reason", [
"none",
"pre_sap10",
"physical_state_changed",
"both",
]);
```
Then add the table to `src/app/db/schema/property.ts`:
```typescript
export const propertyBaselinePerformance = pgTable(
"property_baseline_performance",
{
id: bigserial("id", { mode: "bigint" }).primaryKey(),
propertyId: bigint("property_id", { mode: "bigint" })
.notNull()
.unique()
.references(() => property.id),
// Lodged performance (from gov EPC register)
lodgedSapScore: integer("lodged_sap_score").notNull(),
lodgedEpcBand: epcEnum("lodged_epc_band").notNull(),
lodgedCo2EmissionsTPerYr: real("lodged_co2_emissions_t_per_yr").notNull(),
lodgedPrimaryEnergyIntensityKwhPerM2Yr: integer(
"lodged_primary_energy_intensity_kwh_per_m2_yr",
).notNull(),
// Effective performance (what modelling scored against)
effectiveSapScore: integer("effective_sap_score").notNull(),
effectiveEpcBand: epcEnum("effective_epc_band").notNull(),
effectiveCo2EmissionsTPerYr: real(
"effective_co2_emissions_t_per_yr",
).notNull(),
effectivePrimaryEnergyIntensityKwhPerM2Yr: integer(
"effective_primary_energy_intensity_kwh_per_m2_yr",
).notNull(),
rebaselineReason: rebaselineReasonEnum("rebaseline_reason").notNull(),
// Interim energy demand (from EPC RHI data; superseded by bill block below once populated)
spaceHeatingKwh: real("space_heating_kwh").notNull(),
waterHeatingKwh: real("water_heating_kwh").notNull(),
// Bill block — nullable until BillDerivation wiring lands
fuelRatesPeriod: text("fuel_rates_period"),
heatingKwh: real("heating_kwh"),
heatingCostGbp: real("heating_cost_gbp"),
hotWaterKwh: real("hot_water_kwh"),
hotWaterCostGbp: real("hot_water_cost_gbp"),
lightingKwh: real("lighting_kwh"),
lightingCostGbp: real("lighting_cost_gbp"),
appliancesKwh: real("appliances_kwh"),
appliancesCostGbp: real("appliances_cost_gbp"),
cookingKwh: real("cooking_kwh"),
cookingCostGbp: real("cooking_cost_gbp"),
pumpsFansKwh: real("pumps_fans_kwh"),
pumpsFansCostGbp: real("pumps_fans_cost_gbp"),
coolingKwh: real("cooling_kwh"),
coolingCostGbp: real("cooling_cost_gbp"),
standingChargesGbp: real("standing_charges_gbp"),
segCreditGbp: real("seg_credit_gbp"),
totalAnnualBillGbp: real("total_annual_bill_gbp"),
},
);
```
---
## Post-generation checklist (developer action, not Claude)
After running `drizzle-kit generate`, **manually edit the generated `.sql` file** before
applying it. For every `ALTER COLUMN ... SET DATA TYPE jsonb` statement, add a `USING`
expression to handle existing rows safely. Without it, any row with a bare unquoted
string (e.g. `Electric Shower`) will cause the migration to fail.
Replace the generated form:
```sql
ALTER TABLE "epc_property" ALTER COLUMN "heating_shower_outlet_type" SET DATA TYPE jsonb;
```
With:
```sql
ALTER TABLE "epc_property" ALTER COLUMN "heating_shower_outlet_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "heating_shower_outlet_type" IS NULL THEN NULL
WHEN "heating_shower_outlet_type" ~ '^-?[0-9]+$' THEN "heating_shower_outlet_type"::jsonb
ELSE to_json("heating_shower_outlet_type")::jsonb
END
);
```
Apply this pattern to **every** `text → jsonb` column across all four tables
(`epc_property`, `epc_main_heating_detail`, `epc_building_part`, `epc_window`).

View file

@ -0,0 +1,291 @@
# Handover: `bulk_upload_finaliser` v2 — populate `property_overrides`
> **Purpose.** Self-contained brief to start a fresh context implementing v2 (the
> per-Property fact layer) in the existing `bulk_upload_finaliser` Lambda. v1
> (async finalise that writes `property`) is **shipped and working end-to-end**.
> This doc assumes no memory of the v1 session.
## 0. Design resolved — grilling outcome (2026-06-05)
> The open questions in §9 were resolved in a design session. **This section is now
> authoritative**; the later sections are kept for background but where they conflict
> with this one, this one wins. The new v2 ADR is
> [`docs/adr/0006-property-overrides-join-and-no-uprn-defer.md`](../adr/0006-property-overrides-join-and-no-uprn-defer.md);
> ADR-0004 was amended for per-count ordering capture.
**Spine.** Populate `property_overrides` at finalise **for UPRN-matched rows only**.
Join the classifier descriptions to the combiner identity by a **synthetic UUID
`source_row_id`** — *not* `Internal Reference` (it is **absent from the classifier
CSV**, and optional anyway) and *not* by carrying description columns through
`address2uprn` (architecture B, rejected). This is architecture **(A)** with a
purpose-built key.
**No-UPRN rows are deferred to v3.** v1 *currently* inserts them as `property` rows;
**v2 changes nothing in the property insert** and simply writes no overrides for
them. The eventual home for unmatched rows is a **separate staging table** (Model B):
`property` holds only matched rows; unmatched inputs (with their descriptions) live in
the staging table until a *different UPRN matcher* assigns a UPRN and promotes them.
"Found vs unfound" is a view across both tables, **not** a flag on `property`. v3 owns
the property-insert change + the staging table + the matcher-rerun UX together.
**Frontend work** (`/workspaces/assessment-model`):
1. **Mint `source_row_id`** (UUID) in `start-address-matching` right after
`readRows()`, and **explicitly emit it as a column in both** `buildAddressCsv` and
`buildClassifierCsv` — both project a *fixed* column set, so attaching it to the
row object is not enough. It survives `address2uprn`→combiner like any input column
(carried as `additional_info`); **verify against a real combiner output**.
2. **Per-count ordering capture** (supersedes ADR-0004's largest-count-only):
`detectMultiEntry` keeps a sample **per distinct count**; `OnboardingProgress`
renders one ordering panel **per count ≥ 2**. The jsonb type and
`setMultiEntryOrdering` validation already accept all counts — **no migration, no
backend-validation change**.
3. **Verify gate hardened**: Finalise is blocked while **any** description is still
`UNKNOWN`. `UNKNOWN` is now a **transient "needs review" marker, never a final
value** (this retires the old "`UNKNOWN` is legitimate" line in §7).
4. **`dispatchFinaliser`** adds **two fields to the trigger body**:
`classifier_s3_uri` and `multi_entry_ordering` (it already reads the
`bulk_address_uploads` row, and dispatch happens *after* the user confirms
ordering, so the value is final). The classifier S3 key comes from a **shared
`classifierCsvKey(portfolioId, uploadId)` helper** used by both the writer and the
dispatcher (the key is not stored anywhere today — convention only).
**Backend work** (`/workspaces/home/github/Model`):
5. Grow the trigger schema in two places — FastAPI `FinaliserTriggerRequest` and
Lambda `BulkUploadFinaliserTriggerBody` — with `classifier_s3_uri` +
`multi_entry_ordering`. Handler stays trigger-driven (no new `bulk_address_uploads`
coupling).
6. **`PropertyOverrideRow`** table mirror + a **sibling `PropertyOverrideRepository`**
(own aggregate; upsert on `(property_id, override_component, building_part)`), and a
**read-only `LandlordOverrideRepository`** that loads a portfolio's vocabulary
**per component into dicts once** (the vocabulary is deduplicated, not per-row).
7. **Orchestrator step**, in the same `commit_scope`:
- bulk `SELECT (portfolio_id, uprn) → id` for the run's UPRN rows → in-memory map;
- join classifier↔combiner rows by `source_row_id`;
- **uniform comma-split all four components** → `permutations[count]` → parts
(count-1 cell → `building_part = 0`); the finaliser needs **no fallback** because
every count ≥ 2 has a confirmed permutation;
- resolve each part's **normalized** description against the override dicts;
- `original_spreadsheet_description` = the **raw** entry text (un-normalized);
- **empty cell → write no row**; **non-empty but unresolved (or `UNKNOWN`) → raise** →
`commit_scope` rolls back → `_mark_failed` flips the upload to `failed`
(**fail loudly, no partial writes**);
- write only the classifier components actually **mapped** in `columnMapping`;
- **no `source` column in v2** — upsert is unconditional for now.
**Locked assumptions (load-bearing — see ADR-0006).**
- **One real upload per user.** A re-upload only adds *new* properties (ones not
previously included), never re-describes existing ones → part-keys are append-only
across uploads → **upsert-only, no delete-orphans** is correct and complete.
- **Per-count consistency.** One ordering per count, confirmed from one sample, applies
to every cell of that count in the file (extends ADR-0004's bet to all counts).
- **Per-cell count.** `Walls` may split into 3 while `Roofs` splits into 2 in the same
row; each cell is ordered by *its own* entry count.
- **Classification completes before `awaiting_review`**, and the hardened verify gate
forces every `UNKNOWN` to be resolved — so an unresolved description at finalise is a
genuine defect, hence fail-loud.
## 1. Where v1 left things (read first)
v1 made **Finalise** an async dispatched Lambda that writes `property` rows. The
full flow works in dev: dispatch `202` → SQS → Lambda inserts properties + writes
terminal status → UI advances to "Processing complete".
Authoritative background — **read these before coding**:
- `docs/design/bulk-upload-finaliser.md` — the full grilling/design doc (schema Q6–Q9, snapshot-not-FK, recalculate-on-rerun, the v2 input hazards).
- `docs/adr/0005-async-bulk-upload-finaliser.md` (frontend) — state machine + `property_overrides` shape.
- `/workspaces/home/github/Model/docs/adr/0013-bulk-upload-finaliser-writes-properties.md` (backend) — the Lambda write path + DDD layering.
- `docs/adr/0004-multi-entry-building-part-ordering.md` — **critical for v2**: how building parts and `multiEntryOrdering` work.
- `docs/adr/0002-landlord-override-vocabulary.md` — the vocabulary (`landlord_*_overrides`) layer v2 resolves against.
- `CONTEXT.md` — glossary: **Property override**, **Building part**, **Main building**, **Extension**, **Multi-entry**, **Building-part ordering**, **VocabularyMapping**.
**Convention that must hold (it was corrected hard in v1):** in the Model repo,
business logic lives in `orchestration/*_orchestrator.py`; the Lambda
`applications/*/handler.py` stays thin (parse trigger, wire infra, delegate). One
repository per aggregate; orchestrators never commit (the handler owns the
transaction via `commit_scope`). See memory `model-ddd-layering`.
## 2. v2 goal
Populate `property_overrides` during finalise: for each property, write one row per
`(building_part, override_component)` carrying the **resolved enum snapshot** of the
landlord's description for that part.
## 3. The target table (already shipped — migration 0221, do NOT re-migrate)
Drizzle: `src/app/db/schema/property_overrides.ts`.
```
property_overrides
id uuid pk
property_id bigint NOT NULL FK → property.id ON DELETE CASCADE
portfolio_id bigint NOT NULL FK → portfolio.id ON DELETE CASCADE
building_part smallint NOT NULL -- 0 = main, 1 = ext 1, 2 = ext 2, …
override_component override_component NOT NULL -- pgEnum {wall_type, roof_type, property_type, built_form_type}
override_value text NOT NULL -- snapshot of the resolved enum value
original_spreadsheet_description text NOT NULL -- raw cell text it resolved from
created_at / updated_at timestamptz NOT NULL
UNIQUE (property_id, override_component, building_part)
```
## 4. Design decisions already locked (do not relitigate)
- **Snapshot, not FK.** `override_value` is a denormalised text copy of the resolved
enum, taken at materialise time — *not* an FK to `landlord_*_overrides`. This is
what lets two properties sharing a description diverge later, and is required
because there are four polymorphic vocabulary tables. Lineage is the natural key
`(portfolio_id, override_component, original_spreadsheet_description)`.
- **Re-run = recalculate.** Write with `onConflictDoUpdate` on
`(property_id, override_component, building_part)`, refreshing `override_value` +
`original_spreadsheet_description` + `updated_at`. (Contrast `property`, which is
`onConflictDoNothing`.) When a per-property user-edit path eventually exists, this
upsert will need a `WHERE source='classifier'` guard — but there is **no `source`
column in v1**; add it as a nullable column only when that path is built.
- **`override_component` values** are exactly the classifier category keys
(`wall_type`, `roof_type`, `property_type`, `built_form_type`) used in both
`src/lib/bulkUpload/columnFields.ts` and the Model `ClassifiableColumn.name` — no
translation.
- **`building_part` indexing**: `0 = Main building`, `1 = Extension 1`, … per ADR-0004.
- **Whole-dwelling components** (`property_type`, `built_form_type`) are per-part-
capable but today's files supply them once → usually written at `building_part = 0`.
## 5. The hard part: assembling the inputs (this is the real v2 work)
The combiner output (what the v1 finaliser reads) carries **only** address/UPRN
columns — `Address 1/2/3`, `postcode`, `Internal Reference`, `address2uprn_uprn`,
`address2uprn_address`, `address2uprn_lexiscore`. The **raw `Walls`/`Roofs`/
`Property Type`/`Built Form` cells are NOT in it.** They live only in:
- the **classifier CSV** on S3 — `bulk_onboarding_inputs/{portfolioId}/{uploadId}-classifier.csv` (original landlord headers), and
- `landlord_*_overrides` in Postgres — the *resolved* values keyed by `(portfolio_id, normalized description)`.
To write one `property_overrides` row, v2 must assemble **four inputs**:
| Need | Source |
|---|---|
| `property_id` (identity) | combiner output → `(portfolio_id, uprn)` — **but no-UPRN rows have no key** |
| raw cell text per row | the classifier CSV (not the combiner output) |
| split a multi-valued cell → building parts | `multiEntryOrdering` on `bulk_address_uploads` |
| description → `override_value` | `landlord_*_overrides` (resolve by normalized description) |
### Two open hazards — both RESOLVED (see §0)
1. **Join key (RESOLVED).** Investigation confirmed `Internal Reference` is in the
address CSV + combiner output but **NOT in the classifier CSV**, and is optional.
So architecture (A)-by-`Internal Reference` is dead. **Resolution: mint a synthetic
UUID `source_row_id`** in `start-address-matching` after `readRows()`, emitted as an
explicit column in *both* CSVs. It is the join key. (Architecture (A) with a
purpose-built key; (B) "carry descriptions through `address2uprn`" was rejected.)
2. **`property_id` for no-UPRN rows (RESOLVED by descoping).** v2 writes overrides
**only for UPRN rows**, whose `property.id` is re-found by `(portfolio_id, uprn)`
— so **no `RETURNING` correlation is needed**. No-UPRN rows are deferred to v3
(Model B staging table); v2 leaves the property insert untouched.
## 6. `multiEntryOrdering` — how to split cells into parts
Persisted on `bulk_address_uploads` (`src/app/db/schema/bulk_address_uploads.ts`):
```ts
MultiEntryOrdering { permutations: Record<string, number[]>; confirmed: boolean }
// permutations[count][k] = the 0-based FILE position holding building part k
// where 0 = Main building, 1..N-1 = Extension 1..N-1.
// e.g. { "2": [1, 0] } => for 2-entry rows, the main building is file position 1.
```
A multi-valued cell (e.g. `Walls = "Cavity: …, Solid brick: …"`) splits on commas
into entries by file position; `permutations[count]` maps file position → building
part. **Caveat (ADR-0004):** only the **largest count** permutation is captured this
iteration; other counts need a derivation rule — decide it in v2.
`multiEntrySummary` holds the detected multi-valued columns + **normalized**
description keys (the normalization that matches the classifier's stored keys:
`split → strip → lower`).
## 7. Resolving description → value (`landlord_*_overrides`)
Four per-component tables in `src/app/db/schema/landlord_overrides.ts`
(`landlord_wall_type_overrides`, `…_roof_type_…`, `…_property_type_…`,
`…_built_form_type_…`), each `UNIQUE (portfolio_id, description)`, value typed by the
component's pgEnum, plus a `source` (`classifier`|`user`). Resolve a normalized
description → `value`. The frontend already does this read in
`src/lib/bulkUpload/server.ts` (`lookupOverrides`) — mirror that mapping on the
backend. **`UNKNOWN` is now a transient "needs review" marker, never a final
value** (resolved in §0): the verify gate forces the user to map every `UNKNOWN`
before Finalise, so an `UNKNOWN` (or unresolvable description) reaching the finaliser
is a defect and **fails the run loudly**.
## 8. Backend pieces to build (DDD, mirror v1)
In `/workspaces/home/github/Model`:
- **`PropertyOverrideRow`** SQLModel mirror → `infrastructure/postgres/property_override_table.py` (mirror the pattern in `property_table.py` / `landlord_*_override_table.py`; reuse a shared `override_component` SAEnum like `landlord_override_enums.py`).
- **Repository** for the override write (one per aggregate): add to
`repositories/property/` (e.g. extend the property repo or a sibling
`property_override` repo), with an `upsert_all` using
`on_conflict_do_update(index_elements=[property_id, override_component, building_part], …)`.
- **Orchestrator logic** in `orchestration/bulk_upload_finaliser_orchestrator.py`:
extend `finalise(...)` (or add a step) to, after inserting properties and getting
ids, build the override rows (join → split by part → resolve) and persist them in
the **same** `commit_scope`.
- **Handler** stays thin — it already wires S3 + engine + repos. It will need the
extra input (classifier CSV and/or `multiEntryOrdering`); decide how those reach
the Lambda (extend `BulkUploadFinaliserTriggerBody`, or read `bulk_address_uploads`
for `multiEntryOrdering` + the classifier S3 URI). The trigger currently carries
`task_id, sub_task_id, s3_uri (combiner output), portfolio_id, bulk_upload_id`.
Key v1 files to extend (all in the Model repo):
- `applications/bulk_upload_finaliser/handler.py`
- `orchestration/bulk_upload_finaliser_orchestrator.py`
- `repositories/property/property_repository.py` + `property_postgres_repository.py`
- `infrastructure/postgres/property_table.py` (reference for the new mirror)
- `infrastructure/s3/csv_s3_client.py` (`read_rows`)
- Packaging test: `tests/test_lambda_packaging.py` will flag any new top-level import
the Dockerfile doesn't `COPY` (v1 hit this with `datatypes/`).
## 9. Open questions — all RESOLVED (see §0 + ADR-0006)
- **Join key** → synthetic UUID `source_row_id` in both CSVs (not `Internal
Reference`, not architecture B).
- **`property_id` for no-UPRN rows** → out of scope; v2 is UPRN-only, no-UPRN deferred
to v3 (Model B). UPRN rows re-found by `(portfolio_id, uprn)`; no `RETURNING`.
- **Non-largest-count `multiEntryOrdering`** → capture a confirmed permutation for
**every** count ≥ 2 in the UI (supersedes ADR-0004); finaliser needs no fallback.
- **Trigger body vs handler-reads-DB** → **grow the trigger body** (`classifier_s3_uri`
+ `multi_entry_ordering`), built in `dispatchFinaliser`.
- **Re-materialise** → recalculate every finalise via **upsert-only** on
`(property_id, override_component, building_part)`; **no delete-orphans** (justified
by the one-real-upload assumption); `property` rows untouched.
## 10. Implementation order (design is settled — build it)
Frontend first (the finaliser depends on `source_row_id` + per-count ordering):
1. **`source_row_id`**: shared `classifierCsvKey` helper; mint the UUID in
`start-address-matching` after `readRows()`; emit it as an explicit column in both
`buildAddressCsv` and `buildClassifierCsv`. Verify it lands in a real combiner
output.
2. **Per-count ordering**: `detectMultiEntry` keeps a sample per count;
`OnboardingProgress` renders one ordering panel per count ≥ 2. Drop the
largest-count-only assumption in `setMultiEntryOrdering` if it requires the largest.
3. **Verify gate**: block Finalise while any classification is `UNKNOWN`.
4. **`dispatchFinaliser`**: add `classifier_s3_uri` + `multi_entry_ordering` to the
trigger payload.
Backend:
5. Grow `FinaliserTriggerRequest` (FastAPI) + `BulkUploadFinaliserTriggerBody` (Lambda).
6. `PropertyOverrideRow` mirror + sibling `PropertyOverrideRepository` (upsert) +
read-only `LandlordOverrideRepository`.
7. Orchestrator step (join → split → resolve → upsert; fail-loud on unresolved),
TDD against fakes (mirror
`tests/orchestration/test_bulk_upload_finaliser_orchestrator.py`).
8. Handler wiring; watch `tests/test_lambda_packaging.py` for Dockerfile COPY gaps.
Docs (done in this session): ADR-0004 amended, ADR-0006 added, `CONTEXT.md`
"Property override" updated.
## 11. Verification notes (environment)
- Frontend: `npx tsc --noEmit` (was 0 errors at v1 close).
- Model repo: `mypy`/`pytest` need a deps-installed env (the v1 session couldn't run
them locally; `/app` Docker config runs the full suite). `terraform plan` needs the
CLI. Watch `tests/test_lambda_packaging.py` for Dockerfile COPY gaps.
- v1 is committed; dev Lambda + SQS queue are deployed and working
(`FINALISER_SQS_URL` wired in `backend/.env` for local, and in terraform/fast-api).

View file

@ -0,0 +1,243 @@
# Design WIP: `bulk_upload_finaliser` + `property_overrides`
> **Status:** v1 fully resolved (grilling 2026-06-04). Ready to graduate to ADR(s).
> v2 (`property_overrides` population) deferred to its own session — see the
> "Input" application-flow item for its entry point. When decisions stabilise this should
> graduate into a new ADR in `docs/adr/` (frontend) and likely a companion ADR
> in the Model repo, plus a CONTEXT.md update (see "Docs to update").
## Goal
Two linked pieces of work:
1. **New backend application `bulk_upload_finaliser`** (lives in
`/workspaces/home/github/Model/applications/`, DDD-aligned — study
`/workspaces/home/github/Model/domain`). It reads the address-matching /
combiner output **and** the `landlord_*_overrides` vocabulary tables, then
writes Postgres correctly: the `property` rows (UPRN + address, as the
frontend does today) and — later — the new `property_overrides` rows.
Motivation: a property list can be ~40,000 rows, too big for a synchronous
Next.js HTTP handler.
2. **New `property_overrides` table** — the per-Property fact layer that
**ADR-0004 explicitly deferred** ("the per-Property building-part fact layer
that consumes `multiEntryOrdering` and writes main/extension facts at
finalise"). One row per `(property, building_part, component)` carrying the
resolved enum value + provenance.
**Split into two pieces (decided 2026-06-04):**
- **v1 — async finaliser writes `property`.** Move today's synchronous Next.js
`/finalize` property-insert into a dispatched Lambda (`bulk_upload_finaliser`),
because a property list can be ~40,000 rows. Reproduces the exact 9-column insert
+ `onConflictDoNothing`, adds the `finalising` status + async state machine, and
shifts terminal-status ownership to the backend. **Fully designed — ADR-ready.**
- **v2 — populate `property_overrides`.** The per-Property fact layer. The *table*
already shipped (migration 0221, PR #306), but population is a **separate
follow-up** with its own open input-plumbing questions (see the "Input" item
under application-flow questions). Not designed here.
This doc resolves **v1 in full**; v2 gets its own grilling session against real
classifier-CSV / combiner-output samples.
## Where this sits in the existing pipeline
```
BulkUpload → address matching → combiner → awaiting_review → [Finalise]
(new) bulk_upload_finaliser ──────────┘
reads: combiner output (S3) + landlord_*_overrides
writes: property (+ later property_overrides)
downstream: Ingestion (EPC/solar fetch)
→ PropertyBaseline (stage 2,
re-score-on-override seam,
Model ADR-0011/0012)
```
`Finalise` (the user action + state-machine gate) stays in Next.js; the new
application is the **worker it dispatches**. Downstream `PropertyBaseline`
already has an override-aware "re-score" seam — `property_overrides` will feed it.
## Decisions locked
| # | Decision |
|---|----------|
| Name | Application is **`bulk_upload_finaliser`**, in `Model/applications/`. `Finalise` stays the Next.js action that triggers it. |
| DDD | Follow the DDD structure under `Model/domain`. Domain terms discovered as needed. |
| Schema ownership | **Drizzle (frontend) owns migrations** for both `property` and the new `property_overrides`. |
| Backend access | Backend gets a **`PropertyOverrideRow` SQLModel** (mirror, like `landlord_wall_type_override_table.py`) + a **repository** (see `Model/infrastructure/postgres` + `Model/repositories` for examples). `PropertyRow` must drop its "backend never inserts" invariant and gain insertable columns. |
| Next.js `/finalize` | **Delete it** — fully replaced by the Lambda. |
| `property_overrides` shape | **Single polymorphic table**, not per-component tables. Accepts losing DB-level pgEnum typing on `value`. |
| `override_value` | `text` — a **denormalised snapshot copy** (own value per row) of the resolved enum from `landlord_*_overrides` at materialise time. Own-value (not an FK to the vocabulary) is what lets two properties sharing a description later **diverge**, and lets re-run recalculate one property's value without touching its siblings. |
| Snapshot, **not** FK to vocabulary | `property_overrides` does **not** foreign-key the originating `landlord_*_overrides` row. An FK forces every property sharing a description to share one value (forbids divergence); is structurally impossible as a real FK (4 polymorphic target tables, each with its own value enum); and would risk cascade-deleting per-property facts when re-classification prunes a vocabulary row. Lineage is preserved as a **natural key** — `(portfolio_id, override_component, original_spreadsheet_description)` re-finds the vocabulary row (its `UNIQUE` is `(portfolio_id, description)`) — so deliberate re-sync needs no surrogate FK. |
| Re-run = **recalculate** | The finaliser write to `property_overrides` is `onConflictDoUpdate` on `(property_id, override_component, building_part)`, refreshing `override_value` + `original_spreadsheet_description` + `updated_at` to the latest resolution. Contrast `property`, which stays `onConflictDoNothing` (identity rows, don't churn). When per-property `source='user'` edits exist, the update must guard `WHERE source='classifier'` to preserve hand-edits (mirrors the Model classifier upsert). |
| `building_part` | **`smallint NOT NULL`**, explicit index: `0 = main building, 1 = extension 1, 2 = extension 2, …` (matches ADR-0004 `multiEntryOrdering.permutations` indexing). |
| Whole-dwelling components | **No special case.** `property_type`/`built_form` are *per-part-capable* too (an extension — conservatory, summer house — can be a different built form / property type). Today's files only supply them once, so they'll usually be written at `building_part = 0` only, but the schema allows per-part with no future migration. |
## `property_overrides` — columns so far
Roughly (subject to remaining open questions):
```
property_overrides
id uuid pk (default random) -- match landlord_* tables
property_id bigint NOT NULL FK → property.id (FE-owned table)
portfolio_id bigint NOT NULL FK → portfolio.id
building_part smallint NOT NULL -- 0 = main, 1 = ext 1, 2 = ext 2, …
override_component override_component NOT NULL -- column name == enum type name; pgEnum {wall_type, roof_type, property_type, built_form_type} (Q6 ✓)
override_value text NOT NULL -- snapshot copy of landlord_* resolved enum (free text; `override_component` carries the typing)
-- (no `source`) — dropped Q9: pure value snapshot; add back as nullable column if/when a per-property edit path needs provenance
original_spreadsheet_description text NOT NULL -- raw spreadsheet cell text this snapshot resolved from (Q7 ✓)
created_at timestamptz NOT NULL default now()
updated_at timestamptz NOT NULL default now()
-- UNIQUE (property_id, override_component, building_part) -- Q8 ✓ (source NOT in key — mirrors ADR-0004 single-row flip; portfolio_id implied by property_id)
-- FK property_id → property.id ON DELETE CASCADE; portfolio_id → portfolio.id (Drizzle only; bare bigint in SQLModel mirror); portfolio_id kept (matches property_details_epc / property_targets)
```
## Open questions (resume here)
- **Q6 — `component` discriminator. RESOLVED 2026-06-04.** pgEnum
**`override_component`** (column `override_component`) with values
**`wall_type`, `roof_type`, `property_type`, `built_form_type`**. Verified these
are the *exact* keys used both in the frontend
([columnFields.ts:30-33](../../src/lib/bulkUpload/columnFields.ts#L30-L33)) and
the backend (`ClassifiableColumn.name` / handler `_build_columns()`), so the
finaliser maps category → component with **no translation**. pgEnum over text:
small closed set, typos caught at write time — and this is now the *only*
DB-level typing left on a row, since `override_value` is free text. New component
= one-line `ALTER TYPE … ADD VALUE` (Drizzle-owned). Enum named `override_*`
(not `property_*`) to sit with `override_source` and stay visually distinct from
the existing *value* enum `property_type`.
- **Q7 — store raw description per override row? RESOLVED 2026-06-04: yes, as
`original_spreadsheet_description text NOT NULL`.** Names the *source artifact*
(the spreadsheet cell), not an actor — sidesteps the Landlord-vs-User conflation
the glossary warns against, and aligns with CONTEXT.md's "the source file". Stored
because `override_value` is a denormalised snapshot that deliberately won't
refresh on later vocabulary edits; pinning the original text makes each row
self-explaining and re-resolvable even after the source `landlord_*_overrides` row
changes. `NOT NULL` is safe **iff** every `property_overrides` row is materialised
from a `landlord_*_overrides` row (whose `description` is itself `NOT NULL`) —
confirm when settling Q9/source semantics.
- **Q8 — uniqueness + FKs. RESOLVED 2026-06-04.**
`UNIQUE (property_id, override_component, building_part)`. `building_part` is in
the key (part 0 and part 1 both carry e.g. a `wall_type` row). `source` is
**deliberately not** in the key — mirrors ADR-0004's single-row-flip (one row,
flip `source` in place; the two-row model was rejected). `portfolio_id` is not in
the key (implied by `property_id`) but **is kept as a column** for query ergonomics
and consistency with `property_details_epc` / `property_targets`, which both
denormalise it. FKs: `property_id → property.id ON DELETE CASCADE`;
`portfolio_id → portfolio.id ON DELETE CASCADE` in the Drizzle migration, but a
bare `bigint` (no FK) in the backend `PropertyOverrideRow` SQLModel mirror —
matching `landlord_wall_type_override_table.py`.
- **Q9 — `source` semantics. RESOLVED 2026-06-04: drop `source` entirely.**
`property_overrides` is a pure **snapshot of resolved values**. Rationale: there
is no per-property override concept today (per ADR-0004 edits happen at the
**vocabulary/portfolio** level, flipped in place), so a copied `source` would
describe the *vocabulary mapping's* provenance, not this property's — a footgun a
reader/re-score rule could misread, and no consumer needs it in v1. When a genuine
per-property edit path lands (the real use for per-property provenance), `source`
returns as an **additive nullable-column migration** — no need to carry it now.
This also confirms the Q7 `NOT NULL` contingency: every row is still materialised
from a `landlord_*_overrides` row (`description NOT NULL`).
- **Q-scope — v1 scope. RESOLVED 2026-06-04.** v1 = the finaliser reproduces
today's **exact 9-column `property` insert** (`portfolio_id`,
`creation_status='READY'`, `uprn`, `landlord_property_id` ← `Internal Reference`,
`address` = matched ?? user-inputted, `postcode`, `user_inputted_address`,
`user_inputted_postcode`, `lexiscore`) **+** `onConflictDoNothing` on
`(portfolio_id, uprn) where uprn is not null` — not a reduced "UPRN + address".
This sizes the "PropertyRow gains insertable columns" decision to all nine
columns (`creation_status` included). The `property_overrides` *table* shipped ahead
(migration 0221, PR #306) but is **not populated** in v1 — population is
follow-up work (and needs a different input source; see combiner-output note
below).
### Application-flow questions not yet reached
- **Trigger + orchestration. RESOLVED 2026-06-04.** Mirror the
`start-address-matching` path. Next.js creates a `SubTask` (`service:
"finaliser"`) under the BulkUpload's existing Task, then POSTs a new FastAPI
endpoint `POST /v1/bulk-uploads/trigger-finaliser` (auth via `validate_token`),
which enqueues to a **new SQS queue**; a Lambda runs the finaliser wrapped in
`@subtask_handler` (auto-injected `TaskOrchestrator`; `run_subtask` owns the
subtask start/complete/fail + Task cascade). Trigger body
`FinaliserTriggerBody { task_id, sub_task_id, s3_uri (combined output), portfolio_id }`
(extends `SubtaskTriggerBody`). Slow work stays outside the txn; persistence in a
`commit_scope`. The synchronous Next.js `/finalize` route is deleted (locked).
- **State machine / who writes `complete`. RESOLVED 2026-06-04.** New status
**`finalising`** between `awaiting_review` and `complete` (mirrors `combining`
before `awaiting_review`). Lifecycle:
`awaiting_review → finalising → complete` (↘ `failed`).
- **`finalising`** written by **Next.js at dispatch** via a **compare-and-swap**:
`UPDATE … SET status='finalising' WHERE id=? AND status='awaiting_review'` —
0 rows ⇒ already dispatched ⇒ 409. This is the **double-dispatch guard** (closes
the simultaneous-click race under `loadForFinalize`'s existing precondition).
- **`complete` / `failed`** written by the **Lambda** directly to
`bulk_address_uploads` (new `set_finalized_status` / `set_failed_status`,
exactly like the combiner's `set_combining_status` /
`set_combined_output_s3_uri`). `markFinalized` + the Next.js `/finalize` route
are deleted.
- **CONTEXT.md "Two writers" change:** Next.js owns dispatch + the
`awaiting_review → finalising` CAS; the backend owns `finalising → complete`
and `→ failed` (in addition to `combining` / `awaiting_review`).
- **UI vs canonical:** persisted enum value is `finalising` (canonical; ties to
the **Finalise** action). The frontend renders it as **"Uploading to ARA"** — a
display-layer label only, **not** the enum name, so UX copy never needs a
migration.
- **Input — does the combiner output carry the raw description cells? RESOLVED
2026-06-04: NO. This is a v2 problem (deferred).** v1 needs only address/UPRN
columns, all **confirmed present** in the combiner output (`address2uprn_uprn`,
`address2uprn_address`, `address2uprn_lexiscore`, `Internal Reference`,
`Address 1/2/3`, `postcode`). The raw `Walls`/`Roofs`/`Property Type`/`Built Form`
cells are **not** in the combiner output — they survive only in (a) the
`{uploadId}-classifier.csv` on S3 (original headers) and (b) `landlord_*_overrides`
as *resolved* values keyed by description. So v2 population must assemble **four
inputs**, not one file:
- `property_id` (identity) ← combiner output `(portfolio_id, uprn)` — **but
no-UPRN rows have no such key**;
- raw cell text ← the classifier CSV (not the combiner output);
- cell → building-part split ← `multiEntryOrdering` on `bulk_address_uploads`;
- description → `override_value` ← `landlord_*_overrides` (normalized description).
- **Two open v2 hazards (entry point for the v2 session):** (1) the join key
between classifier CSV and combiner output — is there a stable per-row key
(`Internal Reference`?) and is row order preserved through postcode-split +
combine? (2) obtaining `property_id` for unmatched (no-UPRN) rows — v1's
`onConflictDoNothing` returns no ids, so v2 likely needs `RETURNING id` mapped
back to source rows.
- **Idempotency / re-run. RESOLVED 2026-06-04 (per-table).**
- `property`: keep today's `onConflictDoNothing` on `(portfolio_id, uprn) where uprn is not null` — existing properties are not churned.
- `property_overrides`: `onConflictDoUpdate` on `(property_id, override_component, building_part)` — **recalculate** `override_value` + `original_spreadsheet_description` + `updated_at` to the latest resolution, so an existing property whose override changed is refreshed in place. Guard `WHERE source='classifier'` once a per-property user-edit path exists (until then every row is classifier-derived, so blind overwrite is correct). See the "Re-run = recalculate" and "Snapshot, not FK" locked decisions.
## Key code references (from exploration)
**Frontend (`assessment-model`):**
- [finalize/route.ts](../../src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/finalize/route.ts) — today's synchronous property insert (to be deleted).
- [property.ts](../../src/app/db/schema/property.ts) — `property` table (`property_type`, `built_form` columns exist; `uq_property_portfolio_uprn`).
- [landlord_overrides.ts](../../src/app/db/schema/landlord_overrides.ts) — the four per-component override tables + all pgEnums (`wallTypeEnum`, `roofTypeEnum`, `propertyTypeEnum`, `builtFormTypeEnum`, `overrideSourceEnum`).
- [bulk_address_uploads.ts](../../src/app/db/schema/bulk_address_uploads.ts) — `multiEntryOrdering` (permutations, `0=main`), `multiEntrySummary`, `verifyAck`, `combinedOutputS3Uri`.
- [ADR-0004](../adr/0004-multi-entry-building-part-ordering.md) — defers exactly this fact layer; the building-part ordering model.
- [ADR-0002](../adr/0002-landlord-override-vocabulary.md) — vocabulary layer.
**Backend (`Model`):**
- `applications/landlord_description_overrides/handler.py` — the worker pattern to mirror (`subtask_handler`, `TaskOrchestrator`, trigger body, `commit_scope`).
- `infrastructure/postgres/landlord_wall_type_override_table.py` — SQLModel mirror pattern for the new `PropertyOverrideRow`.
- `infrastructure/postgres/landlord_override_enums.py` — shared `override_source` SAEnum pattern.
- `infrastructure/postgres/property_table.py` — `PropertyRow` defensive view ("backend never inserts" — to change).
- `repositories/landlord_overrides/landlord_override_repository.py` — repository pattern for the new override repo.
- `orchestration/landlord_description_overrides_orchestrator.py` — orchestrator pattern; note it splits cells into an orderless set (discards part order — recovered via `multiEntryOrdering`).
- Downstream: `orchestration/property_baseline_orchestrator.py` (re-score-on-override seam), `orchestration/ingestion_orchestrator.py`.
## Docs to update (when this lands)
- **CONTEXT.md**: `Property Type` / `built_form` are **per-part-capable**, not
whole-dwelling. Add the per-Property fact layer (`property_overrides`) to the
glossary + relationships. Possibly a `building_part` index definition.
- **New ADR** (frontend) for `property_overrides` + finaliser; companion Model ADR
for the cross-repo write, citing ADR-0003/0004.

View file

@ -1,13 +1,14 @@
# Landlord override frontend — in-flight design notes
**Status:** Paused mid-grilling (2026-05-27)
**Branch:** `feautre/additional_walltypes`
**Status:** Grilling complete (2026-05-28) — Q1–Q7 resolved; ready to promote to ADR
**Branch:** `feature/frontend_landlord_overrides`
**Author:** Jun-te (with Claude, via `/grill-me`)
This is a *design-in-progress* document, not an ADR. It captures decisions made
so far on the landlord-override frontend plan so the conversation can resume
without re-litigating settled questions. Promote to an ADR once the trigger
mechanism (Q4) is resolved — that's the decision worth permanent recording.
This is a _design-in-progress_ document, not an ADR. It captures the decisions
reached during grilling so the conversation can resume without re-litigating
settled questions. All seven questions are now resolved; the trigger +
state-machine integration (Q4) is ready to promote to a frontend ADR-0003, and
the work is ready to break into issues.
## Goal
@ -23,14 +24,22 @@ Build the front-end e2e for `landlord_description_override`, starting from the
rationale in [ADR-0002](../adr/0002-landlord-override-vocabulary.md).
- Nothing in Next.js reads or writes them yet.
- The Python lambda at
`/workspaces/home/github/Model/applications/landlord_description_overrides/handler.py`
is **not deployed** and **not wired** into the BulkUpload pipeline. It
hardcodes its trigger params (`portfolio_id`, `s3_uri`) and its source column
names (`"Property Type"`, `"Walls"`, `"Roofs"`).
- Note: ADR-0002 says writes come from Next.js POST, but the current backend
writes direct to Postgres. This drift may need to be revisited under Q4.
`Model/applications/landlord_description_overrides/handler.py` (branch
`feature/landlord_data`) is **not deployed** and **not wired** into the
BulkUpload pipeline. It hardcodes the trigger fields (handler.py:55-60) and
builds a hardcoded `ClassifiableColumn` list with `source_column`s
`"Property Type"` (→ both `property_type` **and** `built_form_type`),
`"Walls"`, `"Roofs"`. It also caps the batch to 20 rows while under test.
- **Resolved drift (was a Q4 risk):** the backend's
[ADR-0003 (python-writes-landlord-overrides-directly)](https://github.com/Hestia-Homes/Model/blob/main/docs/adr/0003-python-writes-landlord-overrides-directly.md),
accepted 2026-05-26, **supersedes** ADR-0002's "writes happen from Next.js"
clause. The lambda computes **and** persists the classification directly to
Postgres via a SQLAlchemy `LandlordOverrideRepository[E]`, with an
`ON CONFLICT … WHERE source = 'classifier'` upsert. There is **no** Next.js
POST-back. Drizzle stays schema source-of-truth; the Python `SQLModel`
shadows it.
## Decided so far
## Decided
### Q1 — Scope
@ -41,8 +50,7 @@ to the lambda → lambda needs edits to work when deployed.
### Q2 — Categories
**All four classifier categories** get independent optional slots in
[`INTERNAL_FIELDS`](../../src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx#L14-L21):
**All four classifier categories** are independent optional fields:
`property_type`, `built_form_type`, `wall_type`, `roof_type`.
Rejected alternatives: (a) start with only PT+BF — wasted plumbing churn for
@ -50,58 +58,121 @@ the same migration cost; (c) collapse PT+BF into one UI slot — bakes a
backend coincidence (they read the same CSV column today) into the
user-facing model.
### Q2.1 — No `autoDetect` for the new slots
### Q2.1 — No `autoDetect` for classifier fields
The four new slots default to `"skip"`. The user must explicitly map them.
`autoDetect()` regex patterns are for required address-ish fields only.
The four classifier fields default to unmapped ("Not provided"). The user must
explicitly map them. `autoDetect()` is for required address-ish fields only.
**Why:** Address headers are unambiguous and required, so guessing is safe and
useful. Landlord-description columns are ambiguous (a "type" column could be
PropertyType or BuiltFormType or something else) and they are optional —
useful. Landlord-description columns are ambiguous and optional —
auto-detecting them would silently opt the landlord into classifier runs they
didn't intend.
## Open — resume here
### Q2.2 — Mapping shape: unified `field → header` (refines Q2's mechanism)
### Q3 (in flight) — Uniqueness validation on the mapping
The mapping is **one unified map keyed by internal field, valued by source CSV
header** (`{ address_1: "Addr 1", property_type: "Property Type",
built_form_type: "Property Type", wall_type: "Walls", … }`), replacing the
current `header → field` shape
([MapColumnsClient.tsx:62-64](<../../src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx#L62-L64>)).
Today validation only checks required fields exist
([MapColumnsClient.tsx:67-68](../../src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/map-columns/MapColumnsClient.tsx#L67-L68));
two CSV headers can both map to `address_1` silently.
**Why:** the backend's `ClassifiableColumn` is a `(name, source_column)` pair
where **one `source_column` feeds many `name`s** — `"Property Type"` feeds both
`property_type` and `built_form_type`
([classifiable_column.py:28-31](https://github.com/Hestia-Homes/Model/blob/feature/landlord_data/orchestration/classifiable_column.py)).
A `header → field` map (one value per header) **cannot express** that; the
chosen mechanism in Q2 (extending the single `INTERNAL_FIELDS` dropdown) would
leave `built_form_type` unmappable. `field → header` matches the backend model
1:1 and lets multiple categories share a header.
- (a) Leave it alone — backend last-wins.
- (b) Enforce uniqueness only on the four new slots.
- (c) Enforce uniqueness everywhere except `skip`. **Recommended.**
**Consequences:** UI inverts to one row per internal field with a header-picker;
`autoDetect` inverts to "best header per field"; "skip" becomes a per-field
"Not provided"; rewrite
[`transformFile`](<../../src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts#L17-L54>)
(currently iterates `header → field`), the `z.record` route schema, and
**migrate existing persisted `columnMapping` rows** (semantics flip) — see Q5.
### Q4 (queued — biggest) — Trigger mechanism
### Q3 — Sharing / uniqueness rule (reframed)
How does Next.js invoke the lambda once mapping is complete? SQS message?
Direct lambda invoke? HTTP endpoint? And what's the state-machine integration —
new `BulkUpload` status, or runs orthogonally to address matching?
A source header may feed **at most one address/reference field**, but **any
number of classifier fields** (PT + BF → `"Property Type"` is required, so
classifier sharing must be allowed). Required-field validation stays:
`address_1` and `postcode` must each be assigned a header.
This drives both the deployment work and the lambda edits. Likely worth its
own ADR once decided.
This **replaces** the original "enforce uniqueness everywhere except skip"
framing, which was wrong once classifier header-sharing became a requirement.
### Q5 (queued) — Persistence of the extended mapping
### Q4 — Trigger mechanism + state-machine integration (the big one)
Current `bulkAddressUploads.columnMapping` is a `Record<string, string>` and
naturally accommodates the new slots. Confirm no separate table is needed.
- **Transport:** reuse
[`triggerFastApiPipeline`](../../src/lib/bulkUpload/server.ts) with a **new
FastAPI endpoint**; payload `{ task_id, sub_task_id, s3_uri, portfolio_id,
column_mapping }`. FastAPI turns the POST into the SQS subtask envelope the
handler TODO (handler.py:51-54) references. Not a direct lambda invoke, not
Next.js → SQS.
- **`s3_uri` is the ORIGINAL upload** (`upload.s3Bucket/s3Key`), **not** the
address-matching transformed CSV — the description columns and their original
header names only survive in the original (the address transform strips every
non-address column).
- **Writes:** lambda writes overrides directly to Postgres (ADR-0003); no
POST-back.
- **State machine:** the classifier runs as a **subtask under the same address
task** (not a separate task, not a new `BulkUpload` status). Both subtasks
fire together at the **"Start address matching"** action. The combined address
*output* is not affected (the classifier writes Postgres, not
`ara_raw_outputs/{task_id}/`), **but** the parent Task *status* IS recomputed
from all subtasks (`TaskOrchestrator._cascade`), so a classifier failure fails
the onboarding task and gates the combiner. This coupling was knowingly
accepted (2026-05-28), superseding Q1 non-blocking — see ADR-0003's Amended
note. (Corrects an earlier wrong claim that the envelope does no gating.)
- **Progress honesty:** add a nullable **`service`/`kind` discriminator to
`sub_task`** (existing rows = address/legacy) so the progress view shows
address batches vs classification separately and attributes failures
correctly. Update the 3 subtask-count sites + `OnboardingProgress`.
### Q6 (queued) — Lambda edits
### Q5 — Persistence + migration of the inverted mapping
Handler hardcodes `source_column="Property Type" / "Walls" / "Roofs"`; needs
to read the mapping from the trigger body.
`LandlordDescriptionOverridesTriggerBody` already exists — check what fields
it has vs needs.
Reuse `bulkAddressUploads.columnMapping` (jsonb `Record<string, string>`); **no
separate table**. The Q2.2 inversion (`header → field` → `field → header`) is
handled by a **one-shot data migration**: invert each non-skip entry; on a
legacy duplicate (two headers → one field, which the new Q3 rule forbids
anyway) last-write-wins; `skip` entries drop. The migration is a no-op on empty
tables, so it's safe regardless of data volume. `validateMapping`
([server.ts:97-102](../../src/lib/bulkUpload/server.ts#L97-L102)) and the route
schema must be rewritten for the inverted shape (they currently check the
**values** for `address_1`/`postcode`).
### Q7 (queued) — Review/Edit UI for classified mappings
### Q6 — Lambda edits
Is the per-row review/edit surface in scope for this iteration, or deferred?
User has not addressed yet. ADR-0002 calls this "the future override
frontend" and treats it as deferred work — but "front end e2e from
bulk_upload" could reasonably include it.
`LandlordDescriptionOverridesTriggerBody` (task_id, sub_task_id, s3_uri,
portfolio_id; `extra="allow"`) gains a **`column_mapping: dict[str, str]`**
field carrying **only the classifier subset** (`category → source header`); the
frontend extracts the four classifier keys from its unified map before sending.
The handler keeps a fixed registry of the four category builders (each owns its
enum, repo, and any hint such as the wall construction-date one) and supplies
`source_column` from `column_mapping`, **skipping categories the user didn't
map**. Both `property_type` and `built_form_type` carry `"Property Type"`, so
PT+BF sharing falls out for free. Drop the hardcoded trigger and the 20-row cap.
## Resuming
### Q7 — Results UI: read-only this iteration
Re-read this file, then ask Q3. Don't re-litigate Q1/Q2 unless the user
reopens them.
This iteration ships a **read-only** results surface: it reads the four
`landlord_*_overrides` tables and shows each `description → value` (per
portfolio) so the pipeline is observably e2e. **No editing / write-back** — the
user-override write path (correcting a row to `source='user'`, which the
classifier upsert won't overwrite) is the deferred "future override frontend"
ADR-0002 anticipates.
## Next steps
Grilling is complete (Q1–Q7). Suggested follow-ups:
1. **Done** — Q4 promoted to
[ADR-0003](../adr/0003-classifier-triggers-as-address-subtask.md) (trigger +
state-machine integration), cross-linking the backend's ADR-0003.
2. Break the work into issues (`/to-issues`): (a) invert the mapping shape + UI
+ migration + validation; (b) `sub_task` discriminator + progress view;
(c) classifier trigger (new FastAPI endpoint + payload, fire at "Start");
(d) lambda edits (trigger body + registry-from-mapping); (e) read-only
results view.
3. Confirm the working branch (see header) before implementation starts.

View file

@ -0,0 +1,88 @@
# Landlord override e2e — verification & deploy checklist
**Created:** 2026-05-28
**Branch:** `feature/frontend_landlord_overrides` (this repo) + `feature/landlord_data` (Model repo)
**Plan:** [landlord-override-frontend-plan.md](./landlord-override-frontend-plan.md) · **ADR:** [0003-classifier-triggers-as-address-subtask.md](../adr/0003-classifier-triggers-as-address-subtask.md)
## Context for picking this up cold
The landlord-classifier e2e is **implemented across both repos but uncommitted**, and
**statically verified only** (frontend `tsc` 0 errors, `next lint` clean, backend
`py_compile` clean). It has **not** been run live — that needs the steps below
(migrations applied, SQS queue + env, FastAPI endpoint + lambda deployed with
OpenAI/S3/Postgres access). Two migration files are **generated but not applied**:
`0215_invert_column_mapping.sql` (data) and `0216_add_subtask_service.sql` (schema).
Work through the sections in order — each step's prerequisites come first.
---
## A. Before you start
- [ ] Use dev/preview first, not prod.
- [ ] Confirm `.env.local` DB creds (`DB_HOST/PORT/USERNAME/PASSWORD/NAME`) point at the target DB.
- [ ] **Back up** `column_mapping` — the 0215 inversion is one-shot/irreversible:
```sql
CREATE TABLE _bak_bulk_mapping AS
SELECT id, column_mapping FROM bulk_address_uploads WHERE column_mapping IS NOT NULL;
```
## B. Database migrations ⚠️ read the gotcha
`0215` = data (inverts `header→field` → `field→header`); `0216` = schema (`ADD COLUMN sub_task.service`).
⚠️ package.json only has `migration:push` (`drizzle-kit push`). **`push` diffs schema and will NOT run the 0215 data `UPDATE`** — it would add the column but silently skip the inversion. Use `migrate`:
- [ ] ```bash
npx drizzle-kit migrate # runs 0215 then 0216 in order
```
(If the team only uses `push`: run `push` for 0216, then execute `0215`'s SQL manually.)
- [ ] ⚠️ Run **0215 exactly once, on old-shape data**. Re-running re-inverts and corrupts. `migrate` guards via the journal; manual runs don't.
- [ ] Verify 0215 — values should now be headers:
```sql
SELECT column_mapping FROM bulk_address_uploads WHERE column_mapping IS NOT NULL LIMIT 5;
-- expect {"address_1":"Addr 1","postcode":"PCode", ...}
```
- [ ] Verify 0216:
```sql
SELECT 1 FROM information_schema.columns
WHERE table_name='sub_task' AND column_name='service';
```
## C. Backend deploy (Model service)
- [ ] Create an SQS queue for the classifier (e.g. `landlord-description-overrides`).
- [ ] Set **`LANDLORD_OVERRIDES_SQS_URL`** in the FastAPI env to that queue.
- [ ] Deploy FastAPI so `/v1/bulk-uploads/trigger-landlord-overrides` is live.
- [ ] Deploy the lambda (`applications/landlord_description_overrides`) + event-source mapping queue → lambda.
- [ ] Lambda env/IAM: `OPENAI_API_KEY`, Postgres creds, **S3 read on the original-upload bucket** (it reads `upload.s3Bucket/s3Key`, not `retrofit-data-dev`).
## D. Frontend deploy
- [ ] Deploy `assessment-model` with the new code (`FASTAPI_API_URL` / `FASTAPI_API_KEY` already set).
## E. Verify the UI (Column Remapper)
- [ ] Map-columns page shows **one row per field with a header dropdown**, split into **Address fields** + **Landlord description fields**.
- [ ] Leaving Address 1 / Postcode unset blocks submit.
- [ ] Two **address** fields → one column is blocked; the **same** column → Property Type + Built Form is allowed.
- [ ] Existing `mapping_complete` uploads open with their mapping intact (confirms 0215).
## F. Verify trigger → classify → persist
- [ ] Map ≥1 landlord-description field, click **Start address matching**.
- [ ] `sub_task` has two rows under the task: `service='address2uprn'` and `service='landlord_description_overrides'`.
- [ ] SQS message enqueued + lambda ran (CloudWatch).
- [ ] Rows appear with `source='classifier'`:
```sql
SELECT description, value FROM landlord_property_type_overrides WHERE portfolio_id = <id> LIMIT 10;
```
## G. Verify the results view
- [ ] `/portfolio/<id>/landlord-overrides` lists `description → value` per category with a "classifier" badge. (No nav link yet — reach by URL.)
## H. Regression — address matching unaffected
- [ ] The same upload's address pipeline still emits the canonical CSV (`Address 1`/`postcode`) and combines normally.
## I. Watch-out (by design — ADR-0003 "accepted coupling")
- [ ] If the classifier subtask **fails**, the shared onboarding task goes FAILED and **"Run Combiner" is blocked**; the task only COMPLETEs once classification finishes. If painful, switch to the separate-task design in the ADR.
---
## Still open / not done
- [ ] Commit the work (this repo + Model repo, separately) — currently uncommitted.
- [ ] Nav link to `/portfolio/<id>/landlord-overrides` (reachable by URL only).
- [ ] User-edit write-back for overrides (deferred — Q7 "read-only this iteration").

View file

@ -0,0 +1,178 @@
# Multi-entry building-part ordering — in-flight design notes
**Status:** Grilling complete (2026-06-02) — ready to break into issues
**Branch:** `feature/frontend_landlord_overrides`
**Author:** Jun-te (with Claude, via `/grill-me`)
A _design-in-progress_ document, not the ADR. It records the decisions reached
during grilling so the conversation can resume without re-litigating settled
questions. The flow + schema decision is promoted to
[ADR-0004](../adr/0004-multi-entry-building-part-ordering.md); new domain terms
are promoted to [CONTEXT.md](../../CONTEXT.md#building-parts).
## Goal
After address matching and classification finish, a single address row can carry
**comma-separated entries** in physical-element columns — e.g.
`Walls = "Cavity: AsBuilt (1976-1982), Cavity: FilledCavity"`,
`Roofs = "Flat: As Built, PitchedNormalLoftAccess: 200mm"`. Each entry is a
**building part** (main building + extensions). The order is ambiguous and a
**consistent per-file mistake**, so we capture the correct ordering from the user
**once per file** and persist it on the BulkUpload for a later consumer.
## Backstory / ground truth (verified against the example file + code)
- In `ARA AddressProfiling_Download_28-04-2026_10501 (2).xlsx` (32,213 data
rows): **0 UPRNs appear in more than one row** — multi-entry is
comma-separated values **inside one cell**, never multiple rows per address.
- In a multi-entry row the multi-valued columns **agree on count** (Walls=2 ∧
Roofs=2) while whole-dwelling columns stay at 1 (`Property Type` = `"House:
EndTerrace"`). So position *i* is the **same building part across every
multi-valued column**.
- The classifier today **discards** this: [`get_col_to_description_mappings`](/workspaces/home/github/Model/orchestration/landlord_description_overrides_orchestrator.py)
does `value.split(",")` into a **`set`** — orderless, deduped. Correct for the
vocabulary layer (description→enum), but it drops exactly the
position/building-part association this feature needs.
- This is the **per-Property building-part fact** territory ADR-0002 deferred
("a per-Property fact layer (not yet modelled)"). We are **not** building that
layer here — only **capturing** the ordering it will need.
## Decided
### Q1 — Order semantics: full reorder, keyed by count
Position *i* = a building part. The user supplies a **permutation per distinct
entry-count**; persisted as `{ count: permutation }`. This iteration captures
only the **largest-count** sample (see Q5).
### Q1.1 — Order scope: one ordering across all columns
A single per-count permutation realigns **every** multi-valued column at once
(index-aligned — Walls[i] and Roofs[i] are the same part). Not per-column.
Matches the data (counts agree across columns).
### Q1.2 — Mixed counts: single-value columns are whole-property
A 1-entry column (e.g. `Property Type`) is a **whole-dwelling** fact attached to
the property; only columns with N>1 are sliced into building parts. No padding.
### Q2 — Scope: capture + persist ordering only
Detect multi-entry, show one sample address + our classification, capture the
per-count ordering, persist on the BulkUpload. **Not** in scope: the
per-Property fact table or writing main/extension facts at finalise. The
ordering is stored for a later consumer.
### Q2.1 — Editable verification IS in scope (expands Q2)
The "verify classification" step lets the user **correct** a classification,
written back as `source='user'`. This deliberately picks up ADR-0002 Q7's
deferred **vocabulary** user-override write path — distinct from the per-Property
fact layer, which stays deferred.
### Q3 — Placement: on the `awaiting_review` surface
Render the flow on the existing
[OnboardingProgress](<../../src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx>)
page when `status === "awaiting_review"`. Classification finishes *before* the
combiner (both subtasks must complete → combiner → `awaiting_review`), so by the
time Finalise is offered the classification output exists. No new route.
### Q3.1 — Flow: two-step stepper, steps appear independently
- **Step 1 — Verify classification** — shows whenever **≥1 classifier column**
was mapped.
- **Step 2 — Confirm order** — shows only when **multi-entry was detected**.
- A file with classifier columns but no multi-entry shows only Step 1; a file
with neither goes straight to Finalise.
### Q3.2 — Gate: both steps gate Finalise (where each applies)
`canFinalize = status==="awaiting_review" && (noClassifierCols || verifyAck) &&
(noMultiEntry || orderingConfirmed)`. Two flags persisted. Finalise is one
click but the button stays disabled until its applicable gates are satisfied.
### Q4 — Verify step lists the sample address's entries only
Step 1 lists just the descriptions in the **one sample address** (matches "one
address"). Because a correction is per-`(portfolio, description)`, editing one
changes the mapping **portfolio-wide** for that text — the UI must say so. A
spot-check, not full-vocabulary coverage.
### Q4.1 — Write-back: Next.js upsert, `source='user'`, single row (as built)
A Next.js route handler / server action upserts the `landlord_*_overrides` row
by `(portfolio_id, description)` setting `value` + `source='user'`, validating
against the pgEnum. **Schema unchanged** — we keep ADR-0002's `UNIQUE
(portfolio_id, description)` and flip the single row's source in place. The
Python classifier's existing `ON CONFLICT … WHERE source='classifier'`
([landlord_overrides_postgres_repository.py:84-91](/workspaces/home/github/Model/infrastructure/landlord_overrides/landlord_overrides_postgres_repository.py#L84))
then never re-clobbers it.
> Considered and **rejected**: two rows per description (classifier + user) with
> read-time `user > classifier` resolution. It buys "revert to our suggestion" +
> provenance, and is cheap now (no readers exist yet), but reopens ADR-0002's
> `UNIQUE` decision and migrates Drizzle + 4 Python tables + the conflict target.
> Not worth it for this iteration; the single-row flip already gives "user wins".
> This is the first Next.js writer of a `source='user'` row.
### Q5 — Which sample: the largest-count row
Show one sample address — the row with the **most** building parts — so ordering
it reveals the fullest convention. In the common case (only N=2) that is a
single 2-part address.
### Q5.1 — Reorder UI: label each position
Lay the file's entries out as rows (position 0, 1, …), each with a building-part
dropdown (**Main building** / **Extension 1** / …). Assigning labels yields the
permutation and validates (each part used once, exactly one Main building). All
multi-valued columns are shown together, each raw entry annotated with our
classified enum, so the user sanity-checks classification **and** alignment.
### Q6 — Detection: at start, persist a summary
Compute the multi-entry summary in the **start-address-matching POST**
([route.ts:106](<../../src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/start-address-matching/route.ts#L106>))
where the full `rows` are already parsed in memory — which columns are
multi-valued, the distinct counts (with row-counts so we can pick the largest),
and the largest-count sample (address + per-column raw entries). Avoids
re-reading a 32k-row file at render. Classification enums are joined at render
from the override tables.
### Q7 — Persistence: two jsonb columns on `bulk_address_uploads`
- `multiEntrySummary jsonb` — written at start (detection).
- `multiEntryOrdering jsonb` — written at confirm: `{ count: permutation }` plus
`verifyAck` / `orderingConfirmed` flags (final shape TBD; may split flags into
their own columns).
No new table — mirrors how `columnMapping` lives on the upload row.
## Risks / load-bearing assumptions
1. **Consistent-mistake assumption.** All rows of a given count share one
ordering convention. The whole "ask once" design rests on this; if a file
mixes conventions within a count, a single per-count permutation is wrong.
2. **Largest-count-only capture.** Smaller counts stay unpopulated in the map.
A future consumer (or a later UI iteration) needs a derivation rule to apply
the convention to other counts.
3. **Normalization coupling — mitigated.** To join the sample's raw entries to
the override tables the frontend must match the backend's `split(",")` →
`strip` → `lower`. **Resolution:** store the *normalized* description keys in
`multiEntrySummary` at start (the route already holds the rows), so the
render-time join is exact-match — no cross-repo string-normalization drift.
4. **Portfolio-wide blast radius.** A verify-step edit changes the mapping for
every row with that description, not just the sample address. Must be
messaged in the UI.
## Suggested issues (`/to-issues`)
1. Schema: two jsonb columns on `bulk_address_uploads` + migration.
2. Detection at start: compute + persist `multiEntrySummary` (with normalized
description keys).
3. Verify step: list sample descriptions → enum (join override tables),
editable; Next.js upsert route writing `source='user'`; `verifyAck` flag.
4. Order step: largest-count sample, position→part dropdowns → permutation;
persist `multiEntryOrdering`; `orderingConfirmed` flag.
5. Gate: wire `canFinalize` to the two flags; conditional stepper rendering.

1
migrate_to_db.sh Normal file
View file

@ -0,0 +1 @@
npx drizzle-kit migrate

View file

@ -12,8 +12,9 @@
"test:e2e:open": "start-server-and-test dev http://localhost:3000 \"cypress open --e2e\"",
"test:e2e:run": "cypress run",
"migration:generate": "drizzle-kit generate",
"migration:push": "drizzle-kit push",
"create_user": "tsx src/app/db/create_user.ts"
"migration:migrate": "drizzle-kit migrate",
"create_user": "tsx src/app/db/create_user.ts",
"backfill:recommendation-denormalization": "tsx src/app/db/backfill-recommendation-denormalization.ts"
},
"dependencies": {
"@aws-sdk/client-s3": "^3.971.0",

View file

@ -1 +0,0 @@
npx drizzle-kit push

View file

@ -0,0 +1,68 @@
import {
getSampleClassifications,
getUnknownOverrides,
setClassificationOverride,
} from "@/lib/bulkUpload/server";
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import { z } from "zod";
// Read-only: the classifier's resolved enums for the review sample's entries
// (field -> description -> value), plus the descriptions still classified
// `Unknown` portfolio-wide — the Finalise gate blocks until that list is empty
// and the user can resolve each via PATCH below (ADR-0004 #298, ADR-0006).
/**
 * Returns the data the review step needs for one upload.
 *
 * Responds 401 when there is no session. Otherwise loads, in parallel, the
 * sample classifications for `uploadId` and the unknown overrides for
 * `portfolioId`, and returns them as `{ classifications, unknown }` with 200.
 * A failure in either lookup is logged and reported as a JSON 500, matching
 * the error handling of the PATCH handler in this file.
 *
 * NOTE(review): only "is there a session" is checked here; no per-portfolio
 * access check is visible in this handler — confirm it is enforced elsewhere.
 *
 * @param _request Unused; present to satisfy the route-handler signature.
 * @param params   Route params (awaited) carrying `portfolioId` and `uploadId`.
 */
export async function GET(
  _request: NextRequest,
  { params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
  const session = await getServerSession(AuthOptions);
  if (!session) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });

  const { portfolioId, uploadId } = await params;

  try {
    // The two reads are independent, so run them concurrently.
    const [classifications, unknown] = await Promise.all([
      getSampleClassifications(uploadId),
      getUnknownOverrides(portfolioId),
    ]);
    return NextResponse.json({ classifications, unknown }, { status: 200 });
  } catch (error) {
    // Without this, a rejected lookup escapes the handler as an unhandled
    // error instead of the JSON error shape the other handlers return.
    console.error("Failed to load classifications:", error);
    return NextResponse.json({ error: "Internal server error" }, { status: 500 });
  }
}
// Request body accepted by PATCH. Only "each value is a string" is checked
// here; the three values are forwarded as-is to `setClassificationOverride`,
// and that helper's `invalid` result is what the handler turns into a 422.
const PatchSchema = z.object({
// Passed as the `field` argument of `setClassificationOverride`.
field: z.string(),
// The description text whose classification is being corrected.
description: z.string(),
// The replacement value to store for that description.
value: z.string(),
});
// Correct one classification, written as a user override (source='user'). The
// edit is per-(portfolio, description), so it applies portfolio-wide (issue #299).
/**
 * Saves one corrected classification for the portfolio in the route.
 *
 * Status codes:
 *  - 401 when there is no session;
 *  - 400 when the body is not JSON or does not satisfy `PatchSchema`;
 *  - 422 when `setClassificationOverride` reports the input as `invalid`
 *    (its `reason` is returned as the error text);
 *  - 200 `{ ok: true }` on success;
 *  - 500 when the save throws (the error is logged).
 *
 * @param request Incoming request whose JSON body is validated by `PatchSchema`.
 * @param params  Route params (awaited); only `portfolioId` is used.
 */
export async function PATCH(
  request: NextRequest,
  { params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
  const session = await getServerSession(AuthOptions);
  if (!session) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  const { portfolioId } = await params;

  // Malformed JSON and schema violations are both reported as the same 400.
  let payload: ReturnType<typeof PatchSchema.parse>;
  try {
    const raw: unknown = await request.json();
    payload = PatchSchema.parse(raw);
  } catch {
    return NextResponse.json({ error: "Invalid input" }, { status: 400 });
  }

  try {
    const { field, description, value } = payload;
    const outcome = await setClassificationOverride(
      portfolioId,
      field,
      description,
      value,
    );
    return outcome.kind === "invalid"
      ? NextResponse.json({ error: outcome.reason }, { status: 422 })
      : NextResponse.json({ ok: true }, { status: 200 });
  } catch (err) {
    console.error("Failed to save classification override:", err);
    return NextResponse.json({ error: "Internal server error" }, { status: 500 });
  }
}

View file

@ -25,6 +25,10 @@ export async function POST(
);
case "already_combined":
return NextResponse.json({ alreadyCombined: true }, { status: 200 });
case "already_dispatched":
// Lost the double-dispatch CAS (or the combiner is already running) — a
// benign no-op; the client just keeps polling and sees `combining`.
return NextResponse.json({ alreadyDispatched: true }, { status: 200 });
case "not_found":
return NextResponse.json({ error: "Not found" }, { status: 404 });
case "missing_task":

View file

@ -1,160 +1,46 @@
import { db } from "@/app/db/db";
import { property } from "@/app/db/schema/property";
import { sql } from "drizzle-orm";
import { NextRequest, NextResponse } from "next/server";
import { revalidatePath } from "next/cache";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import { createRetrofitDataS3Client } from "@/app/utils/s3";
import * as XLSX from "xlsx";
import { loadForFinalize, markFinalized } from "@/lib/bulkUpload/server";
// Column names read from each row of the combined output sheet.
// The three address-line columns; their non-empty values are joined with ", "
// to form the user-inputted address.
const ADDRESS_COLS = ["Address 1", "Address 2", "Address 3"] as const;
// Column holding the postcode as the user supplied it.
const POSTCODE_COL = "postcode";
// Column stored on the property as `landlordPropertyId`.
const INTERNAL_REF_COL = "Internal Reference";
// Columns read for the matched UPRN, matched address and lexiscore.
const UPRN_COL = "address2uprn_uprn";
const MATCHED_ADDRESS_COL = "address2uprn_address";
const LEXISCORE_COL = "address2uprn_lexiscore";
// Cell text that `isMissing` treats the same as an empty cell (compared
// case-insensitively).
const MISSING_SENTINEL = "invalid postcode";
// Loose, unanchored, case-insensitive UK postcode pattern; used to pull a
// postcode out of the matched address string.
const UK_POSTCODE_RE = /[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}/i;
/**
 * Converts a raw cell value to trimmed text.
 *
 * @param v Any cell value.
 * @returns `""` for `null`/`undefined`, otherwise `String(v)` with surrounding
 *          whitespace removed.
 */
function normalize(v: unknown): string {
  // `== null` matches both null and undefined, and nothing else.
  return v == null ? "" : String(v).trim();
}
/**
 * Tells whether an already-normalized cell should be treated as absent.
 *
 * @param v Cell text.
 * @returns true for the empty string, or for text equal to
 *          `MISSING_SENTINEL` ignoring case.
 */
function isMissing(v: string): boolean {
  if (v === "") return true;
  return v.toLowerCase() === MISSING_SENTINEL;
}
/**
 * Reads a UPRN cell as a bigint.
 *
 * @param raw Raw cell value.
 * @returns The parsed bigint, or null when the cell is missing (per
 *          `isMissing`) or `BigInt` rejects the text.
 */
function parseUprn(raw: unknown): bigint | null {
  const text = normalize(raw);
  if (!isMissing(text)) {
    try {
      return BigInt(text);
    } catch {
      // Text BigInt cannot parse is treated as "no UPRN" rather than an error.
    }
  }
  return null;
}
/**
 * Reads a lexiscore cell as a finite number.
 *
 * @param raw Raw cell value.
 * @returns The numeric value, or null when the cell is missing (per
 *          `isMissing`) or does not convert to a finite number.
 */
function parseLexiscore(raw: unknown): number | null {
  const text = normalize(raw);
  if (isMissing(text)) return null;

  const score = Number(text);
  // NaN and ±Infinity are not usable scores.
  if (!Number.isFinite(score)) return null;
  return score;
}
/**
 * Picks the postcode to store for a row.
 *
 * @param matched  Matched address text, or null when there is none.
 * @param fallback Postcode to use when no postcode is found in `matched`.
 * @returns The first `UK_POSTCODE_RE` match inside `matched`, upper-cased;
 *          otherwise `fallback`, with an empty fallback becoming null.
 */
function extractPostcode(matched: string | null, fallback: string): string | null {
  // A null or empty matched address is never searched.
  const hit = matched ? matched.match(UK_POSTCODE_RE) : null;
  if (hit) return hit[0].toUpperCase();
  return fallback || null;
}
/**
 * Splits an `s3://bucket/key` URI into its bucket and key.
 *
 * The key is everything after the first `/` following the bucket, so keys
 * containing further slashes are preserved intact.
 *
 * @param uri Candidate S3 URI.
 * @returns `{ bucket, key }`, or null when the URI does not start with
 *          `s3://`, has no `/` after the bucket, or has an empty bucket or
 *          empty key (neither can address an S3 object).
 */
function parseS3Uri(uri: string): { bucket: string; key: string } | null {
  const scheme = "s3://";
  if (!uri.startsWith(scheme)) return null;

  const rest = uri.slice(scheme.length);
  const slash = rest.indexOf("/");
  // slash < 0: no separator; slash === 0: empty bucket ("s3:///key");
  // slash at the last index: empty key ("s3://bucket/").
  if (slash <= 0 || slash === rest.length - 1) return null;

  return { bucket: rest.slice(0, slash), key: rest.slice(slash + 1) };
}
import { readSessionToken } from "@/lib/session";
import { dispatchFinaliser } from "@/lib/bulkUpload/server";
// Finalise is now asynchronous (ADR-0005). This route no longer inserts
// properties; it dispatches the bulk_upload_finaliser Lambda and flips the
// BulkUpload to `finalising` via a compare-and-swap (the double-dispatch guard).
// The Lambda reads the combiner output, inserts the property rows, and writes the
// terminal `complete`/`failed` status directly. The user sees "Uploading to ARA"
// while the row is `finalising`; the onboarding surface polls for the outcome.
export async function POST(
_request: NextRequest,
request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
const session = await getServerSession(AuthOptions);
if (!session) return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
const { uploadId } = await params;
const sessionToken = readSessionToken(request);
const guarded = await loadForFinalize(uploadId);
switch (guarded.kind) {
const result = await dispatchFinaliser({ uploadId, sessionToken });
switch (result.kind) {
case "ok":
// Accepted: the finaliser is running; the row is now `finalising`.
return NextResponse.json({ taskId: result.taskId }, { status: 202 });
case "not_found":
return NextResponse.json({ error: "Not found" }, { status: 404 });
case "already_finalized":
// Idempotent: nothing to do.
return new NextResponse(null, { status: 200 });
case "wrong_state":
return NextResponse.json(
{ error: `Upload not ready to finalize (state: ${guarded.current})` },
{ status: 409 }
);
case "not_yet_combined":
return NextResponse.json({ error: "Combiner not finished" }, { status: 409 });
}
const upload = guarded.upload;
const parsed = parseS3Uri(upload.combinedOutputS3Uri!);
if (!parsed) {
return NextResponse.json({ error: "Invalid combined output S3 URI" }, { status: 500 });
}
const s3 = createRetrofitDataS3Client();
let rawRows: Record<string, unknown>[];
try {
const obj = await s3
.getObject({ Bucket: parsed.bucket, Key: parsed.key })
.promise();
const buf = Buffer.from(obj.Body as Uint8Array);
const wb = XLSX.read(buf, { type: "buffer" });
const sheet = wb.Sheets[wb.SheetNames[0]];
rawRows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: "" });
} catch (err) {
console.error("Failed to read combined CSV from S3:", err);
return NextResponse.json({ error: "Failed to read combined CSV" }, { status: 502 });
}
const portfolioIdBig = BigInt(upload.portfolioId);
const values = rawRows.map((raw) => {
const userInputtedAddress =
ADDRESS_COLS.map((c) => normalize(raw[c])).filter(Boolean).join(", ") || null;
const userInputtedPostcode = normalize(raw[POSTCODE_COL]) || null;
const uprn = parseUprn(raw[UPRN_COL]);
const matchedAddressRaw = normalize(raw[MATCHED_ADDRESS_COL]);
const matchedAddress = isMissing(matchedAddressRaw) ? null : matchedAddressRaw;
const address = matchedAddress ?? userInputtedAddress;
const postcode = extractPostcode(matchedAddress, userInputtedPostcode ?? "");
const internalRef = normalize(raw[INTERNAL_REF_COL]) || null;
const lexiscore = parseLexiscore(raw[LEXISCORE_COL]);
return {
portfolioId: portfolioIdBig,
creationStatus: "READY" as const,
uprn,
landlordPropertyId: internalRef,
address,
postcode,
userInputtedAddress,
userInputtedPostcode,
lexiscore,
};
});
try {
if (values.length > 0) {
await db
.insert(property)
.values(values)
.onConflictDoNothing({
target: [property.portfolioId, property.uprn],
where: sql`${property.uprn} IS NOT NULL`,
});
}
await markFinalized(uploadId);
revalidatePath("/portfolio/[slug]", "layout");
return new NextResponse(null, { status: 200 });
} catch (err) {
console.error("Failed to finalize bulk upload:", err);
return NextResponse.json({ error: "Failed to import properties" }, { status: 500 });
case "missing_task":
return NextResponse.json({ error: "Upload has no task to finalise" }, { status: 409 });
case "wrong_state":
return NextResponse.json(
{ error: `Upload not ready to finalize (state: ${result.current})` },
{ status: 409 }
);
case "trigger_failed":
return NextResponse.json({ error: result.message }, { status: result.status });
}
}

View file

@ -0,0 +1,49 @@
import { setMultiEntryOrdering } from "@/lib/bulkUpload/server";
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import { z } from "zod";
// Request body accepted by PATCH. The schema only checks shape: string keys,
// each mapped to an array of non-negative integers. It does not check that an
// array is a valid permutation — `setMultiEntryOrdering` can still answer
// `invalid_ordering`, which the handler returns as a 422.
const PatchSchema = z.object({
// entry-count -> permutation (part slot -> file position). See ADR-0004.
permutations: z.record(z.string(), z.array(z.number().int().nonnegative())),
});
/**
 * Saves the user's multi-entry ordering for one upload.
 *
 * Status codes:
 *  - 401 when there is no session;
 *  - 400 when the body is not JSON or does not satisfy `PatchSchema`;
 *  - 200 with the updated upload when `setMultiEntryOrdering` succeeds;
 *  - 404 for `not_found`;
 *  - 409 for `wrong_state` (message names the current state) and for
 *    `not_multi_entry`;
 *  - 422 for `invalid_ordering` (the helper's `reason` is the error text);
 *  - 500 when the save throws (the error is logged).
 *
 * @param request Incoming request whose JSON body is validated by `PatchSchema`.
 * @param params  Route params (awaited); only `uploadId` is used.
 */
export async function PATCH(
  request: NextRequest,
  { params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
  // Every error reply from this handler has the same `{ error }` body.
  const fail = (error: string, status: number) =>
    NextResponse.json({ error }, { status });

  const session = await getServerSession(AuthOptions);
  if (!session) return fail("Unauthorized", 401);

  const { uploadId } = await params;

  // Malformed JSON and schema violations are both reported as the same 400.
  let permutations: ReturnType<typeof PatchSchema.parse>["permutations"];
  try {
    ({ permutations } = PatchSchema.parse(await request.json()));
  } catch {
    return fail("Invalid input", 400);
  }

  try {
    const outcome = await setMultiEntryOrdering(uploadId, permutations);
    switch (outcome.kind) {
      case "ok":
        return NextResponse.json(outcome.upload, { status: 200 });
      case "not_found":
        return fail("Not found", 404);
      case "wrong_state":
        return fail(`Cannot set ordering in state '${outcome.current}'`, 409);
      case "not_multi_entry":
        return fail("Upload has no multi-entry rows", 409);
      case "invalid_ordering":
        return fail(outcome.reason, 422);
    }
  } catch (error) {
    console.error("Failed to save multi-entry ordering:", error);
    return fail("Internal server error", 500);
  }
}

View file

@ -1,39 +1,35 @@
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { randomUUID } from "node:crypto";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import { createS3Client, createRetrofitDataS3Client, retrofitDataS3Bucket } from "@/app/utils/s3";
import * as XLSX from "xlsx";
import { loadForAddressMatching, triggerAddressMatching } from "@/lib/bulkUpload/server";
import { loadForAddressMatching, saveMultiEntrySummary, triggerAddressMatching, triggerClassifier } from "@/lib/bulkUpload/server";
import { readSessionToken } from "@/lib/session";
import { ADDRESS_FIELDS, classifierMapping } from "@/lib/bulkUpload/columnFields";
import { addressCsvKey, classifierCsvKey, SOURCE_ROW_ID_COLUMN } from "@/lib/bulkUpload/s3Keys";
import { detectMultiEntry } from "@/lib/bulkUpload/multiEntry";
const FIELD_RENAME: Record<string, string> = {
address_1: "Address 1",
address_2: "Address 2",
address_3: "Address 3",
postcode: "postcode",
internal_reference: "Internal Reference",
};
type SheetRow = Record<string, unknown>;
function transformFile(
buffer: Buffer,
columnMapping: Record<string, string>
): { csv: string; error?: never } | { csv?: never; error: string } {
function readRows(buffer: Buffer): SheetRow[] {
const wb = XLSX.read(buffer, { type: "buffer" });
const sheet = wb.Sheets[wb.SheetNames[0]];
const rows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: "" });
return XLSX.utils.sheet_to_json<SheetRow>(sheet, { defval: "" });
}
if (rows.length === 0) return { error: "Empty file" };
const sourceHeaders = Object.keys(rows[0]);
// Address-matching CSV: address fields only, renamed to canonical headers.
function buildAddressCsv(
rows: SheetRow[],
columnMapping: Record<string, string> // field → source header
): { csv: string; error?: never } | { csv?: never; error: string } {
const outputHeaders: string[] = [];
const sourceToOutput: Record<string, string> = {};
for (const src of sourceHeaders) {
const mapped = columnMapping[src];
if (!mapped || mapped === "skip") continue;
const renamed = FIELD_RENAME[mapped] ?? mapped;
outputHeaders.push(renamed);
sourceToOutput[src] = renamed;
const outputToSource: Record<string, string> = {};
for (const field of ADDRESS_FIELDS) {
const src = columnMapping[field.value];
if (!src || !field.outputHeader) continue;
outputHeaders.push(field.outputHeader);
outputToSource[field.outputHeader] = src;
}
if (!outputHeaders.includes("Address 1"))
@ -41,11 +37,17 @@ function transformFile(
if (!outputHeaders.includes("postcode"))
return { error: 'Mapping must include "postcode"' };
// Carry the synthetic per-row join key through to the combiner output, so the
// finaliser can re-associate a UPRN-matched row with its classifier
// descriptions (ADR-0006). It rides `address2uprn` as a preserved input column.
outputHeaders.push(SOURCE_ROW_ID_COLUMN);
const outputRows = rows.map((row) => {
const out: Record<string, unknown> = {};
for (const [src, renamed] of Object.entries(sourceToOutput)) {
out[renamed] = row[src] ?? "";
const out: SheetRow = {};
for (const [outName, src] of Object.entries(outputToSource)) {
out[outName] = row[src] ?? "";
}
out[SOURCE_ROW_ID_COLUMN] = row[SOURCE_ROW_ID_COLUMN] ?? "";
return out;
});
@ -53,6 +55,32 @@ function transformFile(
return { csv: XLSX.utils.sheet_to_csv(outSheet) };
}
// Classifier CSV: projects each row down to the mapped classifier source
// columns, keeping the original header names (the lambda resolves them via
// column_mapping). Doing the conversion here guarantees the classifier reads a
// real CSV even when the upload was .xlsx/.xls — see ADR-0003. Several
// categories may point at the same source header, so headers are de-duplicated.
function buildClassifierCsv(
  rows: SheetRow[],
  classifierMap: Record<string, string> // category → source header
): string {
  const distinctHeaders = Array.from(new Set(Object.values(classifierMap)));

  const projectedRows = rows.map((row) => {
    const projected: SheetRow = {};
    distinctHeaders.forEach((header) => {
      projected[header] = row[header] ?? "";
    });
    // The synthetic join key is written explicitly: this function emits a fixed
    // column set, so merely having the key on the input row would not carry it
    // into the output. The finaliser joins on `source_row_id` (ADR-0006).
    projected[SOURCE_ROW_ID_COLUMN] = row[SOURCE_ROW_ID_COLUMN] ?? "";
    return projected;
  });

  // Column order: the distinct classifier headers first, join key last.
  const columnOrder = distinctHeaders.concat([SOURCE_ROW_ID_COLUMN]);
  const sheet = XLSX.utils.json_to_sheet(projectedRows, { header: columnOrder });
  return XLSX.utils.sheet_to_csv(sheet);
}
export async function POST(
request: NextRequest,
{ params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
@ -91,11 +119,29 @@ export async function POST(
return NextResponse.json({ error: "Failed to read source file" }, { status: 500 });
}
const transformed = transformFile(fileBuffer, upload.columnMapping!);
const parsedRows = readRows(fileBuffer);
if (parsedRows.length === 0)
return NextResponse.json({ error: "Empty file" }, { status: 422 });
// Mint a stable synthetic id per source row, here at the one point both CSVs
// are built from the same array, and write it into both. It is the finaliser's
// join key between the combiner output (identity) and the classifier CSV
// (descriptions) — see ADR-0006. Deterministic ordering is not required: both
// CSVs are produced together in this handler, so they always share values.
const rows = parsedRows.map((row) => ({
...row,
[SOURCE_ROW_ID_COLUMN]: randomUUID(),
}));
// Detect multi-entry building parts now, while the whole file is parsed in
// memory, so the awaiting_review surface never re-reads it (ADR-0004).
await saveMultiEntrySummary(uploadId, detectMultiEntry(rows, upload.columnMapping!));
const transformed = buildAddressCsv(rows, upload.columnMapping!);
if (transformed.error)
return NextResponse.json({ error: transformed.error }, { status: 422 });
const transformedKey = `bulk_onboarding_inputs/${portfolioId}/${uploadId}.csv`;
const transformedKey = addressCsvKey(portfolioId, uploadId);
try {
await outputS3
.putObject({
@ -112,13 +158,37 @@ export async function POST(
const s3Uri = `s3://${outputBucket}/${transformedKey}`;
const trigger = await triggerAddressMatching({
uploadId,
s3Uri,
sessionToken: readSessionToken(request),
});
// Convert the mapped classifier columns to their own CSV so the classifier
// lambda always parses a real CSV, never the raw upload (which may be
// .xlsx/.xls). Only when the user mapped ≥1 classifier column. See ADR-0003.
const classifierMap = classifierMapping(upload.columnMapping!);
let classifierS3Uri: string | undefined;
if (Object.keys(classifierMap).length > 0) {
const classifierKey = classifierCsvKey(portfolioId, uploadId);
try {
await outputS3
.putObject({
Bucket: outputBucket,
Key: classifierKey,
Body: buildClassifierCsv(rows, classifierMap),
ContentType: "text/csv",
})
.promise();
classifierS3Uri = `s3://${outputBucket}/${classifierKey}`;
} catch (err) {
// Non-blocking: classification is skipped, address matching proceeds.
console.error("Failed to upload classifier CSV:", err);
}
}
const sessionToken = readSessionToken(request);
const trigger = await triggerAddressMatching({ uploadId, s3Uri, sessionToken });
if (trigger.kind === "trigger_failed")
return NextResponse.json({ error: trigger.message }, { status: trigger.status });
// Co-fire the landlord classifier (non-blocking) under the same task.
if (classifierS3Uri)
await triggerClassifier({ taskId: trigger.taskId, uploadId, s3Uri: classifierS3Uri, sessionToken });
return NextResponse.json({ taskId: trigger.taskId }, { status: 200 });
}

View file

@ -0,0 +1,35 @@
import { setVerifyAck } from "@/lib/bulkUpload/server";
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
// Handles the "Verify classification" acknowledgement (ADR-0004 Step 1). The
// request carries no body: per-row corrections go through the classifications
// PATCH, while this endpoint only marks that the user has checked the sample,
// which unlocks Finalise.
// NOTE(review): only `uploadId` is read from the route params; `portfolioId` is
// never checked here — confirm access scoping happens in setVerifyAck or upstream.
export async function PATCH(
  request: NextRequest,
  { params }: { params: Promise<{ portfolioId: string; uploadId: string }> }
) {
  const session = await getServerSession(AuthOptions);
  if (!session) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  const uploadId = (await params).uploadId;

  try {
    const outcome = await setVerifyAck(uploadId);
    // Map each result kind onto its HTTP response.
    switch (outcome.kind) {
      case "not_found":
        return NextResponse.json({ error: "Not found" }, { status: 404 });
      case "wrong_state": {
        const error = `Cannot verify classification in state '${outcome.current}'`;
        return NextResponse.json({ error }, { status: 409 });
      }
      case "ok":
        return NextResponse.json(outcome.upload, { status: 200 });
    }
  } catch (err) {
    // Anything thrown by setVerifyAck is logged and reported as a generic 500.
    console.error("Failed to record classification verification:", err);
    return NextResponse.json({ error: "Internal server error" }, { status: 500 });
  }
}

View file

@ -0,0 +1,226 @@
/**
* Backfills the denormalised plan_id / material_* columns on `recommendation`
* from the plan_recommendations and recommendation_materials join tables, then
* builds the supporting indexes and validates the foreign keys — all ONLINE.
*
* Why this lives outside drizzle:
* drizzle-kit migrate wraps every pending migration in ONE transaction, so it
* cannot COMMIT between batches, cannot run CREATE INDEX CONCURRENTLY, and
* holds the ADD COLUMN's AccessExclusiveLock for the whole run. This script
* instead commits each batch, keeping locks tiny, WAL/bloat bounded, EBS IO
* burst balance healthy, and progress visible + resumable.
* See docs/adr/0001-data-backfills-outside-drizzle.md
*
* Run AFTER `npm run migration:migrate` has applied 0222/0224 (the column adds):
* npm run backfill:recommendation-denormalization
*
* Safe to re-run: every step is idempotent (IS NULL guards, IF NOT EXISTS,
* VALIDATE on an already-valid constraint is a no-op). If interrupted, just run
* it again — it resumes from wherever it left off.
*
* Tunables via env:
* BACKFILL_BATCH_SIZE rows scanned per committed batch (default 25000)
* BACKFILL_SLEEP_MS pause between batches, eases IO pressure (default 50)
*/
import dotenv from "dotenv";
import { Pool, type PoolClient } from "pg";
// Load variables from .env.local into process.env before the process.env reads
// that follow in this file (tunables and DB connection settings).
dotenv.config({ path: ".env.local" });
/**
 * Reads an integer tunable from the environment, failing fast on bad input.
 *
 * An unset variable yields `fallback`. Anything that is set must convert (via
 * `Number`) to an integer >= `min`; otherwise this throws, so a typo cannot
 * silently degrade the run.
 *
 * @param name     Environment variable name.
 * @param fallback Value used when the variable is not set.
 * @param min      Smallest accepted value (inclusive).
 * @returns The validated integer.
 * @throws Error when the variable is set but is not an integer >= `min`.
 */
function readIntEnv(name: string, fallback: number, min: number): number {
  const raw = process.env[name];
  if (raw === undefined) return fallback;
  const value = Number(raw);
  if (!Number.isInteger(value) || value < min) {
    throw new Error(
      `Invalid ${name}=${JSON.stringify(raw)}: expected an integer >= ${min}`,
    );
  }
  return value;
}

// A batch size of 0 (including a blank value, which Number() turns into 0)
// would make the first batch empty, so the scan would stop immediately and the
// script would report success without backfilling anything. Require >= 1.
const BATCH_SIZE = readIntEnv("BACKFILL_BATCH_SIZE", 25_000, 1);
// 0 is allowed here and simply disables the pause between batches.
const SLEEP_MS = readIntEnv("BACKFILL_SLEEP_MS", 50, 0);
// Connection pool for the script; every setting comes from a DB_* environment
// variable.
const { DB_HOST, DB_PORT, DB_USERNAME, DB_PASSWORD, DB_NAME } = process.env;
const pool = new Pool({
  host: DB_HOST,
  port: Number(DB_PORT),
  user: DB_USERNAME,
  password: DB_PASSWORD,
  database: DB_NAME,
});
/** Returns a promise that resolves (with no value) after `ms` milliseconds. */
function sleep(ms: number): Promise<unknown> {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/**
 * Guards the 1:1 assumption: throws if any recommendation_id occurs more than
 * once in `joinTable`, so the caller can abort before changing data.
 *
 * @param client    Connection the check query runs on.
 * @param joinTable Table name, spliced verbatim into the SQL text — it must be
 *                  a trusted identifier, never external input.
 * @throws Error when at least one recommendation_id has multiple rows.
 */
async function assertSingleCardinality(
  client: PoolClient,
  joinTable: string,
): Promise<void> {
  const duplicateCountSql = `SELECT count(*)::int AS violations
     FROM (
       SELECT recommendation_id
       FROM ${joinTable}
       GROUP BY recommendation_id
       HAVING count(*) > 1
     ) dupes`;

  const result = await client.query(duplicateCountSql);
  const [summary] = result.rows;
  const violations = summary.violations as number;

  const hasDuplicates = violations > 0;
  if (!hasDuplicates) return;

  const headline = `Aborting: ${violations} recommendation(s) map to multiple ${joinTable} `;
  const advice =
    "rows. The backfill cannot pick deterministically. Resolve cardinality first.";
  throw new Error(headline + advice);
}
/**
 * Keyset-paginate the whole `recommendation` table by id and, for each batch,
 * set the target columns from the join table where a match exists. Each batch
 * is its own autocommitted statement (no surrounding BEGIN), so locks are held
 * only for the rows in that batch.
 *
 * We scan by id (not `WHERE col IS NULL LIMIT n`) so rows with no match don't
 * get re-selected forever — they're simply left NULL and we move past them.
 *
 * @param client    Connection the batch statements run on.
 * @param label     Prefix used on every progress log line.
 * @param updateSql Builds the batch statement from two placeholders: the
 *                  exclusive lower id bound and the batch size. The statement
 *                  must return a single row with `max_id`, `scanned` and
 *                  `updated`.
 */
async function backfillColumns(
  client: PoolClient,
  label: string,
  updateSql: (idFrom: string, limit: string) => string,
): Promise<void> {
  // The statement text is identical for every batch; only the bound values change.
  const sql = updateSql("$1", "$2");
  let lastId = "0";
  let scannedTotal = 0;
  let updatedTotal = 0;
  const startedAt = Date.now();

  for (;;) {
    const { rows } = await client.query(sql, [lastId, BATCH_SIZE]);
    // node-postgres hands bigint results (count(*), and max() over a bigint id)
    // back as strings by default, so the counters are converted to numbers
    // BEFORE they are tested: the string "0" is truthy and would slip past a
    // plain `!scanned` check.
    const row = rows[0] as {
      max_id: string | number | null;
      scanned: string | number;
      updated: string | number;
    };
    const scanned = Number(row.scanned);

    // An empty batch means the scan has moved past the highest id.
    if (!(scanned > 0) || row.max_id === null) break;

    scannedTotal += scanned;
    updatedTotal += Number(row.updated);
    lastId = String(row.max_id);

    // Clamp to 1ms so the very first report can never divide by zero.
    const elapsedMs = Math.max(Date.now() - startedAt, 1);
    const rate = Math.round(scannedTotal / (elapsedMs / 1000));
    console.log(
      `[${label}] up to id ${lastId} — scanned ${scannedTotal.toLocaleString()}, ` +
        `updated ${updatedTotal.toLocaleString()} (${rate.toLocaleString()} rows/s scan)`,
    );

    // Optional breather between batches to ease IO pressure.
    if (SLEEP_MS > 0) await sleep(SLEEP_MS);
  }

  console.log(
    `[${label}] done: scanned ${scannedTotal.toLocaleString()}, ` +
      `updated ${updatedTotal.toLocaleString()} in ` +
      `${Math.round((Date.now() - startedAt) / 1000)}s`,
  );
}
/**
 * Runs the whole backfill on a single pooled connection, in order:
 * cardinality guard, plan_id backfill, material_* backfill, concurrent index
 * builds, foreign-key validation, and a final count of rows still NULL.
 *
 * The connection is released and the pool closed in `finally`, whether the run
 * succeeds or throws.
 */
async function main(): Promise<void> {
const client = await pool.connect();
try {
console.log(
`Config: batch=${BATCH_SIZE.toLocaleString()} rows, sleep=${SLEEP_MS}ms\n`,
);
// 1. Guard the 1:1 assumption before mutating anything.
console.log("Checking cardinality...");
await assertSingleCardinality(client, "plan_recommendations");
await assertSingleCardinality(client, "recommendation_materials");
// 2. Backfill plan_id.
// The statement picks the next id-ordered batch, updates only rows whose
// plan_id is still NULL, and returns the batch's max id plus scanned/updated
// counts for the pagination loop.
await backfillColumns(
client,
"plan_id",
(idFrom, limit) => `
WITH batch AS (
SELECT id FROM recommendation
WHERE id > ${idFrom}
ORDER BY id
LIMIT ${limit}
),
upd AS (
UPDATE recommendation r
SET plan_id = pr.plan_id
FROM batch b
JOIN plan_recommendations pr ON pr.recommendation_id = b.id
WHERE r.id = b.id
AND r.plan_id IS NULL
RETURNING r.id
)
SELECT (SELECT max(id) FROM batch) AS max_id,
(SELECT count(*) FROM batch) AS scanned,
(SELECT count(*) FROM upd) AS updated`,
);
// 3. Backfill the four material_* columns in one pass.
// Same batch shape as step 2; `material_id IS NULL` is the only re-run guard,
// so the other three columns are written together with it.
await backfillColumns(
client,
"material",
(idFrom, limit) => `
WITH batch AS (
SELECT id FROM recommendation
WHERE id > ${idFrom}
ORDER BY id
LIMIT ${limit}
),
upd AS (
UPDATE recommendation r
SET material_id = rm.material_id,
material_quantity = rm.quantity,
material_quantity_unit = rm.quantity_unit,
material_depth = rm.depth
FROM batch b
JOIN recommendation_materials rm ON rm.recommendation_id = b.id
WHERE r.id = b.id
AND r.material_id IS NULL
RETURNING r.id
)
SELECT (SELECT max(id) FROM batch) AS max_id,
(SELECT count(*) FROM batch) AS scanned,
(SELECT count(*) FROM upd) AS updated`,
);
// 4. Build indexes CONCURRENTLY (no write lock). Must NOT be in a txn —
// a pooled client with no BEGIN runs each statement autocommitted.
// NOTE(review): if a previous run was interrupted mid-build, PostgreSQL can
// leave an INVALID index behind under the same name; IF NOT EXISTS would then
// skip it. Confirm (pg_index.indisvalid) and drop/recreate if that happens.
console.log("Creating indexes concurrently...");
await client.query(
`CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_recommendation_plan_id
ON recommendation USING btree (plan_id)`,
);
await client.query(
`CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_recommendation_material_id
ON recommendation USING btree (material_id)`,
);
// 5. Validate the FKs online (ShareUpdateExclusiveLock — allows reads/writes).
console.log("Validating foreign keys...");
await client.query(
`ALTER TABLE recommendation
VALIDATE CONSTRAINT recommendation_plan_id_plan_id_fk`,
);
await client.query(
`ALTER TABLE recommendation
VALIDATE CONSTRAINT recommendation_material_id_material_id_fk`,
);
// 6. Report any rows still unlinked (expected for material; investigate for plan).
const { rows } = await client.query(
`SELECT count(*) FILTER (WHERE plan_id IS NULL) AS plan_null,
count(*) FILTER (WHERE material_id IS NULL) AS material_null
FROM recommendation`,
);
console.log(
`\nRemaining NULLs — plan_id: ${Number(rows[0].plan_null).toLocaleString()}, ` +
`material_id: ${Number(rows[0].material_null).toLocaleString()}`,
);
console.log("Backfill complete.");
} finally {
// Always hand the connection back and shut the pool down, even on failure.
client.release();
await pool.end();
}
}
// Script entry point: any failure is logged and turned into a non-zero exit code.
main().catch((failure) => {
  console.error("Backfill failed:", failure);
  process.exit(1);
});

View file

@ -0,0 +1 @@
ALTER TABLE "bulk_address_uploads" ADD COLUMN "multi_entry_summary" jsonb;

View file

@ -0,0 +1 @@
ALTER TABLE "bulk_address_uploads" ADD COLUMN "multi_entry_ordering" jsonb;

View file

@ -0,0 +1 @@
ALTER TABLE "bulk_address_uploads" ADD COLUMN "verify_ack" boolean DEFAULT false NOT NULL;

View file

@ -0,0 +1,281 @@
CREATE TYPE "public"."rebaseline_reason" AS ENUM('none', 'pre_sap10', 'physical_state_changed', 'both');--> statement-breakpoint
ALTER TYPE "public"."file_type" ADD VALUE IF NOT EXISTS 'other';--> statement-breakpoint
CREATE TABLE "epc_renewable_heat_incentive" (
"id" bigserial PRIMARY KEY NOT NULL,
"epc_property_id" bigint NOT NULL,
"space_heating_kwh" real NOT NULL,
"water_heating_kwh" real NOT NULL,
"impact_of_loft_insulation_kwh" real,
"impact_of_cavity_insulation_kwh" real,
"impact_of_solid_wall_insulation_kwh" real,
CONSTRAINT "epc_renewable_heat_incentive_epc_property_id_unique" UNIQUE("epc_property_id")
);
--> statement-breakpoint
CREATE TABLE "property_baseline_performance" (
"id" bigserial PRIMARY KEY NOT NULL,
"property_id" bigint NOT NULL,
"lodged_sap_score" integer NOT NULL,
"lodged_epc_band" "epc" NOT NULL,
"lodged_co2_emissions_t_per_yr" real NOT NULL,
"lodged_primary_energy_intensity_kwh_per_m2_yr" integer NOT NULL,
"effective_sap_score" integer NOT NULL,
"effective_epc_band" "epc" NOT NULL,
"effective_co2_emissions_t_per_yr" real NOT NULL,
"effective_primary_energy_intensity_kwh_per_m2_yr" integer NOT NULL,
"rebaseline_reason" "rebaseline_reason" NOT NULL,
"space_heating_kwh" real NOT NULL,
"water_heating_kwh" real NOT NULL,
"fuel_rates_period" text,
"heating_kwh" real,
"heating_cost_gbp" real,
"hot_water_kwh" real,
"hot_water_cost_gbp" real,
"lighting_kwh" real,
"lighting_cost_gbp" real,
"appliances_kwh" real,
"appliances_cost_gbp" real,
"cooking_kwh" real,
"cooking_cost_gbp" real,
"pumps_fans_kwh" real,
"pumps_fans_cost_gbp" real,
"cooling_kwh" real,
"cooling_cost_gbp" real,
"standing_charges_gbp" real,
"seg_credit_gbp" real,
"total_annual_bill_gbp" real,
CONSTRAINT "property_baseline_performance_property_id_unique" UNIQUE("property_id")
);
--> statement-breakpoint
-- Convert the column to jsonb in place. NULL stays NULL, values made up only of
-- digits (with an optional leading minus) are cast directly and so become JSON
-- numbers, and everything else goes through to_json (assumes the column is
-- currently text, in which case it becomes a JSON string — TODO confirm).
-- NOTE(review): a digit string with a leading zero (e.g. '007') matches the
-- regex but is not valid JSON, so the ::jsonb cast would likely be rejected —
-- confirm no such values exist before running this against real data.
ALTER TABLE "epc_building_part" ALTER COLUMN "wall_construction"
SET DATA TYPE jsonb
USING (
CASE
WHEN "wall_construction" IS NULL THEN NULL
WHEN "wall_construction" ~ '^-?[0-9]+$' THEN "wall_construction"::jsonb
ELSE to_json("wall_construction")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_building_part" ALTER COLUMN "wall_insulation_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "wall_insulation_type" IS NULL THEN NULL
WHEN "wall_insulation_type" ~ '^-?[0-9]+$' THEN "wall_insulation_type"::jsonb
ELSE to_json("wall_insulation_type")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_building_part" ALTER COLUMN "party_wall_construction"
SET DATA TYPE jsonb
USING (
CASE
WHEN "party_wall_construction" IS NULL THEN NULL
WHEN "party_wall_construction" ~ '^-?[0-9]+$' THEN "party_wall_construction"::jsonb
ELSE to_json("party_wall_construction")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_building_part" ALTER COLUMN "party_wall_construction" DROP NOT NULL;--> statement-breakpoint
ALTER TABLE "epc_building_part" ALTER COLUMN "flat_roof_insulation_thickness"
SET DATA TYPE jsonb
USING (
CASE
WHEN "flat_roof_insulation_thickness" IS NULL THEN NULL
WHEN "flat_roof_insulation_thickness" ~ '^-?[0-9]+$' THEN "flat_roof_insulation_thickness"::jsonb
ELSE to_json("flat_roof_insulation_thickness")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_building_part" ALTER COLUMN "roof_insulation_location"
SET DATA TYPE jsonb
USING (
CASE
WHEN "roof_insulation_location" IS NULL THEN NULL
WHEN "roof_insulation_location" ~ '^-?[0-9]+$' THEN "roof_insulation_location"::jsonb
ELSE to_json("roof_insulation_location")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_building_part" ALTER COLUMN "roof_insulation_thickness"
SET DATA TYPE jsonb
USING (
CASE
WHEN "roof_insulation_thickness" IS NULL THEN NULL
WHEN "roof_insulation_thickness" ~ '^-?[0-9]+$' THEN "roof_insulation_thickness"::jsonb
ELSE to_json("roof_insulation_thickness")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_main_heating_detail" ALTER COLUMN "main_fuel_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "main_fuel_type" IS NULL THEN NULL
WHEN "main_fuel_type" ~ '^-?[0-9]+$' THEN "main_fuel_type"::jsonb
ELSE to_json("main_fuel_type")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_main_heating_detail" ALTER COLUMN "heat_emitter_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "heat_emitter_type" IS NULL THEN NULL
WHEN "heat_emitter_type" ~ '^-?[0-9]+$' THEN "heat_emitter_type"::jsonb
ELSE to_json("heat_emitter_type")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_main_heating_detail" ALTER COLUMN "emitter_temperature"
SET DATA TYPE jsonb
USING (
CASE
WHEN "emitter_temperature" IS NULL THEN NULL
WHEN "emitter_temperature" ~ '^-?[0-9]+$' THEN "emitter_temperature"::jsonb
ELSE to_json("emitter_temperature")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_main_heating_detail" ALTER COLUMN "main_heating_control"
SET DATA TYPE jsonb
USING (
CASE
WHEN "main_heating_control" IS NULL THEN NULL
WHEN "main_heating_control" ~ '^-?[0-9]+$' THEN "main_heating_control"::jsonb
ELSE to_json("main_heating_control")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_property" ALTER COLUMN "energy_pv_connection"
SET DATA TYPE jsonb
USING (
CASE
WHEN "energy_pv_connection" IS NULL THEN NULL
WHEN "energy_pv_connection" ~ '^-?[0-9]+$' THEN "energy_pv_connection"::jsonb
ELSE to_json("energy_pv_connection")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_property" ALTER COLUMN "heating_cylinder_size"
SET DATA TYPE jsonb
USING (
CASE
WHEN "heating_cylinder_size" IS NULL THEN NULL
WHEN "heating_cylinder_size" ~ '^-?[0-9]+$' THEN "heating_cylinder_size"::jsonb
ELSE to_json("heating_cylinder_size")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_property" ALTER COLUMN "heating_immersion_heating_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "heating_immersion_heating_type" IS NULL THEN NULL
WHEN "heating_immersion_heating_type" ~ '^-?[0-9]+$' THEN "heating_immersion_heating_type"::jsonb
ELSE to_json("heating_immersion_heating_type")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_property" ALTER COLUMN "heating_cylinder_insulation_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "heating_cylinder_insulation_type" IS NULL THEN NULL
WHEN "heating_cylinder_insulation_type" ~ '^-?[0-9]+$' THEN "heating_cylinder_insulation_type"::jsonb
ELSE to_json("heating_cylinder_insulation_type")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_property" ALTER COLUMN "heating_secondary_heating_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "heating_secondary_heating_type" IS NULL THEN NULL
WHEN "heating_secondary_heating_type" ~ '^-?[0-9]+$' THEN "heating_secondary_heating_type"::jsonb
ELSE to_json("heating_secondary_heating_type")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_property" ALTER COLUMN "heating_shower_outlet_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "heating_shower_outlet_type" IS NULL THEN NULL
WHEN "heating_shower_outlet_type" ~ '^-?[0-9]+$' THEN "heating_shower_outlet_type"::jsonb
ELSE to_json("heating_shower_outlet_type")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_window" ALTER COLUMN "glazing_gap"
SET DATA TYPE jsonb
USING (
CASE
WHEN "glazing_gap" IS NULL THEN NULL
WHEN "glazing_gap" ~ '^-?[0-9]+$' THEN "glazing_gap"::jsonb
ELSE to_json("glazing_gap")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_window" ALTER COLUMN "orientation"
SET DATA TYPE jsonb
USING (
CASE
WHEN "orientation" IS NULL THEN NULL
WHEN "orientation" ~ '^-?[0-9]+$' THEN "orientation"::jsonb
ELSE to_json("orientation")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_window" ALTER COLUMN "window_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "window_type" IS NULL THEN NULL
WHEN "window_type" ~ '^-?[0-9]+$' THEN "window_type"::jsonb
ELSE to_json("window_type")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_window" ALTER COLUMN "glazing_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "glazing_type" IS NULL THEN NULL
WHEN "glazing_type" ~ '^-?[0-9]+$' THEN "glazing_type"::jsonb
ELSE to_json("glazing_type")::jsonb
END
);--> statement-breakpoint
-- Unlike the neighbouring conversions, this one has no CASE expression: every
-- value is passed straight through to_json.
-- NOTE(review): presumably because the column is not text (e.g. boolean), so
-- the integer-looking-text branch does not apply — confirm against the schema.
ALTER TABLE "epc_window" ALTER COLUMN "draught_proofed"
SET DATA TYPE jsonb
USING to_json("draught_proofed")::jsonb;--> statement-breakpoint
ALTER TABLE "epc_window" ALTER COLUMN "window_location"
SET DATA TYPE jsonb
USING (
CASE
WHEN "window_location" IS NULL THEN NULL
WHEN "window_location" ~ '^-?[0-9]+$' THEN "window_location"::jsonb
ELSE to_json("window_location")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_window" ALTER COLUMN "window_wall_type"
SET DATA TYPE jsonb
USING (
CASE
WHEN "window_wall_type" IS NULL THEN NULL
WHEN "window_wall_type" ~ '^-?[0-9]+$' THEN "window_wall_type"::jsonb
ELSE to_json("window_wall_type")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_window" ALTER COLUMN "permanent_shutters_present"
SET DATA TYPE jsonb
USING to_json("permanent_shutters_present")::jsonb;--> statement-breakpoint
ALTER TABLE "epc_window" ALTER COLUMN "transmission_data_source"
SET DATA TYPE jsonb
USING (
CASE
WHEN "transmission_data_source" IS NULL THEN NULL
WHEN "transmission_data_source" ~ '^-?[0-9]+$' THEN "transmission_data_source"::jsonb
ELSE to_json("transmission_data_source")::jsonb
END
);--> statement-breakpoint
ALTER TABLE "epc_building_part" ADD COLUMN "roof_construction_type" text;--> statement-breakpoint
ALTER TABLE "epc_building_part" ADD COLUMN "curtain_wall_age" text;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "mechanical_vent_duct_insulation_level" integer;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "addendum_stone_walls" boolean;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "addendum_system_build" boolean;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "addendum_numbers" jsonb;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "heating_number_baths" integer;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "heating_number_baths_wwhrs" integer;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "heating_electric_shower_count" integer;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "heating_mixer_shower_count" integer;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "ventilation_present" boolean DEFAULT false NOT NULL;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "ventilation_sheltered_sides" integer;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "ventilation_has_suspended_timber_floor" boolean;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "ventilation_suspended_timber_floor_sealed" boolean;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "ventilation_has_draught_lobby" boolean;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "ventilation_air_permeability_ap4_m3_h_m2" real;--> statement-breakpoint
ALTER TABLE "epc_property" ADD COLUMN "ventilation_mechanical_ventilation_kind" text;--> statement-breakpoint
ALTER TABLE "epc_renewable_heat_incentive" ADD CONSTRAINT "epc_renewable_heat_incentive_epc_property_id_epc_property_id_fk" FOREIGN KEY ("epc_property_id") REFERENCES "public"."epc_property"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "property_baseline_performance" ADD CONSTRAINT "property_baseline_performance_property_id_property_id_fk" FOREIGN KEY ("property_id") REFERENCES "public"."property"("id") ON DELETE no action ON UPDATE no action;

View file

@ -0,0 +1,16 @@
CREATE TYPE "public"."override_component" AS ENUM('wall_type', 'roof_type', 'property_type', 'built_form_type');--> statement-breakpoint
CREATE TABLE "property_overrides" (
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
"property_id" bigint NOT NULL,
"portfolio_id" bigint NOT NULL,
"building_part" smallint NOT NULL,
"override_component" "override_component" NOT NULL,
"override_value" text NOT NULL,
"original_spreadsheet_description" text NOT NULL,
"created_at" timestamp with time zone DEFAULT now() NOT NULL,
"updated_at" timestamp with time zone DEFAULT now() NOT NULL,
CONSTRAINT "property_overrides_property_component_part_unique" UNIQUE("property_id","override_component","building_part")
);
--> statement-breakpoint
ALTER TABLE "property_overrides" ADD CONSTRAINT "property_overrides_property_id_property_id_fk" FOREIGN KEY ("property_id") REFERENCES "public"."property"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "property_overrides" ADD CONSTRAINT "property_overrides_portfolio_id_portfolio_id_fk" FOREIGN KEY ("portfolio_id") REFERENCES "public"."portfolio"("id") ON DELETE cascade ON UPDATE no action;

View file

@ -0,0 +1,7 @@
-- Metadata-only DDL: instant, holds AccessExclusiveLock on "recommendation"
-- only momentarily. The FK is added NOT VALID so no full-table validation scan
-- happens here; it is validated online by the standalone backfill script. The
-- index is created CONCURRENTLY by that script too, after the column is filled.
-- See docs/adr/0001-data-backfills-outside-drizzle.md
ALTER TABLE "recommendation" ADD COLUMN "plan_id" bigint;--> statement-breakpoint
ALTER TABLE "recommendation" ADD CONSTRAINT "recommendation_plan_id_plan_id_fk" FOREIGN KEY ("plan_id") REFERENCES "public"."plan"("id") ON DELETE cascade ON UPDATE no action NOT VALID;

View file

@ -0,0 +1,13 @@
-- Intentionally a no-op.
--
-- The plan_id backfill used to live here as a single full-table UPDATE inside
-- drizzle's migration transaction. Because drizzle wraps ALL pending migrations
-- in one transaction, that UPDATE held an AccessExclusiveLock on "recommendation"
-- (from the ADD COLUMN in 0222) for the entire multi-hour run, blocked unrelated
-- migrations behind it, exhausted EBS IO burst balance, and could not report
-- progress or be resumed.
--
-- The backfill now runs OUTSIDE drizzle, in committed batches, via:
-- npm run backfill:recommendation-denormalization
-- See docs/adr/0001-data-backfills-outside-drizzle.md
SELECT 1;

View file

@ -0,0 +1,10 @@
-- Metadata-only DDL: adding nullable columns with no default is an instant
-- catalog change. The FK is added NOT VALID (no scan); it is validated online
-- and the index built CONCURRENTLY by the standalone backfill script, after the
-- columns are populated.
-- See docs/adr/0001-data-backfills-outside-drizzle.md
ALTER TABLE "recommendation" ADD COLUMN "material_id" bigint;--> statement-breakpoint
ALTER TABLE "recommendation" ADD COLUMN "material_quantity" real;--> statement-breakpoint
ALTER TABLE "recommendation" ADD COLUMN "material_quantity_unit" "unit_quantity";--> statement-breakpoint
ALTER TABLE "recommendation" ADD COLUMN "material_depth" real;--> statement-breakpoint
ALTER TABLE "recommendation" ADD CONSTRAINT "recommendation_material_id_material_id_fk" FOREIGN KEY ("material_id") REFERENCES "public"."material"("id") ON DELETE set null ON UPDATE no action NOT VALID;

View file

@ -0,0 +1,12 @@
-- Intentionally a no-op.
--
-- The materials backfill (material_id + quantity/unit/depth) used to live here as
-- a single full-table UPDATE inside drizzle's migration transaction, with the
-- material_id index created BEFORE the backfill in 0224 — so every updated row
-- also had to maintain that index. It moved out for the same reasons as the
-- plan_id backfill (see 0223).
--
-- The backfill now runs OUTSIDE drizzle, in committed batches, via:
-- npm run backfill:recommendation-denormalization
-- See docs/adr/0001-data-backfills-outside-drizzle.md
SELECT 1;

View file

@ -0,0 +1,21 @@
CREATE TABLE "magic_plan_door_ventilation" (
"id" bigserial PRIMARY KEY NOT NULL,
"magic_plan_door_id" bigint NOT NULL,
"undercut_mm" real,
CONSTRAINT "magic_plan_door_ventilation_magic_plan_door_id_unique" UNIQUE("magic_plan_door_id")
);
--> statement-breakpoint
CREATE TABLE "magic_plan_window_ventilation" (
"id" bigserial PRIMARY KEY NOT NULL,
"magic_plan_window_id" bigint NOT NULL,
"opening_type" text,
"num_openings" integer,
"pct_openable" integer,
"trickle_vent_area_mm2" integer,
"num_trickle_vents" integer,
CONSTRAINT "magic_plan_window_ventilation_magic_plan_window_id_unique" UNIQUE("magic_plan_window_id")
);
--> statement-breakpoint
ALTER TABLE "magic_plan_door" ADD COLUMN "height_mm" real;--> statement-breakpoint
ALTER TABLE "magic_plan_door_ventilation" ADD CONSTRAINT "magic_plan_door_ventilation_magic_plan_door_id_magic_plan_door_id_fk" FOREIGN KEY ("magic_plan_door_id") REFERENCES "public"."magic_plan_door"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "magic_plan_window_ventilation" ADD CONSTRAINT "magic_plan_window_ventilation_magic_plan_window_id_magic_plan_window_id_fk" FOREIGN KEY ("magic_plan_window_id") REFERENCES "public"."magic_plan_window"("id") ON DELETE cascade ON UPDATE no action;

View file

@ -0,0 +1,10 @@
-- Backfill opening_type into magic_plan_window_ventilation from magic_plan_window.
-- Only rows with a non-null opening_type are backfilled; no ventilation row is
-- created for windows that never had one. The unique constraint on
-- magic_plan_window_id prevents duplicate rows if this runs more than once.
INSERT INTO magic_plan_window_ventilation (magic_plan_window_id, opening_type)
SELECT id, opening_type
FROM magic_plan_window
WHERE opening_type IS NOT NULL
ON CONFLICT (magic_plan_window_id) DO NOTHING;

View file

@ -0,0 +1 @@
ALTER TABLE "magic_plan_window" DROP COLUMN "opening_type";

View file

@ -0,0 +1,4 @@
ALTER TABLE "hubspot_deal_data" ADD COLUMN "date_booking_made" timestamp (6) with time zone;--> statement-breakpoint
ALTER TABLE "hubspot_deal_data" ADD COLUMN "last_contact_date" timestamp (6) with time zone;--> statement-breakpoint
ALTER TABLE "hubspot_deal_data" ADD COLUMN "last_outbound_call" timestamp (6) with time zone;--> statement-breakpoint
ALTER TABLE "hubspot_deal_data" ADD COLUMN "last_outbound_email" timestamp (6) with time zone;

View file

@ -0,0 +1,2 @@
ALTER TYPE "public"."file_source" ADD VALUE 'audit_generator';--> statement-breakpoint
ALTER TYPE "public"."file_type" ADD VALUE 'ventilation_audit' BEFORE 'other';

View file

@ -0,0 +1 @@
ALTER TABLE "hubspot_deal_data" ADD COLUMN "last_submission_date" timestamp (6) with time zone;

View file

@ -0,0 +1 @@
ALTER TABLE "hubspot_deal_data" ADD COLUMN "osmosis_survey_required" boolean;

View file

@ -0,0 +1 @@
-- Drops osmosis_survey_required from hubspot_deal_data.
-- NOTE(review): the column is added by another migration in this same change
-- set, so the pair appears to net out to no schema change — confirm this is intended.
ALTER TABLE "hubspot_deal_data" DROP COLUMN "osmosis_survey_required";

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1255,13 +1255,6 @@
"tag": "0178_parched_midnight",
"breakpoints": true
},
{
"idx": 179,
"version": "7",
"when": 1776459924335,
"tag": "0179_mighty_cardiac",
"breakpoints": true
},
{
"idx": 180,
"version": "7",
@ -1520,6 +1513,125 @@
"when": 1779992128370,
"tag": "0216_add_subtask_service",
"breakpoints": true
},
{
"idx": 217,
"version": "7",
"when": 1780404222902,
"tag": "0217_gray_hellion",
"breakpoints": true
},
{
"idx": 218,
"version": "7",
"when": 1780408378351,
"tag": "0218_natural_umar",
"breakpoints": true
},
{
"idx": 219,
"version": "7",
"when": 1780419959831,
"tag": "0219_add_verify_ack",
"breakpoints": true
},
{
"idx": 220,
"version": "7",
"when": 1780491109956,
"tag": "0220_round_retro_girl",
"breakpoints": true
},
{
"idx": 221,
"version": "7",
"when": 1780566543108,
"tag": "0221_nice_sumo",
"breakpoints": true
},
{
"idx": 222,
"version": "7",
"when": 1780647165601,
"tag": "0222_nifty_hellcat",
"breakpoints": true
},
{
"idx": 223,
"version": "7",
"when": 1780647248894,
"tag": "0223_recommendation_plan_id_backfill",
"breakpoints": true
},
{
"idx": 224,
"version": "7",
"when": 1780653770494,
"tag": "0224_busy_nitro",
"breakpoints": true
},
{
"idx": 225,
"version": "7",
"when": 1780654800000,
"tag": "0225_recommendation_material_id_backfill",
"breakpoints": true
},
{
"idx": 226,
"version": "7",
"when": 1780910378978,
"tag": "0226_massive_guardian",
"breakpoints": true
},
{
"idx": 227,
"version": "7",
"when": 1780910400000,
"tag": "0227_magic_plan_window_ventilation_backfill",
"breakpoints": true
},
{
"idx": 228,
"version": "7",
"when": 1780910657380,
"tag": "0228_deep_betty_ross",
"breakpoints": true
},
{
"idx": 229,
"version": "7",
"when": 1780929304935,
"tag": "0229_lively_kree",
"breakpoints": true
},
{
"idx": 230,
"version": "7",
"when": 1781013441630,
"tag": "0230_wet_epoch",
"breakpoints": true
},
{
"idx": 231,
"version": "7",
"when": 1781101526295,
"tag": "0231_bright_loners",
"breakpoints": true
},
{
"idx": 232,
"version": "7",
"when": 1781513516132,
"tag": "0232_wet_lifeguard",
"breakpoints": true
},
{
"idx": 233,
"version": "7",
"when": 1781513971818,
"tag": "0233_abnormal_george_stacy",
"breakpoints": true
}
]
}

View file

@ -1,6 +1,50 @@
import { pgTable, uuid, text, timestamp, jsonb } from "drizzle-orm/pg-core";
import { pgTable, uuid, text, timestamp, jsonb, boolean } from "drizzle-orm/pg-core";
import { sql } from "drizzle-orm";
/**
 * Shape of the `multi_entry_summary` jsonb column (ADR-0004).
 *
 * The types are co-located with the column so the schema is self-contained;
 * the detection logic in src/lib/bulkUpload/multiEntry.ts imports them.
 */
export interface MultiEntrySummary {
  multiValuedFields: string[];
  countDistribution: Record<string, number>;
  largestCount: number;
  /**
   * Step 1 (verify) sample: the largest-count row when the upload is
   * multi-entry, otherwise the first classified row. `null` means there is
   * nothing to verify.
   */
  sample: MultiEntrySample | null;
  /**
   * Step 2 (order): one sample per distinct entry-count ≥ 2 present in the
   * file, keyed by count. Each count needs its OWN confirmed permutation — a
   * smaller count's ordering can't be derived from a larger one (ADR-0004,
   * amended 2026-06-05). Absent on uploads detected before that amendment.
   */
  samplesByCount?: Record<string, MultiEntrySample>;
}

/** One sampled row: its address, its entry count and the sampled columns. */
export interface MultiEntrySample {
  address: string;
  count: number;
  columns: MultiEntryColumn[];
}

/**
 * One column of a sample.
 * NOTE(review): `field` is presumably the classifier field key and `header` the
 * source spreadsheet header — confirm against src/lib/bulkUpload/multiEntry.ts.
 */
export interface MultiEntryColumn {
  field: string;
  header: string;
  entries: MultiEntryEntry[];
}

/**
 * One entry within a column.
 * NOTE(review): the exact meaning of `raw` vs `description` is not visible
 * here — confirm against the detection logic.
 */
export interface MultiEntryEntry {
  raw: string;
  description: string;
}
/**
 * User-confirmed building-part ordering (ADR-0004, amended 2026-06-05).
 *
 * A permutation is captured for EVERY distinct entry-count ≥ 2 in the file,
 * because the v2 fact layer can't derive one count's order from another.
 */
export interface MultiEntryOrdering {
  /**
   * Keyed by entry-count. `permutations[count][k]` is the 0-based file position
   * holding building part `k`, where 0 = Main building and 1..N-1 = Extension 1..N-1.
   * e.g. `{ "2": [1, 0] }` => for 2-part rows the main building is file position 1.
   */
  permutations: Record<string, number[]>;
  /**
   * True once EVERY detected count ≥ 2 has a permutation; gates Finalise when
   * the upload is multi-entry.
   */
  confirmed: boolean;
}
export const bulkAddressUploads = pgTable("bulk_address_uploads", {
id: uuid("id").defaultRandom().primaryKey(),
portfolioId: text("portfolio_id").notNull(),
@ -11,6 +55,9 @@ export const bulkAddressUploads = pgTable("bulk_address_uploads", {
status: text("status").notNull().default("ready_for_processing"),
sourceHeaders: text("source_headers").array().notNull().default(sql`'{}'`),
columnMapping: jsonb("column_mapping").$type<Record<string, string>>(),
multiEntrySummary: jsonb("multi_entry_summary").$type<MultiEntrySummary>(),
multiEntryOrdering: jsonb("multi_entry_ordering").$type<MultiEntryOrdering>(),
verifyAck: boolean("verify_ack").notNull().default(false),
taskId: uuid("task_id"),
combinedOutputS3Uri: text("combined_output_s3_uri"),
createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),

View file

@ -57,7 +57,7 @@ export const hubspotDealData = pgTable("hubspot_deal_data", {
eiScore: text("ei_score"),
eiScorePotential: text("ei_score__potential_"),
epcSapScore: text("epc_sap_score"),
epcSapScorePotential: text("epc_sap_score__potential_"),
epcSapScorePotential: text("epc_sap_score__potential_"), // This is being replaced by potentialPostSapScoreDropdown ^
confirmedSurveyDate: timestamp("confirmed_survey_date", { precision: 6, withTimezone: true }),
confirmedSurveyTime: text("confirmed_survey_time"),
surveyedDate: timestamp("surveyed_date", { precision: 6, withTimezone: true }),
@ -74,6 +74,12 @@ export const hubspotDealData = pgTable("hubspot_deal_data", {
domnaSurveyType: text("domna_survey_type"),
domnaSurveyDate: timestamp("domna_survey_date", { precision: 6, withTimezone: true }),
dateBookingMade: timestamp("date_booking_made", { precision: 6, withTimezone: true }),
lastContactDate: timestamp("last_contact_date", { precision: 6, withTimezone: true }),
lastOutboundCall: timestamp("last_outbound_call", { precision: 6, withTimezone: true }),
lastOutboundEmail: timestamp("last_outbound_email", { precision: 6, withTimezone: true }),
lastSubmissionDate: timestamp("last_submission_date", { precision: 6, withTimezone: true }),
createdAt: timestamp("created_at", { precision: 6, withTimezone: true })
.defaultNow()
.notNull(),

View file

@ -9,6 +9,7 @@ export const magicPlanDoor = pgTable(
.notNull()
.references(() => magicPlanRoom.id),
widthMm: real("width_mm"),
heightMm: real("height_mm"),
type: text("type"),
},
);

View file

@ -0,0 +1,14 @@
import { pgTable, bigserial, bigint, real } from "drizzle-orm/pg-core";
import { magicPlanDoor } from "./door";
/**
 * `magic_plan_door_ventilation`: ventilation attributes for a door, held in a
 * side table. `magic_plan_door_id` is unique, so a door has at most one row
 * here, and the foreign key cascades on delete.
 */
export const magicPlanDoorVentilation = pgTable("magic_plan_door_ventilation", {
  id: bigserial("id", { mode: "bigint" }).primaryKey(),
  // Owning door: required, unique, and removed together with the door.
  doorId: bigint("magic_plan_door_id", { mode: "bigint" })
    .notNull()
    .unique()
    .references(() => magicPlanDoor.id, { onDelete: "cascade" }),
  // Nullable; millimetres, going by the column name.
  undercutMm: real("undercut_mm"),
});

View file

@ -1,16 +1,15 @@
import { pgTable, bigserial, bigint, text, real } from "drizzle-orm/pg-core";
import { pgTable, bigserial, bigint, real } from "drizzle-orm/pg-core";
import { magicPlanRoom } from "./room";
export const magicPlanWindow = pgTable(
"magic_plan_window",
{
id: bigserial("id", { mode: "bigint" }).primaryKey(),
roomId: bigint("magic_plan_room_id", { mode: "bigint" })
id: bigserial("id", { mode: "bigint" }).primaryKey(),
roomId: bigint("magic_plan_room_id", { mode: "bigint" })
.notNull()
.references(() => magicPlanRoom.id),
widthM: real("width_m"),
heightM: real("height_m"),
areaM2: real("area_m2"),
openingType: text("opening_type"),
widthM: real("width_m"),
heightM: real("height_m"),
areaM2: real("area_m2"),
},
);

View file

@ -0,0 +1,18 @@
import { pgTable, bigserial, bigint, text, integer } from "drizzle-orm/pg-core";
import { magicPlanWindow } from "./window";
/**
 * `magic_plan_window_ventilation`: ventilation attributes for a window, held in
 * a side table. `magic_plan_window_id` is unique, so a window has at most one
 * row here, and the foreign key cascades on delete. Every attribute column is
 * nullable.
 */
export const magicPlanWindowVentilation = pgTable("magic_plan_window_ventilation", {
  id: bigserial("id", { mode: "bigint" }).primaryKey(),
  // Owning window: required, unique, and removed together with the window.
  windowId: bigint("magic_plan_window_id", { mode: "bigint" })
    .notNull()
    .unique()
    .references(() => magicPlanWindow.id, { onDelete: "cascade" }),
  // Same column name as the opening_type that is dropped from magic_plan_window.
  openingType: text("opening_type"),
  numOpenings: integer("num_openings"),
  // NOTE(review): presumably a whole-number percentage — confirm the expected range.
  pctOpenable: integer("pct_openable"),
  trickleVentAreaMm2: integer("trickle_vent_area_mm2"),
  numTrickleVents: integer("num_trickle_vents"),
});

View file

@ -10,6 +10,7 @@ import {
smallint,
bigint,
uniqueIndex,
jsonb,
} from "drizzle-orm/pg-core";
import { portfolio, PortfolioStatus } from "./portfolio";
import { InferModel } from "drizzle-orm";
@ -514,25 +515,25 @@ export const epcProperty = pgTable(
energyIsDwellingExportCapable: boolean("energy_is_dwelling_export_capable").notNull(),
energyWindTurbinesTerrainType: text("energy_wind_turbines_terrain_type").notNull(),
energyElectricitySmartMeterPresent: boolean("energy_electricity_smart_meter_present").notNull(),
energyPvConnection: text("energy_pv_connection"),
energyPvConnection: jsonb("energy_pv_connection"),
energyPvPercentRoofArea: integer("energy_pv_percent_roof_area"),
energyPvBatteryCapacity: real("energy_pv_battery_capacity"),
energyWindTurbineHubHeight: real("energy_wind_turbine_hub_height"),
energyWindTurbineRotorDiameter: real("energy_wind_turbine_rotor_diameter"),
// Heating config
heatingCylinderSize: text("heating_cylinder_size"),
heatingCylinderSize: jsonb("heating_cylinder_size"),
heatingWaterHeatingCode: integer("heating_water_heating_code"),
heatingWaterHeatingFuel: integer("heating_water_heating_fuel"),
heatingImmersionHeatingType: text("heating_immersion_heating_type"),
heatingCylinderInsulationType: text("heating_cylinder_insulation_type"),
heatingImmersionHeatingType: jsonb("heating_immersion_heating_type"),
heatingCylinderInsulationType: jsonb("heating_cylinder_insulation_type"),
heatingCylinderThermostat: text("heating_cylinder_thermostat"),
heatingSecondaryFuelType: integer("heating_secondary_fuel_type"),
heatingSecondaryHeatingType: text("heating_secondary_heating_type"),
heatingSecondaryHeatingType: jsonb("heating_secondary_heating_type"),
heatingCylinderInsulationThicknessMm: integer("heating_cylinder_insulation_thickness_mm"),
heatingWwhrsIndexNumber1: integer("heating_wwhrs_index_number_1"),
heatingWwhrsIndexNumber2: integer("heating_wwhrs_index_number_2"),
heatingShowerOutletType: text("heating_shower_outlet_type"),
heatingShowerOutletType: jsonb("heating_shower_outlet_type"),
heatingShowerWwhrs: integer("heating_shower_wwhrs"),
// Ventilation
@ -553,6 +554,27 @@ export const epcProperty = pgTable(
mechanicalVentDuctInsulation: integer("mechanical_vent_duct_insulation"),
mechanicalVentilationIndexNumber: integer("mechanical_ventilation_index_number"),
mechanicalVentMeasuredInstallation: text("mechanical_vent_measured_installation"),
mechanicalVentDuctInsulationLevel: integer("mechanical_vent_duct_insulation_level"),
// Addendum flags
addendumStoneWalls: boolean("addendum_stone_walls"),
addendumSystemBuild: boolean("addendum_system_build"),
addendumNumbers: jsonb("addendum_numbers"),
// Heating counts
heatingNumberBaths: integer("heating_number_baths"),
heatingNumberBathsWwhrs: integer("heating_number_baths_wwhrs"),
heatingElectricShowerCount: integer("heating_electric_shower_count"),
heatingMixerShowerCount: integer("heating_mixer_shower_count"),
// Ventilation detail
ventilationPresent: boolean("ventilation_present").notNull().default(false),
ventilationShelteredSides: integer("ventilation_sheltered_sides"),
ventilationHasSuspendedTimberFloor: boolean("ventilation_has_suspended_timber_floor"),
ventilationSuspendedTimberFloorSealed: boolean("ventilation_suspended_timber_floor_sealed"),
ventilationHasDraughtLobby: boolean("ventilation_has_draught_lobby"),
ventilationAirPermeabilityAp4M3HM2: real("ventilation_air_permeability_ap4_m3_h_m2"),
ventilationMechanicalVentilationKind: text("ventilation_mechanical_ventilation_kind"),
},
(table) => [
uniqueIndex("uq_epc_property_property_portfolio").on(
@ -627,10 +649,10 @@ export const epcMainHeatingDetail = pgTable(
.references(() => epcProperty.id),
hasFghrs: boolean("has_fghrs").notNull(),
mainFuelType: text("main_fuel_type").notNull(),
heatEmitterType: text("heat_emitter_type").notNull(),
emitterTemperature: text("emitter_temperature").notNull(),
mainHeatingControl: text("main_heating_control").notNull(),
mainFuelType: jsonb("main_fuel_type").notNull(),
heatEmitterType: jsonb("heat_emitter_type").notNull(),
emitterTemperature: jsonb("emitter_temperature").notNull(),
mainHeatingControl: jsonb("main_heating_control").notNull(),
fanFluePresent: boolean("fan_flue_present"),
boilerFlueType: integer("boiler_flue_type"),
boilerIgnitionType: integer("boiler_ignition_type"),
@ -661,10 +683,10 @@ export const epcBuildingPart = pgTable(
constructionAgeBand: text("construction_age_band").notNull(),
// Wall
wallConstruction: text("wall_construction").notNull(),
wallInsulationType: text("wall_insulation_type").notNull(),
wallConstruction: jsonb("wall_construction").notNull(),
wallInsulationType: jsonb("wall_insulation_type").notNull(),
wallThicknessMeasured: boolean("wall_thickness_measured").notNull(),
partyWallConstruction: text("party_wall_construction").notNull(),
partyWallConstruction: jsonb("party_wall_construction"),
buildingPartNumber: integer("building_part_number"),
wallDryLined: boolean("wall_dry_lined"),
wallThicknessMm: integer("wall_thickness_mm"),
@ -674,7 +696,7 @@ export const epcBuildingPart = pgTable(
// Floor
floorHeatLoss: integer("floor_heat_loss"),
floorInsulationThickness: text("floor_insulation_thickness"),
flatRoofInsulationThickness: text("flat_roof_insulation_thickness"),
flatRoofInsulationThickness: jsonb("flat_roof_insulation_thickness"),
floorType: text("floor_type"),
floorConstructionType: text("floor_construction_type"),
floorInsulationTypeStr: text("floor_insulation_type_str"),
@ -682,8 +704,11 @@ export const epcBuildingPart = pgTable(
// Roof
roofConstruction: integer("roof_construction"),
roofInsulationLocation: text("roof_insulation_location"),
roofInsulationThickness: text("roof_insulation_thickness"),
roofInsulationLocation: jsonb("roof_insulation_location"),
roofInsulationThickness: jsonb("roof_insulation_thickness"),
roofConstructionType: text("roof_construction_type"),
curtainWallAge: text("curtain_wall_age"),
// Room in roof (inlined)
roomInRoofFloorArea: real("room_in_roof_floor_area"),
@ -737,23 +762,23 @@ export const epcWindow = pgTable(
.notNull()
.references(() => epcProperty.id),
glazingGap: text("glazing_gap").notNull(),
orientation: text("orientation").notNull(),
windowType: text("window_type").notNull(),
glazingType: text("glazing_type").notNull(),
glazingGap: jsonb("glazing_gap").notNull(),
orientation: jsonb("orientation").notNull(),
windowType: jsonb("window_type").notNull(),
glazingType: jsonb("glazing_type").notNull(),
windowWidth: real("window_width").notNull(), // add unit?
windowHeight: real("window_height").notNull(), // add unit?
draughtProofed: boolean("draught_proofed").notNull(),
windowLocation: text("window_location").notNull(),
windowWallType: text("window_wall_type").notNull(),
permanentShuttersPresent: boolean("permanent_shutters_present").notNull(),
draughtProofed: jsonb("draught_proofed").notNull(),
windowLocation: jsonb("window_location").notNull(),
windowWallType: jsonb("window_wall_type").notNull(),
permanentShuttersPresent: jsonb("permanent_shutters_present").notNull(),
frameMaterial: text("frame_material"),
frameFactor: real("frame_factor"),
permanentShuttersInsulated: text("permanent_shutters_insulated"),
// Transmission details (inlined)
transmissionUValue: real("transmission_u_value"),
transmissionDataSource: text("transmission_data_source"),
transmissionDataSource: jsonb("transmission_data_source"),
transmissionSolarTransmittance: real("transmission_solar_transmittance"),
},
);
@ -773,4 +798,86 @@ export const epcEnergyElement = pgTable(
energyEfficiencyRating: integer("energy_efficiency_rating").notNull(),
environmentalEfficiencyRating: integer("environmental_efficiency_rating").notNull(),
},
);
// ─── epc_renewable_heat_incentive ─────────────────────────────────────────────

/**
 * `epc_renewable_heat_incentive`: heat-demand figures for an EPC property.
 * `epc_property_id` is unique, so each epc_property has at most one row.
 * Space and water heating are required; the three insulation-impact figures
 * are nullable.
 */
export const epcRenewableHeatIncentive = pgTable("epc_renewable_heat_incentive", {
  id: bigserial("id", { mode: "bigint" }).primaryKey(),
  epcPropertyId: bigint("epc_property_id", { mode: "bigint" })
    .notNull()
    .unique()
    .references(() => epcProperty.id),

  // Required demand figures.
  spaceHeatingKwh: real("space_heating_kwh").notNull(),
  waterHeatingKwh: real("water_heating_kwh").notNull(),

  // Optional insulation-impact figures.
  impactOfLoftInsulationKwh: real("impact_of_loft_insulation_kwh"),
  impactOfCavityInsulationKwh: real("impact_of_cavity_insulation_kwh"),
  impactOfSolidWallInsulationKwh: real("impact_of_solid_wall_insulation_kwh"),
});
// ─── property_baseline_performance ────────────────────────────────────────────

/**
 * Postgres enum `rebaseline_reason`. The label order below is the order the
 * enum is declared with, so keep it stable.
 * NOTE(review): "both" presumably means pre_sap10 AND physical_state_changed — confirm.
 */
export const rebaselineReasonEnum = pgEnum(
  "rebaseline_reason",
  ["none", "pre_sap10", "physical_state_changed", "both"],
);
/**
 * `property_baseline_performance`: one row per property (`property_id` is
 * unique) holding the lodged and effective performance figures, the
 * re-baseline reason, interim energy demand, and a bill breakdown whose
 * columns are all nullable.
 */
export const propertyBaselinePerformance = pgTable("property_baseline_performance", {
  id: bigserial("id", { mode: "bigint" }).primaryKey(),
  propertyId: bigint("property_id", { mode: "bigint" })
    .notNull()
    .unique()
    .references(() => property.id),

  // Lodged performance (from gov EPC register)
  lodgedSapScore: integer("lodged_sap_score").notNull(),
  lodgedEpcBand: epcEnum("lodged_epc_band").notNull(),
  lodgedCo2EmissionsTPerYr: real("lodged_co2_emissions_t_per_yr").notNull(),
  lodgedPrimaryEnergyIntensityKwhPerM2Yr: integer("lodged_primary_energy_intensity_kwh_per_m2_yr").notNull(),

  // Effective performance (what modelling scored against)
  effectiveSapScore: integer("effective_sap_score").notNull(),
  effectiveEpcBand: epcEnum("effective_epc_band").notNull(),
  effectiveCo2EmissionsTPerYr: real("effective_co2_emissions_t_per_yr").notNull(),
  effectivePrimaryEnergyIntensityKwhPerM2Yr: integer("effective_primary_energy_intensity_kwh_per_m2_yr").notNull(),

  rebaselineReason: rebaselineReasonEnum("rebaseline_reason").notNull(),

  // Interim energy demand (from EPC RHI data; superseded by bill block below once populated)
  spaceHeatingKwh: real("space_heating_kwh").notNull(),
  waterHeatingKwh: real("water_heating_kwh").notNull(),

  // Bill block — nullable until BillDerivation wiring lands
  fuelRatesPeriod: text("fuel_rates_period"),
  heatingKwh: real("heating_kwh"),
  heatingCostGbp: real("heating_cost_gbp"),
  hotWaterKwh: real("hot_water_kwh"),
  hotWaterCostGbp: real("hot_water_cost_gbp"),
  lightingKwh: real("lighting_kwh"),
  lightingCostGbp: real("lighting_cost_gbp"),
  appliancesKwh: real("appliances_kwh"),
  appliancesCostGbp: real("appliances_cost_gbp"),
  cookingKwh: real("cooking_kwh"),
  cookingCostGbp: real("cooking_cost_gbp"),
  pumpsFansKwh: real("pumps_fans_kwh"),
  pumpsFansCostGbp: real("pumps_fans_cost_gbp"),
  coolingKwh: real("cooling_kwh"),
  coolingCostGbp: real("cooling_cost_gbp"),
  standingChargesGbp: real("standing_charges_gbp"),
  segCreditGbp: real("seg_credit_gbp"),
  totalAnnualBillGbp: real("total_annual_bill_gbp"),
});

View file

@ -0,0 +1,68 @@
import {
bigint,
pgEnum,
pgTable,
smallint,
text,
timestamp,
unique,
uuid,
} from "drizzle-orm/pg-core";
import { portfolio } from "./portfolio";
import { property } from "./property";
// The per-Property fact layer deferred by ADR-0004: one row per
// (property, building_part, override_component) carrying the resolved enum value
// as a denormalised snapshot. Design + rationale (Q6Q9, snapshot-not-FK,
// recalculate-on-rerun) live in docs/design/bulk-upload-finaliser.md.

/**
 * Labels of the `override_component` enum.
 *
 * They mirror the classifier category keys used in BOTH the frontend
 * (src/lib/bulkUpload/columnFields.ts) and the Model backend
 * (ClassifiableColumn.name), so the finaliser maps category → component with
 * no translation. This is the only DB-level typing left on a row —
 * `override_value` is a free-text snapshot of the resolved enum from
 * `landlord_*_overrides`.
 */
export const OverrideComponentValues: [string, ...string[]] = ["wall_type", "roof_type", "property_type", "built_form_type"];
/** Postgres enum `override_component`, built from `OverrideComponentValues`. */
export const overrideComponentEnum = pgEnum("override_component", OverrideComponentValues);
/**
 * `property_overrides`: one row per (property, override component, building
 * part), enforced by the unique constraint declared at the bottom. Rows are
 * removed when either the owning property or the owning portfolio is deleted
 * (both foreign keys cascade).
 */
export const propertyOverrides = pgTable(
  "property_overrides",
  {
    id: uuid("id").defaultRandom().primaryKey(),
    propertyId: bigint("property_id", { mode: "bigint" })
      .notNull()
      .references(() => property.id, { onDelete: "cascade" }),
    portfolioId: bigint("portfolio_id", { mode: "bigint" })
      .notNull()
      .references(() => portfolio.id, { onDelete: "cascade" }),
    // 0 = main building, 1 = extension 1, 2 = extension 2, … (ADR-0004 ordering).
    buildingPart: smallint("building_part").notNull(),
    overrideComponent: overrideComponentEnum("override_component").notNull(),
    // Denormalised snapshot copy of the resolved enum from landlord_*_overrides.
    overrideValue: text("override_value").notNull(),
    // Raw spreadsheet cell text this snapshot resolved from (provenance + re-sync key).
    originalSpreadsheetDescription: text(
      "original_spreadsheet_description",
    ).notNull(),
    createdAt: timestamp("created_at", { withTimezone: true })
      .notNull()
      .defaultNow(),
    // Refreshed with the current time whenever the row is updated through Drizzle.
    updatedAt: timestamp("updated_at", { withTimezone: true })
      .notNull()
      .defaultNow()
      .$onUpdate(() => new Date()),
  },
  // Array form of the extra-config callback: the object-returning form is
  // deprecated in drizzle-orm. The constraint name and column order are
  // unchanged, so the generated SQL stays the same.
  (table) => [
    unique("property_overrides_property_component_part_unique").on(
      table.propertyId,
      table.overrideComponent,
      table.buildingPart,
    ),
  ],
);

View file

@ -60,6 +60,9 @@ export const measureTypeEnum = pgEnum("measure_type", [
"sealing_open_fireplace",
]);
export const unitQuantity: [string, ...string[]] = ["m2", "part", "kwp"];
export const unitQuantityEnum = pgEnum("unit_quantity", unitQuantity);
export const recommendation = pgTable(
"recommendation",
{
@ -67,6 +70,16 @@ export const recommendation = pgTable(
propertyId: bigint("property_id", { mode: "bigint" })
.notNull()
.references(() => property.id),
planId: bigint("plan_id", { mode: "bigint" }).references(() => plan.id, {
onDelete: "cascade",
}),
materialId: bigint("material_id", { mode: "bigint" }).references(
() => material.id,
{ onDelete: "set null" }
),
materialQuantity: real("material_quantity"),
materialQuantityUnit: unitQuantityEnum("material_quantity_unit"),
materialDepth: real("material_depth"),
createdAt: timestamp("created_at").notNull().defaultNow(),
type: text("type").notNull(),
measureType: text("measure_type"),
@ -92,6 +105,7 @@ export const recommendation = pgTable(
},
(table) => [
index("recommendation_property_id_idx").on(table.propertyId),
index("idx_recommendation_plan_id").on(table.planId),
index("idx_recommendation_active_defaults")
.on(table.id)
.where(
@ -103,12 +117,11 @@ export const recommendation = pgTable(
.where(
sql`${table.default} = true AND ${table.alreadyInstalled} = false`,
),
index("idx_recommendation_material_id").on(table.materialId),
],
);
export const unitQuantity: [string, ...string[]] = ["m2", "part", "kwp"];
export const unitQuantityEnum = pgEnum("unit_quantity", unitQuantity);
export const recommendationMaterials = pgTable(
"recommendation_materials",
{

View file

@ -54,7 +54,12 @@ export const fileType = pgEnum("file_type", [
"improvement_option_evaluation",
"medium_term_improvement_plan",
// Design
"retrofit_design_doc"
"retrofit_design_doc",
// Audit
"ventilation_audit",
// Other
"other"
]);
export const fileSource = pgEnum("file_source", [
@ -64,7 +69,8 @@ export const fileSource = pgEnum("file_source", [
"ecmk",
"contractor",
"magic_plan",
"coordination_hub"
"coordination_hub",
"audit_generator"
]);
export const uploadedFiles = pgTable(

View file

@ -1,18 +1,59 @@
"use client";
import { useState } from "react";
import { useRouter } from "next/navigation";
import Link from "next/link";
import { ArrowRightIcon } from "@heroicons/react/24/outline";
import { ArrowRightIcon, CheckCircleIcon } from "@heroicons/react/24/outline";
import {
useBulkUploadProgress,
useConfirmMultiEntryOrdering,
useConfirmVerification,
useEditClassification,
useFinalize,
useRequestCombine,
useSampleClassifications,
type SampleClassifications,
} from "@/lib/bulkUpload/client";
import {
partLabel,
isPermutation,
assignmentToPermutation,
type MultiEntrySample,
} from "@/lib/bulkUpload/multiEntry";
import type { MultiEntryOrdering } from "@/app/db/schema/bulk_address_uploads";
import {
PropertyTypeValues,
BuiltFormTypeValues,
WallTypeValues,
RoofTypeValues,
} from "@/app/db/schema/landlord_overrides";
import { CLASSIFIER_FIELDS } from "@/lib/bulkUpload/columnFields";
import { statusLabel, isTerminalStatus } from "@/lib/bulkUpload/types";
/**
 * Valid enum options per classifier category, for the editable dropdowns (#299).
 * Keys are the classifier category names; values are the matching
 * landlord-override enum value lists.
 */
const CATEGORY_VALUES: Record<string, readonly string[]> = {
  property_type: PropertyTypeValues,
  built_form_type: BuiltFormTypeValues,
  wall_type: WallTypeValues,
  roof_type: RoofTypeValues,
};
/**
 * Our category label per classifier field (e.g. built_form_type → "Built Form").
 *
 * Needed because several categories can read from the same source column —
 * Property Type and Built Form both come from one column — so labelling by the
 * customer's header alone is ambiguous.
 */
const FIELD_LABEL: Record<string, string> = Object.fromEntries(
  CLASSIFIER_FIELDS.map(({ value, label }) => [value, label]),
);
/** Props for the onboarding progress panel. */
interface Props {
  portfolioSlug: string;
  portfolioId: string;
  uploadId: string;
  /**
   * The status at the last server render. Used to refresh the server page
   * exactly once when polling first observes a terminal status (async finalise,
   * ADR-0005), so the page advances from "Uploading to ARA" to the
   * "Processing complete" card.
   */
  serverStatus: string;
  isDomnaUser: boolean;
}
@ -23,13 +64,33 @@ export default function OnboardingProgress({
portfolioSlug,
portfolioId,
uploadId,
serverStatus,
isDomnaUser,
}: Props) {
const router = useRouter();
const progress = useBulkUploadProgress(portfolioId, uploadId);
const progress = useBulkUploadProgress(portfolioId, uploadId, {
// When the async finaliser finishes, the poll flips the status to a terminal
// value while the server page is still on `finalising`. Refresh once so the
// server re-renders the "Processing complete" / "failed" card. Guarding on the
// non-terminal serverStatus prevents a refresh loop: after the refresh the
// prop is terminal, so this no-ops.
onSuccess: (data) => {
if (!isTerminalStatus(serverStatus) && isTerminalStatus(data.upload.status)) {
router.refresh();
}
},
});
const combine = useRequestCombine(portfolioId, uploadId);
const finalize = useFinalize(portfolioId, uploadId);
// Read-only classifications for the multi-entry sample (issue #298). Fetched
// only once a sample exists at awaiting_review. Hook stays above the early
// returns so its order is stable.
const sampleReady =
progress.data?.upload.status === "awaiting_review" &&
!!progress.data.upload.multiEntrySummary?.sample;
const classifications = useSampleClassifications(portfolioId, uploadId, sampleReady);
if (progress.isError) return null;
if (!progress.data) {
return (
@ -41,19 +102,61 @@ export default function OnboardingProgress({
}
const { task, upload } = progress.data;
const total = task?.totalSubtasks ?? 0;
const completedSubtasks = task?.completedSubtasks ?? 0;
const failedSubtasks = task?.failedSubtasks ?? 0;
// Address-matching batches drive the bar; classification is shown separately.
const total = task?.addressTotal ?? 0;
const completedSubtasks = task?.addressCompleted ?? 0;
const failedSubtasks = task?.addressFailed ?? 0;
const percent = total > 0 ? Math.round((completedSubtasks / total) * 100) : 0;
const classifierTotal = task?.classifierTotal ?? 0;
const classifierCompleted = task?.classifierCompleted ?? 0;
const classifierFailed = task?.classifierFailed ?? 0;
const taskStatus = task?.status.toLowerCase() ?? "";
const taskDone = TASK_TERMINAL_STATUSES.has(taskStatus);
const taskFailed = TASK_FAILED_STATUSES.has(taskStatus);
const isCombining = upload.status === "combining";
const isImporting = upload.status === "awaiting_review";
// Async finaliser in flight (ADR-0005). Polling continues (non-terminal) until
// the backend writes complete/failed. Surfaced to the user as "Uploading to ARA".
const isFinalising = upload.status === "finalising";
const canRunCombiner = taskDone && !taskFailed && upload.status === "processing";
const canFinalize = upload.status === "awaiting_review";
const isAwaitingReview = upload.status === "awaiting_review";
// Two-step review on the awaiting_review surface (ADR-0004). The sample exists
// whenever classifier columns were mapped; multi-entry is largestCount >= 2.
// Step 1 (verify) applies whenever there's a sample; Step 2 (order) only when
// multi-entry. Each gates Finalise where it applies.
const sample = isAwaitingReview ? (upload.multiEntrySummary?.sample ?? null) : null;
const isMultiEntry = (upload.multiEntrySummary?.largestCount ?? 0) >= 2;
const verifyAck = upload.verifyAck ?? false;
const orderingConfirmed = upload.multiEntryOrdering?.confirmed ?? false;
const needsVerify = !!sample;
const needsOrdering = !!sample && isMultiEntry;
// One ordering panel per distinct count ≥ 2, ascending (ADR-0004 amendment).
// Fall back to the single Step-1 sample for uploads detected before per-count
// capture existed (samplesByCount absent).
const samplesByCount = upload.multiEntrySummary?.samplesByCount;
const orderingSamples: Array<[string, MultiEntrySample]> =
samplesByCount && Object.keys(samplesByCount).length > 0
? Object.entries(samplesByCount).sort(([a], [b]) => Number(a) - Number(b))
: sample && isMultiEntry
? [[String(sample.count), sample]]
: [];
const showStepNumbers = needsVerify && needsOrdering;
// Descriptions still classified `Unknown` block Finalise — the user must map
// every one to a real value, else the finaliser fails loudly (ADR-0006).
const unknownByField = classifications.data?.unknown ?? {};
const unknownTotal = Object.values(unknownByField).reduce(
(n, descriptions) => n + descriptions.length,
0,
);
const canFinalize =
isAwaitingReview &&
(!needsVerify || verifyAck) &&
(!needsOrdering || orderingConfirmed) &&
unknownTotal === 0;
return (
<div className="mt-6 space-y-3">
@ -65,21 +168,40 @@ export default function OnboardingProgress({
</div>
<div className="flex items-center gap-4 text-xs text-gray-500">
{total > 0 && (
<span>
<span className="font-semibold text-gray-700">{completedSubtasks}</span> / {total} batches complete
</span>
)}
{failedSubtasks > 0 && (
<span className="flex items-center gap-1 text-red-500 font-semibold">
<span className="w-1.5 h-1.5 rounded-full bg-red-400" />
{failedSubtasks} failed
</span>
)}
{!taskDone && (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
Running
{/* Address matching: standardises addresses against the OS lookup, in batches. */}
<span className="flex items-center gap-1">
<span className="text-gray-400">Address matching:</span>
{failedSubtasks > 0 ? (
<span className="flex items-center gap-1 text-red-500 font-semibold">
<span className="w-1.5 h-1.5 rounded-full bg-red-400" />
{failedSubtasks} of {total} batches failed
</span>
) : total > 0 && completedSubtasks >= total ? (
<span className="font-semibold text-green-600">complete</span>
) : (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
running{total > 0 ? ` · ${completedSubtasks} / ${total} batches` : ""}
</span>
)}
</span>
{/* Classification: turns the landlord's free-text descriptions into EPC categories. */}
{classifierTotal > 0 && (
<span className="flex items-center gap-1">
<span className="text-gray-400">Classification:</span>
{classifierFailed > 0 ? (
<span className="flex items-center gap-1 text-red-500 font-semibold">
<span className="w-1.5 h-1.5 rounded-full bg-red-400" />
failed
</span>
) : classifierCompleted >= classifierTotal ? (
<span className="font-semibold text-green-600">complete</span>
) : (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
running
</span>
)}
</span>
)}
{isCombining && (
@ -94,9 +216,58 @@ export default function OnboardingProgress({
Awaiting import
</span>
)}
{isFinalising && (
<span className="flex items-center gap-1 text-blue-500">
<span className="w-1.5 h-1.5 rounded-full bg-blue-400 animate-pulse" />
{statusLabel("finalising")}
</span>
)}
</div>
{(canRunCombiner || canFinalize) && (
{needsVerify && sample && (
<VerifyClassificationPanel
sample={sample}
classifications={classifications.data?.classifications ?? {}}
verified={verifyAck}
stepLabel={showStepNumbers ? "Step 1" : undefined}
portfolioId={portfolioId}
uploadId={uploadId}
/>
)}
{isAwaitingReview && unknownTotal > 0 && (
<UnresolvedClassificationsPanel
unknown={unknownByField}
portfolioId={portfolioId}
uploadId={uploadId}
/>
)}
{needsOrdering && orderingSamples.length > 0 && (
<div className="space-y-3">
{orderingSamples.map(([count, orderSample], i) => (
<MultiEntryOrderingPanel
key={count}
sample={orderSample}
ordering={upload.multiEntryOrdering ?? null}
classifications={classifications.data?.classifications ?? {}}
// Number the panels only when there's also a verify step or more
// than one count, so a lone ordering panel stays unnumbered.
stepLabel={
showStepNumbers
? `Step ${i + 2}`
: orderingSamples.length > 1
? `Part group ${i + 1}`
: undefined
}
portfolioId={portfolioId}
uploadId={uploadId}
/>
))}
</div>
)}
{(canRunCombiner || isAwaitingReview) && (
<div className="flex flex-col gap-2 pt-2">
{canRunCombiner && (
<StageButton
@ -106,11 +277,19 @@ export default function OnboardingProgress({
onClick={() => combine.mutate()}
/>
)}
{canFinalize && (
{isAwaitingReview && (
<StageButton
label="Finalise"
activeLabel="Finalising…"
isPending={finalize.isPending}
disabled={!canFinalize}
disabledReason={
unknownTotal > 0
? `Resolve ${unknownTotal} unclassified description${unknownTotal === 1 ? "" : "s"} first`
: needsVerify && !verifyAck
? "Verify the classification first"
: "Confirm the building-part order first"
}
onClick={() =>
finalize.mutate(undefined, { onSuccess: () => router.refresh() })
}
@ -141,23 +320,395 @@ export default function OnboardingProgress({
);
}
/**
 * Step 1 — Verify classification (ADR-0004).
 *
 * Lists how we read the sample address's descriptions (per mapped classifier
 * column) and lets the user correct any, written back as source='user'.
 * Acknowledging unlocks Finalise. Shown whenever classifier columns were
 * mapped, multi-entry or not.
 *
 * @param sample - Sample address whose columns and entries are listed.
 * @param classifications - Current value per field and description; a missing
 *   entry is rendered as "not classified".
 * @param verified - Whether the classification has already been acknowledged.
 * @param stepLabel - Optional prefix (e.g. "Step 1") rendered before the title.
 * @param portfolioId - Forwarded to the edit/confirm mutation hooks.
 * @param uploadId - Forwarded to the edit/confirm mutation hooks.
 */
function VerifyClassificationPanel({
  sample,
  classifications,
  verified,
  stepLabel,
  portfolioId,
  uploadId,
}: {
  sample: MultiEntrySample;
  classifications: SampleClassifications;
  verified: boolean;
  stepLabel?: string;
  portfolioId: string;
  uploadId: string;
}) {
  const editClassification = useEditClassification(portfolioId, uploadId);
  const confirm = useConfirmVerification(portfolioId, uploadId);
  // Keep the optional step prefix visually separated from the title.
  const heading = stepLabel ? `${stepLabel}: Verify classification` : "Verify classification";
  return (
    <div className="rounded-lg border border-amber-200 bg-amber-50 px-4 py-3">
      <p className="text-sm font-semibold text-amber-900">{heading}</p>
      <p className="mt-0.5 text-xs text-amber-800">
        Based on your column mapping, here&apos;s how we read{" "}
        {sample.address ? <span className="font-medium">{sample.address}</span> : "one sample address"}.
        Correct anything that&apos;s wrong, then confirm.
      </p>
      <div className="mt-3 space-y-3">
        {sample.columns.map((column) => {
          // One editable row per distinct description, in file order.
          const seen = new Set<string>();
          const entries = column.entries.filter((entry) => {
            if (seen.has(entry.description)) return false;
            seen.add(entry.description);
            return true;
          });
          const options = CATEGORY_VALUES[column.field] ?? [];
          return (
            <div key={column.field}>
              <p className="text-[11px] font-semibold uppercase tracking-wide text-amber-700">
                {FIELD_LABEL[column.field] ?? column.field}
                <span className="ml-1.5 font-normal normal-case tracking-normal text-amber-600">
                  from your &ldquo;{column.header}&rdquo; column
                </span>
              </p>
              <div className="mt-1 space-y-1">
                {entries.map((entry) => {
                  const classified = classifications[column.field]?.[entry.description] ?? "";
                  return (
                    <div
                      key={entry.description}
                      className="flex items-center gap-2 text-xs text-amber-900"
                    >
                      <span className="min-w-0 flex-1 truncate" title={entry.raw}>
                        {entry.raw}
                      </span>
                      {/* Decorative "maps to" arrow between the raw text and its category. */}
                      <span className="text-amber-400" aria-hidden="true">
                        →
                      </span>
                      <select
                        value={classified}
                        onChange={(e) =>
                          editClassification.mutate({
                            field: column.field,
                            description: entry.description,
                            value: e.target.value,
                          })
                        }
                        disabled={editClassification.isPending}
                        className="max-w-[14rem] rounded border border-amber-300 bg-white px-1.5 py-0.5 text-[11px] text-amber-900"
                      >
                        {/* Placeholder row: echoes the current value so it stays
                            readable even when it is not one of the options. */}
                        <option value="" disabled>
                          {classified ? classified : "not classified"}
                        </option>
                        {options.map((opt) => (
                          <option key={opt} value={opt}>
                            {opt}
                          </option>
                        ))}
                      </select>
                    </div>
                  );
                })}
              </div>
            </div>
          );
        })}
      </div>
      <p className="mt-2 text-[11px] text-amber-700">
        Correcting a classification updates that description for{" "}
        <span className="font-medium">every</span> row across the portfolio, not
        just this address.
      </p>
      {editClassification.error && (
        <p className="mt-1 text-xs text-red-600">{editClassification.error.message}</p>
      )}
      <div className="mt-3 flex items-center gap-3">
        <button
          // Explicit type so the button never submits an enclosing form.
          type="button"
          onClick={() => confirm.mutate()}
          disabled={confirm.isPending}
          className={`inline-flex items-center gap-1.5 rounded-lg px-4 py-1.5 text-xs font-bold text-white transition-opacity ${
            confirm.isPending ? "bg-amber-300 cursor-not-allowed" : "bg-amber-600 hover:opacity-90"
          }`}
        >
          {confirm.isPending ? "Saving…" : verified ? "Re-confirm" : "These look right"}
        </button>
        {verified && !confirm.isPending && (
          <span className="inline-flex items-center gap-1 text-xs font-semibold text-green-600">
            <CheckCircleIcon className="h-4 w-4" />
            Classification verified
          </span>
        )}
      </div>
      {confirm.error && (
        <p className="mt-2 text-xs text-red-600">{confirm.error.message}</p>
      )}
    </div>
  );
}
/**
 * Interactive building-part ordering for ONE entry-count's sample (ADR-0004,
 * amended 2026-06-05 — one panel per distinct count).
 *
 * The user labels each file position with a building part (one Main building +
 * Extensions); the labels must form a permutation. Confirming persists this
 * count's ordering (merged server-side with the other counts'); Finalise
 * unlocks once every count is confirmed.
 *
 * @param sample - Sample address for this entry count.
 * @param ordering - Stored ordering, if any; its permutation for this count
 *   seeds the initial selection and drives the "confirmed" indicator.
 * @param classifications - Current value per field and description, shown as a
 *   read-only annotation under each entry.
 * @param stepLabel - Optional prefix (e.g. "Step 2") rendered before the title.
 * @param portfolioId - Forwarded to the confirm mutation hook.
 * @param uploadId - Forwarded to the confirm mutation hook.
 */
function MultiEntryOrderingPanel({
  sample,
  ordering,
  classifications,
  stepLabel,
  portfolioId,
  uploadId,
}: {
  sample: MultiEntrySample;
  ordering: MultiEntryOrdering | null;
  classifications: SampleClassifications;
  stepLabel?: string;
  portfolioId: string;
  uploadId: string;
}) {
  const confirm = useConfirmMultiEntryOrdering(portfolioId, uploadId);
  const count = sample.count;
  // Only the multi-valued columns are sliced into building parts; single-value
  // columns are whole-dwelling facts (verified in Step 1, not ordered here).
  const orderColumns = sample.columns.filter((column) => column.entries.length > 1);
  // assignment[filePosition] = building-part slot. Seed from a stored ordering
  // (slot -> position, so invert) or default to identity (main building first).
  const [assignment, setAssignment] = useState<number[]>(() => {
    const identity = Array.from({ length: count }, (_, i) => i);
    const stored = ordering?.permutations?.[String(count)];
    if (!stored || stored.length !== count) return identity;
    const seeded = new Array<number>(count).fill(-1);
    stored.forEach((position, slot) => {
      if (Number.isInteger(position) && position >= 0 && position < count) {
        seeded[position] = slot;
      }
    });
    // A stored ordering with duplicate or out-of-range positions would leave
    // unfilled entries; fall back to identity rather than seed a broken state.
    return seeded.includes(-1) ? identity : seeded;
  });
  // Per-panel confirmation reflects whether THIS count's permutation is stored,
  // not the global all-counts-confirmed flag — so each panel gives its own
  // feedback as the user works through them.
  const confirmed = Array.isArray(ordering?.permutations?.[String(count)]);
  const valid = isPermutation(assignment);
  const setSlot = (position: number, slot: number) =>
    setAssignment((prev) => prev.map((s, i) => (i === position ? slot : s)));
  const onConfirm = () => {
    if (!valid) return;
    confirm.mutate({
      permutations: { [String(count)]: assignmentToPermutation(assignment) },
    });
  };
  // Keep the optional step prefix visually separated from the title.
  const heading = stepLabel
    ? `${stepLabel}: Confirm building-part order`
    : "Confirm building-part order";
  return (
    <div className="rounded-lg border border-amber-200 bg-amber-50 px-4 py-3">
      <p className="text-sm font-semibold text-amber-900">{heading}</p>
      <p className="mt-0.5 text-xs text-amber-800">
        {sample.address ? <span className="font-medium">{sample.address}</span> : "An address"}{" "}
        has {count} building parts. Tell us which entry is the main building and
        which are extensions — we&apos;ll apply the same order to every{" "}
        {count}-part row in this file.
      </p>
      <div className="mt-3 overflow-x-auto">
        <table className="w-full border-collapse text-xs">
          <thead>
            <tr className="text-left text-amber-700">
              <th className="py-1 pr-3 font-medium">Entry</th>
              {orderColumns.map((column) => (
                <th key={column.field} className="py-1 pr-3 font-medium">
                  {FIELD_LABEL[column.field] ?? column.header}
                  <span className="block text-[10px] font-normal text-amber-500">
                    your &ldquo;{column.header}&rdquo;
                  </span>
                </th>
              ))}
              <th className="py-1 pr-3 font-medium">Building part</th>
            </tr>
          </thead>
          <tbody>
            {Array.from({ length: count }).map((_, position) => (
              <tr key={position} className="border-t border-amber-100 text-amber-900">
                <td className="py-1 pr-3 text-amber-600">{position + 1}</td>
                {orderColumns.map((column) => {
                  const entry = column.entries[position];
                  const classified = entry
                    ? classifications[column.field]?.[entry.description] ?? ""
                    : "";
                  return (
                    <td key={column.field} className="py-1 pr-3 align-top">
                      <div>{entry?.raw ?? "—"}</div>
                      {/* Read-only classification annotation; edit it in Step 1. */}
                      {entry && (
                        <div className="mt-0.5 text-[11px] text-amber-600">
                          {classified || "not classified"}
                        </div>
                      )}
                    </td>
                  );
                })}
                <td className="py-1 pr-3">
                  <select
                    value={assignment[position]}
                    onChange={(e) => setSlot(position, Number(e.target.value))}
                    className="rounded border border-amber-300 bg-white px-2 py-1 text-xs text-amber-900"
                  >
                    {Array.from({ length: count }).map((_, slot) => (
                      <option key={slot} value={slot}>
                        {partLabel(slot)}
                      </option>
                    ))}
                  </select>
                </td>
              </tr>
            ))}
          </tbody>
        </table>
      </div>
      {!valid && (
        <p className="mt-2 text-xs text-red-600">
          Each part (Main building, Extension 1, …) must be used exactly once.
        </p>
      )}
      <div className="mt-3 flex items-center gap-3">
        <button
          // Explicit type so the button never submits an enclosing form.
          type="button"
          onClick={onConfirm}
          disabled={!valid || confirm.isPending}
          className={`inline-flex items-center gap-1.5 rounded-lg px-4 py-1.5 text-xs font-bold text-white transition-opacity ${
            !valid || confirm.isPending
              ? "bg-amber-300 cursor-not-allowed"
              : "bg-amber-600 hover:opacity-90"
          }`}
        >
          {confirm.isPending ? "Saving…" : confirmed ? "Update order" : "Confirm order"}
        </button>
        {confirmed && !confirm.isPending && (
          <span className="inline-flex items-center gap-1 text-xs font-semibold text-green-600">
            <CheckCircleIcon className="h-4 w-4" />
            Order confirmed
          </span>
        )}
      </div>
      {confirm.error && (
        <p className="mt-2 text-xs text-red-600">{confirm.error.message}</p>
      )}
    </div>
  );
}
/**
 * Unresolved-classification gate (ADR-0006).
 *
 * Lists every description still classified `Unknown` portfolio-wide and lets
 * the user map each to a real value via the same per-description override path
 * as Step 1 (it applies portfolio-wide). Finalise stays blocked until this list
 * is empty — `Unknown` is never a final value, and an unresolved one would fail
 * the import loudly.
 *
 * @param unknown - Unresolved descriptions grouped by classifier field.
 * @param portfolioId - Forwarded to the edit mutation hook.
 * @param uploadId - Forwarded to the edit mutation hook.
 */
function UnresolvedClassificationsPanel({
  unknown,
  portfolioId,
  uploadId,
}: {
  unknown: Record<string, string[]>;
  portfolioId: string;
  uploadId: string;
}) {
  const editClassification = useEditClassification(portfolioId, uploadId);
  const total = Object.values(unknown).reduce((n, d) => n + d.length, 0);
  return (
    <div className="rounded-lg border border-red-200 bg-red-50 px-4 py-3">
      <p className="text-sm font-semibold text-red-900">
        Resolve unclassified descriptions ({total})
      </p>
      <p className="mt-0.5 text-xs text-red-800">
        We couldn&apos;t classify these automatically. Map each to a category
        before finalising — an unresolved value would fail the import. Edits apply
        to <span className="font-medium">every</span> row across the portfolio.
      </p>
      <div className="mt-3 space-y-3">
        {Object.entries(unknown).map(([field, descriptions]) => {
          // `Unknown` is exactly what is being resolved, so never offer it.
          const options = (CATEGORY_VALUES[field] ?? []).filter((o) => o !== "Unknown");
          return (
            <div key={field}>
              <p className="text-[11px] font-semibold uppercase tracking-wide text-red-700">
                {FIELD_LABEL[field] ?? field}
              </p>
              <div className="mt-1 space-y-1">
                {descriptions.map((description) => (
                  <div
                    key={description}
                    className="flex items-center gap-2 text-xs text-red-900"
                  >
                    <span className="min-w-0 flex-1 truncate" title={description}>
                      {description}
                    </span>
                    {/* Decorative "maps to" arrow between the description and its category. */}
                    <span className="text-red-400" aria-hidden="true">
                      →
                    </span>
                    <select
                      defaultValue=""
                      onChange={(e) => {
                        // Ignore the placeholder's empty value.
                        if (e.target.value) {
                          editClassification.mutate({
                            field,
                            description,
                            value: e.target.value,
                          });
                        }
                      }}
                      disabled={editClassification.isPending}
                      className="max-w-[14rem] rounded border border-red-300 bg-white px-1.5 py-0.5 text-[11px] text-red-900"
                    >
                      <option value="" disabled>
                        Choose a category
                      </option>
                      {options.map((opt) => (
                        <option key={opt} value={opt}>
                          {opt}
                        </option>
                      ))}
                    </select>
                  </div>
                ))}
              </div>
            </div>
          );
        })}
      </div>
      {editClassification.error && (
        <p className="mt-1 text-xs text-red-600">{editClassification.error.message}</p>
      )}
    </div>
  );
}
function StageButton({
label,
activeLabel,
isPending,
disabled = false,
disabledReason,
onClick,
}: {
label: string;
activeLabel: string;
isPending: boolean;
disabled?: boolean;
disabledReason?: string;
onClick: () => void;
}) {
const blocked = isPending || disabled;
return (
<button
onClick={onClick}
disabled={isPending}
disabled={blocked}
title={disabled && !isPending ? disabledReason : undefined}
className={`inline-flex items-center gap-2 self-start px-5 py-2 rounded-xl bg-gradient-to-br from-[#14163d] to-[#15173e] text-white text-sm font-bold transition-opacity ${
isPending ? "opacity-50 cursor-not-allowed" : "hover:opacity-90"
blocked ? "opacity-50 cursor-not-allowed" : "hover:opacity-90"
}`}
>
{isPending ? (

View file

@ -10,38 +10,14 @@ import {
ArrowsRightLeftIcon,
} from "@heroicons/react/24/outline";
import { useSetColumnMapping } from "@/lib/bulkUpload/client";
// Internal fields a spreadsheet column can be mapped to. `required` marks the
// fields that must be mapped before the mapping can be submitted; "skip" is
// the sentinel for columns that are ignored.
const INTERNAL_FIELDS = [
  { value: "address_1", label: "Address 1", required: true },
  { value: "address_2", label: "Address 2", required: false },
  { value: "address_3", label: "Address 3", required: false },
  { value: "postcode", label: "Postcode", required: true },
  { value: "internal_reference", label: "Internal Reference (Optional)", required: false },
  { value: "skip", label: "Skip this column", required: false },
];
// Derived from the `required` flags above so the two can never drift apart.
const REQUIRED_VALUES = INTERNAL_FIELDS.filter((field) => field.required).map(
  (field) => field.value,
);
/**
 * Guesses the internal field for a spreadsheet header.
 *
 * The header is lower-cased and stripped of whitespace, underscores and
 * hyphens, then tested against each pattern in order; the first match wins.
 *
 * @param header - Raw column header from the uploaded file.
 * @returns The matching internal field value, or "skip" when nothing matches.
 */
function autoDetect(header: string): string {
  const normalised = header.toLowerCase().replace(/[\s_\-]/g, "");
  // Order matters: earlier rules take precedence over later ones.
  const rules: Array<[RegExp, string]> = [
    [/^(address|addr)(line)?(1|one)?$/, "address_1"],
    [/^(address|addr)(line)?(2|two)|^street$/, "address_2"],
    [/^(address|addr)(line)?(3|three)|^locality$|^town$|^city$/, "address_3"],
    [/^post(al)?code$|^postcode$|^pcode$/, "postcode"],
    [/^(internal)?ref(erence)?$|^id$/, "internal_reference"],
  ];
  for (const [pattern, field] of rules) {
    if (pattern.test(normalised)) {
      return field;
    }
  }
  return "skip";
}
/**
 * Builds the starting header -> internal-field mapping.
 *
 * @param headers - Column headers from the uploaded file.
 * @param existing - Previously chosen mapping, if any; its entries win over
 *   auto-detection.
 * @returns One entry per header, taken from `existing` when present and
 *   otherwise from `autoDetect`.
 */
function buildInitialMapping(
  headers: string[],
  existing?: Record<string, string>
): Record<string, string> {
  return headers.reduce<Record<string, string>>((result, header) => {
    const remembered = existing?.[header];
    result[header] = remembered ?? autoDetect(header);
    return result;
  }, {});
}
import {
ADDRESS_FIELDS,
CLASSIFIER_FIELDS,
NOT_PROVIDED,
buildInitialMapping,
validateColumnMapping,
type InternalField,
} from "@/lib/bulkUpload/columnFields";
interface Props {
portfolioId: string;
@ -59,19 +35,24 @@ export default function MapColumnsClient({
existingMapping,
}: Props) {
const router = useRouter();
// mapping: internal field → source CSV header. Unmapped fields are absent.
const [mapping, setMapping] = useState<Record<string, string>>(
buildInitialMapping(sourceHeaders, existingMapping)
);
const setMappingMutation = useSetColumnMapping(portfolioId, uploadId);
const mappedValues = Object.values(mapping).filter((v) => v !== "skip");
const missingRequired = REQUIRED_VALUES.filter((r) => !mappedValues.includes(r));
const validationError = validateColumnMapping(mapping);
const submitting = setMappingMutation.isPending;
const error = setMappingMutation.error?.message ?? null;
const canSubmit = missingRequired.length === 0 && !submitting;
const requestError = setMappingMutation.error?.message ?? null;
const canSubmit = validationError === null && !submitting;
function setField(header: string, value: string) {
setMapping((prev) => ({ ...prev, [header]: value }));
function setField(field: string, header: string) {
setMapping((prev) => {
const next = { ...prev };
if (header === NOT_PROVIDED) delete next[field];
else next[field] = header;
return next;
});
}
function handleSubmit() {
@ -86,6 +67,86 @@ export default function MapColumnsClient({
);
}
// Renders one mapping row for an internal field: its label, a picker listing
// the file's headers (plus "Not provided"), and a mapped/not-provided badge.
function renderRow(field: InternalField) {
  const selectedHeader = mapping[field.value] ?? NOT_PROVIDED;
  const hasMapping = selectedHeader !== NOT_PROVIDED;
  const kindCaption = field.kind === "classifier" ? "Landlord description" : "Internal field";
  const badgeTone = hasMapping ? "bg-amber-50 text-amber-700" : "bg-gray-100 text-gray-400";
  const badgeText = hasMapping ? "Mapped" : "Not provided";
  // The index in the key keeps duplicate header names from colliding.
  const headerOptions = sourceHeaders.map((header, index) => (
    <option key={`${header}-${index}`} value={header}>
      {header}
    </option>
  ));
  return (
    <div
      key={field.value}
      className="grid grid-cols-12 items-center px-6 py-4 hover:bg-gray-50/50 transition-colors"
    >
      {/* Internal field */}
      <div className="col-span-4 flex items-center gap-3">
        <div className="w-8 h-8 rounded-lg bg-gray-100 flex items-center justify-center shrink-0">
          <TableCellsIcon className="h-4 w-4 text-gray-400" />
        </div>
        <div>
          <p className="text-sm font-semibold text-gray-900">
            {field.label}
            {field.required && <span className="text-amber-600"> *</span>}
          </p>
          <p className="text-xs text-gray-400">{kindCaption}</p>
        </div>
      </div>
      {/* Arrow */}
      <div className="col-span-1 flex justify-center">
        <ArrowsRightLeftIcon className="h-4 w-4 text-gray-300" />
      </div>
      {/* Header picker */}
      <div className="col-span-5">
        <select
          value={selectedHeader}
          onChange={(event) => setField(field.value, event.target.value)}
          className="w-full text-sm border border-gray-200 rounded-lg px-3 py-2 bg-white text-gray-800 focus:outline-none focus:ring-2 focus:ring-[#14163d]/20 focus:border-[#14163d]"
        >
          <option value={NOT_PROVIDED}>Not provided</option>
          {headerOptions}
        </select>
      </div>
      {/* Status badge */}
      <div className="col-span-2 flex justify-end">
        <span
          className={`inline-flex items-center gap-1 px-2.5 py-1 rounded-full text-xs font-semibold ${badgeTone}`}
        >
          <span className="w-1.5 h-1.5 rounded-full bg-current opacity-70" />
          {badgeText}
        </span>
      </div>
    </div>
  );
}
// Renders a titled card holding one mapping row per field, or an empty-state
// message when the file has no headers.
function renderSection(title: string, subtitle: string, fields: InternalField[]) {
  const hasHeaders = sourceHeaders.length !== 0;
  const content = hasHeaders ? (
    <div className="divide-y divide-gray-50">{fields.map(renderRow)}</div>
  ) : (
    <div className="px-6 py-12 text-center text-sm text-gray-400">
      No headers found in this file.
    </div>
  );
  return (
    <div className="bg-white border border-gray-100 rounded-2xl overflow-hidden shadow-sm mb-6">
      <div className="px-6 py-3 bg-gray-50 border-b border-gray-100">
        <p className="text-xs font-semibold text-gray-500 uppercase tracking-wider">
          {title}
        </p>
        <p className="text-xs text-gray-400 mt-0.5">{subtitle}</p>
      </div>
      {content}
    </div>
  );
}
return (
<div className="max-w-4xl mx-auto px-6 py-10">
{/* Breadcrumb + step */}
@ -116,102 +177,27 @@ export default function MapColumnsClient({
Column Remapper
</h1>
<p className="text-sm text-gray-500 max-w-lg">
Align your spreadsheet headers with our internal property data structure to
ensure accurate address processing.
Tell us which spreadsheet column feeds each field. Address fields drive
matching; landlord-description fields are classified into property facts.
</p>
</div>
{/* Table */}
<div className="bg-white border border-gray-100 rounded-2xl overflow-hidden shadow-sm mb-6">
{/* Column headers */}
<div className="grid grid-cols-12 items-center px-6 py-3 bg-gray-50 border-b border-gray-100">
<span className="col-span-4 text-xs font-semibold text-gray-400 uppercase tracking-wider">
Spreadsheet Header
</span>
<span className="col-span-1" />
<span className="col-span-5 text-xs font-semibold text-gray-400 uppercase tracking-wider">
Internal Field Mapping
</span>
<span className="col-span-2 text-xs font-semibold text-gray-400 uppercase tracking-wider text-right">
Status
</span>
</div>
{sourceHeaders.length === 0 ? (
<div className="px-6 py-12 text-center text-sm text-gray-400">
No headers found in this file.
</div>
) : (
<div className="divide-y divide-gray-50">
{sourceHeaders.map((header) => {
const value = mapping[header] ?? "skip";
const isMapped = value !== "skip";
return (
<div
key={header}
className="grid grid-cols-12 items-center px-6 py-4 hover:bg-gray-50/50 transition-colors"
>
{/* Source header */}
<div className="col-span-4 flex items-center gap-3">
<div className="w-8 h-8 rounded-lg bg-gray-100 flex items-center justify-center shrink-0">
<TableCellsIcon className="h-4 w-4 text-gray-400" />
</div>
<div>
<p className="text-sm font-semibold text-gray-900">{header}</p>
<p className="text-xs text-gray-400">Source column</p>
</div>
</div>
{/* Arrow */}
<div className="col-span-1 flex justify-center">
<ArrowsRightLeftIcon className="h-4 w-4 text-gray-300" />
</div>
{/* Dropdown */}
<div className="col-span-5">
<select
value={value}
onChange={(e) => setField(header, e.target.value)}
className="w-full text-sm border border-gray-200 rounded-lg px-3 py-2 bg-white text-gray-800 focus:outline-none focus:ring-2 focus:ring-[#14163d]/20 focus:border-[#14163d]"
>
{INTERNAL_FIELDS.map((f) => (
<option key={f.value} value={f.value}>
{f.label}
</option>
))}
</select>
</div>
{/* Status badge */}
<div className="col-span-2 flex justify-end">
<span
className={`inline-flex items-center gap-1 px-2.5 py-1 rounded-full text-xs font-semibold ${
isMapped
? "bg-amber-50 text-amber-700"
: "bg-gray-100 text-gray-400"
}`}
>
<span className="w-1.5 h-1.5 rounded-full bg-current opacity-70" />
{isMapped ? "Mapped" : "Skipped"}
</span>
</div>
</div>
);
})}
</div>
)}
</div>
{/* Validation error */}
{missingRequired.length > 0 && (
<p className="text-xs text-amber-600 mb-4">
Required fields not yet mapped:{" "}
{missingRequired
.map((r) => INTERNAL_FIELDS.find((f) => f.value === r)?.label)
.join(", ")}
</p>
{renderSection(
"Address fields",
"Used for address matching. A column can feed only one address field.",
ADDRESS_FIELDS
)}
{error && <p className="text-xs text-red-500 mb-4">{error}</p>}
{renderSection(
"Landlord description fields (optional)",
"Classified into property facts. Several fields may share one column.",
CLASSIFIER_FIELDS
)}
{/* Validation / request error */}
{validationError && (
<p className="text-xs text-amber-600 mb-4">{validationError}</p>
)}
{requestError && <p className="text-xs text-red-500 mb-4">{requestError}</p>}
{/* Footer */}
<div className="flex items-center justify-between">
@ -249,8 +235,10 @@ export default function MapColumnsClient({
Pro Tip
</p>
<p className="text-sm text-gray-500 italic">
&ldquo;Ensure your source file doesn&apos;t have blank headers. Any column mapped to
&ldquo;Skip&rdquo; will be ignored during import.&rdquo;
&ldquo;Fields left as &ldquo;Not provided&rdquo; are ignored. The same
column can feed several landlord-description fields e.g. one
&ldquo;Property Type&rdquo; column can drive both Property Type and Built
Form.&rdquo;
</p>
</div>
</div>

View file

@ -67,6 +67,14 @@ const STATUS_CONFIG = {
body: "Matches ready, writing into your portfolio.",
cta: false,
},
finalising: {
icon: ArrowPathIcon,
iconBg: "bg-blue-50",
iconColor: "text-blue-500",
title: "Uploading to ARA",
body: "Creating your properties from the matched addresses. This can take a little while for large files.",
cta: false,
},
complete: {
icon: CheckCircleIcon,
iconBg: "bg-green-50",
@ -167,6 +175,7 @@ export default async function BulkUploadDetailPage(props: {
{(statusKey === "processing" ||
statusKey === "combining" ||
statusKey === "awaiting_review" ||
statusKey === "finalising" ||
statusKey === "complete" ||
statusKey === "failed") &&
upload.taskId && (
@ -174,6 +183,7 @@ export default async function BulkUploadDetailPage(props: {
portfolioSlug={slug}
portfolioId={upload.portfolioId}
uploadId={uploadId}
serverStatus={upload.status}
isDomnaUser={isDomnaUser}
/>
)}

View file

@ -14,6 +14,7 @@ import {
// Badge text and Tailwind colour classes per upload status key.
// NOTE(review): statuses such as "combining" and "awaiting_review" appear
// elsewhere in this change but have no entry here — confirm the lookup site
// has a fallback for missing keys.
const STATUS_LABELS: Record<string, { label: string; classes: string }> = {
ready_for_processing: { label: "Ready", classes: "bg-amber-100 text-amber-700" },
processing: { label: "Processing", classes: "bg-blue-100 text-blue-700" },
// User-facing wording for the "finalising" status.
finalising: { label: "Uploading to ARA", classes: "bg-blue-100 text-blue-700" },
complete: { label: "Complete", classes: "bg-green-100 text-green-700" },
failed: { label: "Failed", classes: "bg-red-100 text-red-700" },
};

View file

@ -0,0 +1,101 @@
import { getServerSession } from "next-auth";
import { redirect } from "next/navigation";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import {
getLandlordOverrides,
type OverrideRow,
type LandlordOverrideCategory,
} from "@/lib/landlordOverrides/server";
import { CLASSIFIER_FIELDS } from "@/lib/bulkUpload/columnFields";
/**
 * Read-only page listing the landlord override values for a portfolio, one
 * section per classifier field.
 *
 * Redirects to `/login` when there is no session. Shows a single empty state
 * when no category has any rows.
 *
 * @param props.params - Route params promise carrying the portfolio `slug`.
 */
export default async function LandlordOverridesPage(props: {
  params: Promise<{ slug: string }>;
}) {
  const { slug } = await props.params;
  const session = await getServerSession(AuthOptions);
  if (!session) redirect("/login");
  const results = await getLandlordOverrides(slug);
  const total = Object.values(results).reduce((n, rows) => n + rows.length, 0);
  return (
    <div className="max-w-4xl mx-auto px-6 py-10">
      <div className="mb-8">
        <h1 className="text-3xl font-extrabold text-gray-900 tracking-tight mb-1">
          Landlord overrides
        </h1>
        <p className="text-sm text-gray-500 max-w-lg">
          Property facts classified from your bulk-upload descriptions. Read-only
          — editing comes later.
        </p>
      </div>
      {total === 0 ? (
        <div className="bg-white border border-gray-100 rounded-2xl px-6 py-12 text-center text-sm text-gray-400 shadow-sm">
          No classified values yet. They appear here once a bulk upload with
          landlord-description columns has been processed.
        </div>
      ) : (
        CLASSIFIER_FIELDS.map((field) => (
          <OverrideSection
            key={field.value}
            title={field.label}
            // Fall back to an empty list so a classifier field with no key in
            // `results` renders its empty state instead of throwing.
            rows={results[field.value as LandlordOverrideCategory] ?? []}
          />
        ))
      )}
    </div>
  );
}
/**
 * Card for one override category: a header with the title and value count,
 * then either an empty-state message or one row per override showing its
 * description, value and source badge.
 */
function OverrideSection({ title, rows }: { title: string; rows: OverrideRow[] }) {
  const rowCount = rows.length;
  const unit = rowCount === 1 ? "value" : "values";
  const content =
    rowCount === 0 ? (
      <div className="px-6 py-8 text-center text-sm text-gray-400">
        No values for this category.
      </div>
    ) : (
      <div className="divide-y divide-gray-50">
        {rows.map((override, index) => (
          <div key={index} className="grid grid-cols-12 items-center px-6 py-3 gap-2">
            <p
              className="col-span-6 text-sm text-gray-700 truncate"
              title={override.description}
            >
              {override.description}
            </p>
            <p className="col-span-4 text-sm font-semibold text-gray-900">
              {override.value}
            </p>
            <div className="col-span-2 flex justify-end">
              <SourceBadge source={override.source} />
            </div>
          </div>
        ))}
      </div>
    );
  return (
    <div className="bg-white border border-gray-100 rounded-2xl overflow-hidden shadow-sm mb-6">
      <div className="flex items-center justify-between px-6 py-3 bg-gray-50 border-b border-gray-100">
        <p className="text-xs font-semibold text-gray-500 uppercase tracking-wider">
          {title}
        </p>
        <span className="text-xs text-gray-400">
          {rowCount} {unit}
        </span>
      </div>
      {content}
    </div>
  );
}
/**
 * Pill showing where an override came from: "user" when `source` is exactly
 * "user", otherwise "classifier".
 */
function SourceBadge({ source }: { source: string }) {
  const fromUser = source === "user";
  const tone = fromUser ? "bg-emerald-50 text-emerald-700" : "bg-indigo-50 text-indigo-700";
  const text = fromUser ? "user" : "classifier";
  return (
    <span
      className={`inline-flex items-center px-2.5 py-1 rounded-full text-xs font-semibold ${tone}`}
    >
      {text}
    </span>
  );
}

View file

@ -64,6 +64,10 @@ const COLUMN_LABELS: Record<string, string> = {
epcPrn: "EPC Certificate Number",
batch: "Group",
batchDescription: "Group Description",
coordinationComments: "Coordination Comments",
dampAndMouldGrowth: "Damp and Mould Growth",
dampMouldAndRepairComments: "Damp, Mould and Repair Comments",
domnaSurveyRequested: "Domna Survey Requested"
};
type DocFilter = "all" | "has_docs" | "incomplete" | "none";
@ -106,6 +110,10 @@ export default function PropertyTable({ data, onOpenDrawer, portfolioId = "", sh
epcPrn: false,
batch: false,
batchDescription: false,
coordinationComments: false,
dampAndMouldGrowth: false,
dampMouldAndRepairComments: false,
domnaSurveyRequested: false,
});
// Pre-filter by stage, doc status, and removal status before TanStack gets it

View file

@ -268,12 +268,12 @@ export function createPropertyTableColumns(
// ── EPC SAP score (potential) ─────────────────────────────────────────
{
accessorKey: "epcSapScorePotential",
accessorKey: "potentialPostSapScoreDropdown",
id: "epcSapScorePotential",
header: ({ column }) => <SortableHeader label="EPC SAP (Potential)" column={column as any} />,
cell: ({ row }) => (
<span className="text-xs font-mono text-gray-600">
{row.original.epcSapScorePotential ?? <span className="text-gray-300"></span>}
{row.original.potentialPostSapScoreDropdown ?? <span className="text-gray-300"></span>}
</span>
),
},
@ -371,6 +371,74 @@ export function createPropertyTableColumns(
),
},
// ── Coordination comments ────────────────────────────────────────────
{
accessorKey: "coordinationComments",
id: "coordinationComments",
header: () => (
<span className="text-xs font-semibold uppercase tracking-wide text-gray-500">
Coordination Comments
</span>
),
cell: ({ row }) => (
<span className="text-xs text-gray-600 max-w-[220px] line-clamp-2 leading-snug">
{row.original.coordinationComments ?? <span className="text-gray-300"></span>}
</span>
),
},
// ── Damp and mould growth ────────────────────────────────────────────
{
id: "dampAndMouldGrowth",
accessorFn: (row) => row.dampMouldFlag,
header: () => (
<span className="text-xs font-semibold uppercase tracking-wide text-gray-500">
Damp &amp; Mould Growth
</span>
),
cell: ({ row }) => (
<span className="text-xs text-gray-600">
{row.original.dampMouldFlag ?? <span className="text-gray-300"></span>}
</span>
),
},
// ── Damp, mould and repair comments ─────────────────────────────────
{
accessorKey: "dampMouldAndRepairComments",
id: "dampMouldAndRepairComments",
header: () => (
<span className="text-xs font-semibold uppercase tracking-wide text-gray-500">
Damp, Mould &amp; Repair Comments
</span>
),
cell: ({ row }) => (
<span className="text-xs text-gray-600 max-w-[220px] line-clamp-2 leading-snug">
{row.original.dampMouldAndRepairComments ?? <span className="text-gray-300"></span>}
</span>
),
},
// ── Domna survey required ────────────────────────────────────────────
{
id: "domnaSurveyRequested",
accessorFn: (row) => row.domnasurveyRequired,
header: () => (
<span className="text-xs font-semibold uppercase tracking-wide text-gray-500">
Domna Survey Required
</span>
),
cell: ({ row }) => {
const val = row.original.domnasurveyRequired;
if (val === null || val === undefined) return <span className="text-gray-300"></span>;
return (
<span className={`text-xs font-medium ${val ? "text-amber-700" : "text-gray-500"}`}>
{val ? "Yes" : "No"}
</span>
);
},
},
];
if (showDocuments) {

View file

@ -93,7 +93,7 @@ export default function DealPage({
});
const parsedPreSap = parsePreSap(deal.preSapScore);
const epcPotential = sapToEpc(deal.epcSapScorePotential != null ? Number(deal.epcSapScorePotential) : null);
const epcPotential = sapToEpc(deal.potentialPostSapScoreDropdown != null ? Number(deal.potentialPostSapScoreDropdown) : null);
const technicalApprovedMeasures = parseMeasures(
deal.technicalApprovedMeasuresForInstall ?? null,
);

View file

@ -187,7 +187,7 @@ const mockDealRow = {
eiScore: null,
eiScorePotential: null,
epcSapScore: null,
epcSapScorePotential: null,
potentialPostSapScoreDropdown: null,
epcPrn: null,
surveyType: null,
measuresForPibiOrdered: null,

View file

@ -36,6 +36,8 @@ export function mapDbRowToHubspotDeal(row: DealRow): HubspotDeal {
sharepointLink: d.sharepointLink,
dampMouldFlag: d.dampmouldGrowth,
dampMouldAndRepairComments: d.dampMouldAndRepairComments,
coordinationComments: d.coordination_comments,
domnasurveyRequired: d.domnasurveyRequired,
preSapScore: d.preSap,
coordinator: row.coordinator,
ioeV1Date: d.mtpCompletionDate,
@ -58,7 +60,7 @@ export function mapDbRowToHubspotDeal(row: DealRow): HubspotDeal {
eiScore: d.eiScore,
eiScorePotential: d.eiScorePotential,
epcSapScore: d.epcSapScore,
epcSapScorePotential: d.epcSapScorePotential,
potentialPostSapScoreDropdown: d.potentialPostSapScoreDropdown,
epcPrn: d.epcPrn,
surveyType: d.surveyType,
measuresForPibiOrdered: d.measuresForPibiOrdered,

View file

@ -47,7 +47,7 @@ function makeDeal(overrides: Partial<HubspotDeal> = {}): HubspotDeal {
eiScore: null,
eiScorePotential: null,
epcSapScore: null,
epcSapScorePotential: null,
potentialPostSapScoreDropdown: null,
epcPrn: null,
surveyType: null,
measuresForPibiOrdered: null,
@ -60,6 +60,8 @@ function makeDeal(overrides: Partial<HubspotDeal> = {}): HubspotDeal {
domnaSurveyDate: null,
batch: null,
batchDescription: null,
coordinationComments: null,
domnasurveyRequired: null,
createdAt: new Date(),
updatedAt: new Date(),
...overrides,

View file

@ -46,7 +46,7 @@ function makeDeal(overrides: Partial<ClassifiedDeal> = {}): ClassifiedDeal {
eiScore: null,
eiScorePotential: null,
epcSapScore: null,
epcSapScorePotential: null,
potentialPostSapScoreDropdown: null,
epcPrn: null,
surveyType: null,
measuresForPibiOrdered: null,
@ -59,6 +59,8 @@ function makeDeal(overrides: Partial<ClassifiedDeal> = {}): ClassifiedDeal {
domnaSurveyDate: null,
batch: null,
batchDescription: null,
coordinationComments: null,
domnasurveyRequired: null,
createdAt: new Date(),
updatedAt: new Date(),
displayStage: "Coordination in Progress",

View file

@ -46,7 +46,7 @@ function makeDeal(overrides: Partial<HubspotDeal> = {}): HubspotDeal {
eiScore: null,
eiScorePotential: null,
epcSapScore: null,
epcSapScorePotential: null,
potentialPostSapScoreDropdown: null,
epcPrn: null,
surveyType: null,
measuresForPibiOrdered: null,
@ -59,6 +59,8 @@ function makeDeal(overrides: Partial<HubspotDeal> = {}): HubspotDeal {
domnaSurveyDate: null,
batch: null,
batchDescription: null,
coordinationComments: null,
domnasurveyRequired: null,
createdAt: new Date(),
updatedAt: new Date(),
...overrides,

View file

@ -18,7 +18,7 @@ export const PROPERTY_CSV_FIELDS: PropertyCsvField[] = [
{ key: "eiScore", label: "EI Score" },
{ key: "eiScorePotential", label: "EI Score (Potential)" },
{ key: "epcSapScore", label: "EPC SAP Score" },
{ key: "epcSapScorePotential", label: "EPC SAP (Potential)" },
{ key: "potentialPostSapScoreDropdown", label: "EPC SAP (Potential)" },
{ key: "lodgementStatus", label: "Lodgement Status" },
{ key: "surveyedDate", label: "Surveyed Date" },
{ key: "designDate", label: "Design Date" },
@ -26,6 +26,10 @@ export const PROPERTY_CSV_FIELDS: PropertyCsvField[] = [
{ key: "epcPrn", label: "EPC Certificate Number" },
{ key: "batch", label: "Group" },
{ key: "batchDescription", label: "Group Description" },
{ key: "coordinationComments", label: "Coordination Comments" },
{ key: "dampMouldFlag", label: "Damp and Mould Growth" },
{ key: "dampMouldAndRepairComments", label: "Damp Mould and Repair Comments" },
{ key: "domnasurveyRequired", label: "Domna Survey Required" },
];
export function escapeCsvCell(value: unknown): string {

View file

@ -53,7 +53,7 @@ function makeDeal(overrides: Partial<HubspotDeal> = {}): HubspotDeal {
eiScore: null,
eiScorePotential: null,
epcSapScore: null,
epcSapScorePotential: null,
potentialPostSapScoreDropdown: null,
epcPrn: null,
surveyType: null,
measuresForPibiOrdered: null,
@ -66,6 +66,8 @@ function makeDeal(overrides: Partial<HubspotDeal> = {}): HubspotDeal {
domnaSurveyDate: null,
batch: null,
batchDescription: null,
coordinationComments: null,
domnasurveyRequired: null,
createdAt: new Date(),
updatedAt: new Date(),
...overrides,

View file

@ -30,6 +30,8 @@ export type HubspotDeal = {
sharepointLink: string | null;
dampMouldFlag: string | null; // coordinator-stage damp/mould flag
dampMouldAndRepairComments: string | null; // coordinator damp/mould comments
coordinationComments: string | null;
domnasurveyRequired: boolean | null;
preSapScore: string | null; // kept as text (HubSpot returns strings)
coordinator: string | null;
ioeV1Date: Date | null;
@ -52,7 +54,7 @@ export type HubspotDeal = {
eiScore: string | null;
eiScorePotential: string | null;
epcSapScore: string | null;
epcSapScorePotential: string | null;
potentialPostSapScoreDropdown: string | null;
epcPrn: string | null;
// ── New per-deal workflow fields (issue #249 slice) ────────────────────

View file

@ -96,6 +96,100 @@ export function useSetColumnMapping(portfolioId: string, uploadId: string) {
});
}
// field -> description -> resolved enum, for the multi-entry sample (issue #298).
export type SampleClassifications = Record<string, Record<string, string>>;
// Mutation hook that PATCHes a single classification correction
// ({ field, description, value }) for one bulk upload. On success the cached
// "classifications" query for that upload is invalidated so it refetches.
export function useEditClassification(portfolioId: string, uploadId: string) {
  const queryClient = useQueryClient();

  const saveClassification = async (edit: {
    field: string;
    description: string;
    value: string;
  }): Promise<void> => {
    const endpoint = `/api/portfolio/${portfolioId}/bulk-uploads/${uploadId}/classifications`;
    const response = await fetch(endpoint, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(edit),
    });
    // Non-2xx responses become a thrown error carrying the parsed server message.
    if (!response.ok) throw await parseError(response, "Failed to save classification.");
  };

  return useMutation<void, Error, { field: string; description: string; value: string }>({
    mutationFn: saveClassification,
    onSuccess: () => {
      const classificationsKey = [...bulkUploadKeys.progress(uploadId), "classifications"];
      queryClient.invalidateQueries({ queryKey: classificationsKey });
    },
  });
}
// Sample classifications for the review panels PLUS the still-`Unknown`
// descriptions that gate Finalise (ADR-0006).
export interface ClassificationsView {
// Resolved classifications, shaped as SampleClassifications
// (field -> description -> resolved enum).
classifications: SampleClassifications;
// Descriptions still classified as `Unknown`. Presumably keyed by classifier
// field — confirm against the classifications API route.
unknown: Record<string, string[]>;
}
// Query hook that loads the classifications view for one bulk upload. The
// query only runs while `enabled` is true. Missing `classifications` /
// `unknown` keys in the response body default to empty objects.
export function useSampleClassifications(
  portfolioId: string,
  uploadId: string,
  enabled: boolean,
) {
  const loadClassifications = async (): Promise<ClassificationsView> => {
    const response = await fetch(
      `/api/portfolio/${portfolioId}/bulk-uploads/${uploadId}/classifications`,
    );
    if (!response.ok) throw await parseError(response, "Failed to load classifications.");
    const payload = await response.json();
    const classifications = (payload.classifications ?? {}) as SampleClassifications;
    const unknown = (payload.unknown ?? {}) as Record<string, string[]>;
    return { classifications, unknown };
  };

  return useQuery<ClassificationsView, Error>({
    queryKey: [...bulkUploadKeys.progress(uploadId), "classifications"],
    enabled,
    queryFn: () => loadClassifications(),
  });
}
// Mutation hook that PATCHes the confirmed building-part ordering
// ({ permutations }) for one bulk upload and resolves with the parsed JSON
// response. On success the upload's progress query is invalidated.
export function useConfirmMultiEntryOrdering(portfolioId: string, uploadId: string) {
  const queryClient = useQueryClient();

  const saveOrdering = async (ordering: {
    permutations: Record<string, number[]>;
  }): Promise<BulkUpload> => {
    const endpoint = `/api/portfolio/${portfolioId}/bulk-uploads/${uploadId}/multi-entry-ordering`;
    const response = await fetch(endpoint, {
      method: "PATCH",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(ordering),
    });
    if (!response.ok) throw await parseError(response, "Failed to save ordering.");
    return response.json();
  };

  return useMutation<BulkUpload, Error, { permutations: Record<string, number[]> }>({
    mutationFn: saveOrdering,
    onSuccess: () => {
      const progressKey = bulkUploadKeys.progress(uploadId);
      queryClient.invalidateQueries({ queryKey: progressKey });
    },
  });
}
// Mutation hook for the "Verify classification" acknowledgement (ADR-0004
// Step 1), which unlocks Finalise. Sends a body-less PATCH and resolves with
// the parsed JSON response; the upload's progress query is invalidated on
// success. Per-row corrections go through useEditClassification instead.
export function useConfirmVerification(portfolioId: string, uploadId: string) {
  const queryClient = useQueryClient();

  const confirm = async (): Promise<BulkUpload> => {
    const endpoint = `/api/portfolio/${portfolioId}/bulk-uploads/${uploadId}/verify-classification`;
    const response = await fetch(endpoint, { method: "PATCH" });
    if (!response.ok) throw await parseError(response, "Failed to confirm classification.");
    return response.json();
  };

  return useMutation<BulkUpload, Error, void>({
    mutationFn: () => confirm(),
    onSuccess: () => {
      const progressKey = bulkUploadKeys.progress(uploadId);
      queryClient.invalidateQueries({ queryKey: progressKey });
    },
  });
}
export function useStartAddressMatching(portfolioId: string, uploadId: string) {
const queryClient = useQueryClient();
return useMutation<{ taskId: string }, Error, void>({
@ -113,7 +207,11 @@ export function useStartAddressMatching(portfolioId: string, uploadId: string) {
});
}
export function useBulkUploadProgress(portfolioId: string, uploadId: string) {
export function useBulkUploadProgress(
portfolioId: string,
uploadId: string,
options?: { onSuccess?: (data: ProgressView) => void },
) {
return useQuery<ProgressView, Error>({
queryKey: bulkUploadKeys.progress(uploadId),
queryFn: async () => {
@ -127,6 +225,9 @@ export function useBulkUploadProgress(portfolioId: string, uploadId: string) {
const status = data?.upload.status;
return status && isTerminalStatus(status) ? false : 3000;
},
// v4 onSuccess fires after each successful poll; callers use it to react to a
// status transition (e.g. refresh the server page once it goes terminal).
onSuccess: options?.onSuccess,
});
}

View file

@ -0,0 +1,100 @@
// Single source of truth for BulkUpload column mapping.
//
// The mapping is stored as `field → source CSV header` (one entry per mapped
// internal field). One source header may feed several CLASSIFIER fields (e.g.
// "Property Type" → both property_type and built_form_type) but at most one
// ADDRESS field — see docs/adr/0003-classifier-triggers-as-address-subtask.md
// and docs/wip/landlord-override-frontend-plan.md (Q2.2, Q3).
//
// Classifier field `value`s mirror the Model service's ClassifiableColumn names
// (property_type / built_form_type / wall_type / roof_type) so the mapping can
// be forwarded to the lambda trigger verbatim.
// The two groups an internal field can belong to.
export type InternalFieldKind = "address" | "classifier";
// One internal field that a source CSV header can be mapped to.
export interface InternalField {
// Key used in the stored mapping (field -> source header).
value: string;
// Human-readable name; also used in validation messages.
label: string;
kind: InternalFieldKind;
// When true, validateColumnMapping rejects a mapping that leaves this field unmapped.
required: boolean;
// Canonical header written into the address-matching CSV (address fields only).
outputHeader?: string;
}
// Every mappable field: five address fields (address_1 and postcode are
// required) followed by four optional classifier fields.
export const INTERNAL_FIELDS: InternalField[] = [
{ value: "address_1", label: "Address 1", kind: "address", required: true, outputHeader: "Address 1" },
{ value: "address_2", label: "Address 2", kind: "address", required: false, outputHeader: "Address 2" },
{ value: "address_3", label: "Address 3", kind: "address", required: false, outputHeader: "Address 3" },
{ value: "postcode", label: "Postcode", kind: "address", required: true, outputHeader: "postcode" },
{ value: "internal_reference", label: "Internal Reference", kind: "address", required: false, outputHeader: "Internal Reference" },
{ value: "property_type", label: "Property Type", kind: "classifier", required: false },
{ value: "built_form_type", label: "Built Form", kind: "classifier", required: false },
{ value: "wall_type", label: "Wall Type", kind: "classifier", required: false },
{ value: "roof_type", label: "Roof Type", kind: "classifier", required: false },
];
// Derived views of INTERNAL_FIELDS, computed once at module load and kept in
// declaration order.
export const ADDRESS_FIELDS = INTERNAL_FIELDS.filter((f) => f.kind === "address");
export const CLASSIFIER_FIELDS = INTERNAL_FIELDS.filter((f) => f.kind === "classifier");
// `value` keys only, for the classifier fields and for the required fields.
export const CLASSIFIER_FIELD_VALUES = CLASSIFIER_FIELDS.map((f) => f.value);
export const REQUIRED_FIELD_VALUES = INTERNAL_FIELDS.filter((f) => f.required).map((f) => f.value);
// Sentinel for an unmapped field in the UI dropdown ("Not provided").
export const NOT_PROVIDED = "";
// Guess which ADDRESS field a source header feeds, or null when none matches.
// Classifier fields are never auto-detected (Q2.1): mapping them is always an
// explicit user choice.
//
// The header is lower-cased and stripped of whitespace, underscores and
// hyphens before matching. Rules are tried top to bottom and the first match
// wins. Note the address_2 / address_3 "address…2" / "address…3" alternatives
// are prefix matches (no trailing `$`), unlike the address_1 rule.
export function autoDetectField(header: string): string | null {
  const normalized = header.toLowerCase().replace(/[\s_\-]/g, "");
  const rules: Array<[RegExp, string]> = [
    [/^(address|addr)(line)?(1|one)?$/, "address_1"],
    [/^(address|addr)(line)?(2|two)|^street$/, "address_2"],
    [/^(address|addr)(line)?(3|three)|^locality$|^town$|^city$/, "address_3"],
    [/^post(al)?code$|^postcode$|^pcode$/, "postcode"],
    [/^(internal)?ref(erence)?$|^id$/, "internal_reference"],
  ];
  for (const [pattern, field] of rules) {
    if (pattern.test(normalized)) return field;
  }
  return null;
}
// Build the initial field→header mapping. Entries from `existing` are copied
// first and never overwritten (a key that is present with any value other than
// `undefined` counts as already chosen). Each source header is then run through
// autoDetectField, and the first header detected for a still-unset field
// claims it.
export function buildInitialMapping(
  sourceHeaders: string[],
  existing?: Record<string, string>,
): Record<string, string> {
  const result: Record<string, string> = { ...(existing ?? {}) };
  sourceHeaders.forEach((sourceHeader) => {
    const detected = autoDetectField(sourceHeader);
    if (detected && result[detected] === undefined) {
      result[detected] = sourceHeader;
    }
  });
  return result;
}
// Validation shared by the client (live) and the server (authoritative).
// Returns the first problem found as a user-facing message, or null when the
// mapping is valid. Checks run in this order:
//   1. every required field has a non-empty header;
//   2. no source header feeds more than one ADDRESS field.
// Classifier fields may share a header, so they are not checked.
export function validateColumnMapping(mapping: Record<string, string>): string | null {
  const missingField = REQUIRED_FIELD_VALUES.find((value) => !mapping[value]);
  if (missingField !== undefined) {
    const label = INTERNAL_FIELDS.find((f) => f.value === missingField)?.label ?? missingField;
    return `${label} must be mapped to a column.`;
  }

  const claimedHeaders = new Set<string>();
  for (const { value } of ADDRESS_FIELDS) {
    const header = mapping[value];
    if (!header) continue; // unmapped optional address field
    if (claimedHeaders.has(header)) {
      return `Column "${header}" is mapped to more than one address field.`;
    }
    claimedHeaders.add(header);
  }
  return null;
}
// The classifier subset of the mapping (category → source header) that gets
// forwarded to the lambda trigger. Address fields are intentionally excluded,
// and classifier fields with no (or an empty) header are omitted.
export function classifierMapping(mapping: Record<string, string>): Record<string, string> {
  return CLASSIFIER_FIELD_VALUES.reduce<Record<string, string>>((subset, field) => {
    const header = mapping[field];
    if (header) subset[field] = header;
    return subset;
  }, {});
}

View file

@ -0,0 +1,110 @@
import { describe, expect, it } from "vitest";
import { detectMultiEntry, assignmentToPermutation, isPermutation } from "./multiEntry";
// field -> source header, the shape stored on the upload. The classifier
// allows property_type and built_form_type to share a header, but this fixture
// maps only property_type, wall_type and roof_type (built_form_type is left
// unmapped).
const MAPPING = {
address_1: "Addr",
postcode: "PC",
property_type: "Property Type",
wall_type: "Walls",
roof_type: "Roofs",
};
// detectMultiEntry: sample selection, per-count samples, count distribution
// and description normalization.
describe("detectMultiEntry", () => {
it("returns an empty summary when no classifier columns are mapped", () => {
// Address-only mapping: there is no classifier column to scan.
const rows = [{ Addr: "1 High St", PC: "AB1 2CD" }];
const summary = detectMultiEntry(rows, { address_1: "Addr", postcode: "PC" });
expect(summary.sample).toBeNull();
expect(summary.largestCount).toBe(0);
});
it("captures a single-part verify sample when classifier columns exist but no row is multi-entry", () => {
const rows = [
{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: EndTerrace", Walls: "Cavity: AsBuilt", Roofs: "Pitched: 200mm" },
];
const summary = detectMultiEntry(rows, MAPPING);
// Not multi-entry, but there IS a sample to verify (ADR-0004 Step 1).
expect(summary.largestCount).toBe(0);
expect(summary.sample).not.toBeNull();
expect(summary.sample!.count).toBe(1);
expect(summary.sample!.address).toBe("1 High St, AB1 2CD");
// All three mapped classifier columns are present, one entry each.
expect(summary.sample!.columns.map((c) => c.field).sort()).toEqual([
"property_type",
"roof_type",
"wall_type",
]);
expect(summary.sample!.columns.every((c) => c.entries.length === 1)).toBe(true);
});
it("picks the largest-count row as the sample and reports it as multi-entry", () => {
const rows = [
{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: EndTerrace", Walls: "Cavity: AsBuilt", Roofs: "Pitched: 200mm" },
{ Addr: "2 Low St", PC: "AB3 4EF", "Property Type": "House: Detached", Walls: "Cavity: AsBuilt, Cavity: Filled", Roofs: "Flat: AsBuilt, Pitched: 200mm" },
];
const summary = detectMultiEntry(rows, MAPPING);
expect(summary.largestCount).toBe(2);
// Only rows with two or more entries are counted, so the single-entry row is absent.
expect(summary.countDistribution).toEqual({ "2": 1 });
expect(summary.sample!.address).toBe("2 Low St, AB3 4EF");
expect(summary.sample!.count).toBe(2);
// multiValuedFields are the ones that actually split.
expect([...summary.multiValuedFields].sort()).toEqual(["roof_type", "wall_type"]);
// The whole-dwelling Property Type column is still carried (for Step 1),
// with a single entry — Step 2 filters it out by entries.length.
const propertyCol = summary.sample!.columns.find((c) => c.field === "property_type");
expect(propertyCol?.entries).toHaveLength(1);
const wallCol = summary.sample!.columns.find((c) => c.field === "wall_type");
expect(wallCol?.entries.map((e) => e.raw)).toEqual(["Cavity: AsBuilt", "Cavity: Filled"]);
});
it("captures one ordering sample per distinct count (ADR-0004 amendment)", () => {
const rows = [
{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: Detached", Walls: "Cavity: AsBuilt", Roofs: "Pitched: 200mm" }, // count 1
{ Addr: "2 Low St", PC: "AB3 4EF", "Property Type": "House: Semi", Walls: "Cavity, Solid", Roofs: "Flat, Pitched" }, // count 2
{ Addr: "3 Mid Rd", PC: "AB5 6GH", "Property Type": "House: Mid", Walls: "Cavity, Solid, Render", Roofs: "Flat, Pitched, Slate" }, // count 3
{ Addr: "4 Side Ln", PC: "AB7 8IJ", "Property Type": "House: Other", Walls: "Brick, Stone", Roofs: "Tile, Slate" }, // count 2 again
];
const summary = detectMultiEntry(rows, MAPPING);
expect(summary.largestCount).toBe(3);
expect(summary.countDistribution).toEqual({ "2": 2, "3": 1 });
// A sample for every count >= 2 — and only those.
expect(Object.keys(summary.samplesByCount ?? {}).sort()).toEqual(["2", "3"]);
expect(summary.samplesByCount!["2"].count).toBe(2);
expect(summary.samplesByCount!["3"].count).toBe(3);
// The count-2 sample is the FIRST count-2 row, not the count-3 one.
expect(summary.samplesByCount!["2"].address).toBe("2 Low St, AB3 4EF");
const wall2 = summary.samplesByCount!["2"].columns.find((c) => c.field === "wall_type");
expect(wall2?.entries.map((e) => e.raw)).toEqual(["Cavity", "Solid"]);
const wall3 = summary.samplesByCount!["3"].columns.find((c) => c.field === "wall_type");
expect(wall3?.entries.map((e) => e.raw)).toEqual(["Cavity", "Solid", "Render"]);
});
it("normalizes descriptions to lower-case (matching the classifier's key)", () => {
// Walls and Roofs are empty, so only the Property Type column carries an entry.
const rows = [{ Addr: "1 High St", PC: "AB1 2CD", "Property Type": "House: EndTerrace", Walls: "", Roofs: "" }];
const summary = detectMultiEntry(rows, MAPPING);
const entry = summary.sample!.columns.find((c) => c.field === "property_type")!.entries[0];
expect(entry.raw).toBe("House: EndTerrace");
expect(entry.description).toBe("house: endterrace");
});
});
// isPermutation / assignmentToPermutation: the building-part ordering helpers.
describe("ordering helpers", () => {
it("isPermutation accepts a bijection and rejects duplicates/out-of-range", () => {
expect(isPermutation([0, 1])).toBe(true);
expect(isPermutation([1, 0, 2])).toBe(true);
// Duplicate slot.
expect(isPermutation([0, 0])).toBe(false);
// 2 is out of range for a two-element array.
expect(isPermutation([0, 2])).toBe(false);
});
it("assignmentToPermutation inverts assignment[pos]=slot to permutation[slot]=pos", () => {
// file position 1 holds the main building (slot 0), position 0 is extension 1.
expect(assignmentToPermutation([1, 0])).toEqual([1, 0]);
// The identity assignment inverts to itself.
expect(assignmentToPermutation([0, 1, 2])).toEqual([0, 1, 2]);
// A 3-cycle is not its own inverse.
expect(assignmentToPermutation([2, 0, 1])).toEqual([1, 2, 0]);
});
});

View file

@ -0,0 +1,192 @@
// Multi-entry building-part detection (ADR-0004).
//
// A BulkUpload row can carry several comma-separated entries in a physical-
// element column (e.g. Walls = "Cavity: AsBuilt (1976-1982), Cavity:
// FilledCavity"). Each entry is a Building part (Main building + Extensions).
// This module finds that pattern and captures one sample — the row with the
// MOST building parts — so the user can confirm the ordering downstream.
//
// Pure + I/O-free so it's unit-testable; the start-address-matching route runs
// it over the already-parsed upload rows and persists the result on the upload.
import { ADDRESS_FIELDS, classifierMapping } from "./columnFields";
import type {
MultiEntryEntry,
MultiEntryColumn,
MultiEntrySample,
MultiEntrySummary,
} from "@/app/db/schema/bulk_address_uploads";
// The jsonb shape lives with the column (schema/bulk_address_uploads.ts) so the
// migration is self-contained; re-export here for callers of this module.
export type {
MultiEntryEntry,
MultiEntryColumn,
MultiEntrySample,
MultiEntrySummary,
MultiEntryOrdering,
} from "@/app/db/schema/bulk_address_uploads";
// --- Building-part ordering (ADR-0004) ---
// Display label for a building-part slot: slot 0 is the Main building, and
// every other slot k is "Extension k".
export function partLabel(slot: number): string {
  if (slot === 0) return "Main building";
  return `Extension ${slot}`;
}
// True when `arr` is a permutation of [0, n-1], where n = arr.length: no
// duplicates, and every element is an integer in range. An empty array counts
// as a (trivial) permutation.
export function isPermutation(arr: number[]): boolean {
  const size = arr.length;
  const hasDuplicates = new Set(arr).size !== size;
  if (hasDuplicates) return false;
  const outOfRange = (n: number): boolean => !Number.isInteger(n) || n < 0 || n >= size;
  return !arr.some(outOfRange);
}
// Invert the UI's per-position assignment into the stored per-slot form.
//
// The UI collects `assignment[pos] = slot` (which building-part slot each file
// position holds). Storage is keyed the other way — `permutation[slot] = pos` —
// so a consumer can ask "which file position holds the main building?".
// The input is expected to be a permutation of [0, n-1]; it is not validated
// here.
export function assignmentToPermutation(assignment: number[]): number[] {
  return assignment.reduce<number[]>((inverse, slot, position) => {
    inverse[slot] = position;
    return inverse;
  }, new Array<number>(assignment.length));
}
// The "nothing detected" summary: no multi-valued fields, no counts, no sample.
// detectMultiEntry returns this exact object (not a copy) when no classifier
// column is mapped, so callers must treat it as read-only.
export const EMPTY_MULTI_ENTRY_SUMMARY: MultiEntrySummary = {
multiValuedFields: [],
countDistribution: {},
largestCount: 0,
sample: null,
samplesByCount: {},
};
// Split a cell into building-part entries. Mirrors the classifier's
// split(",") → trim → lower. Empty fragments are dropped so positions line up
// between the raw and normalized forms. null/undefined cells yield no entries;
// any other value is stringified first.
export function splitEntries(value: unknown): MultiEntryEntry[] {
  const entries: MultiEntryEntry[] = [];
  for (const fragment of String(value ?? "").split(",")) {
    const raw = fragment.trim();
    if (raw.length === 0) continue;
    entries.push({ raw, description: raw.toLowerCase() });
  }
  return entries;
}
// Compose a display address for one row by joining, with ", ", the trimmed
// non-empty values of the mapped address fields in ADDRESS_FIELDS order. The
// internal reference is excluded, and unmapped fields and blank cells are
// skipped.
function buildAddress(
  row: Record<string, unknown>,
  columnMapping: Record<string, string>,
): string {
  return ADDRESS_FIELDS.filter((field) => field.value !== "internal_reference")
    .flatMap((field) => {
      const header = columnMapping[field.value];
      if (!header) return [];
      const text = String(row[header] ?? "").trim();
      return text ? [text] : [];
    })
    .join(", ");
}
// Scan the mapped classifier columns and capture one sample address. Only
// classifier columns are considered — they're the physical-element descriptions
// we slice into building parts; address columns are single-valued by nature.
//
// The sample serves both review steps (ADR-0004): the largest-count multi-entry
// row when one exists (Step 2 — Confirm order), otherwise the first row carrying
// any classifier value so Step 1 — Verify classification still has something to
// show. `largestCount >= 2` is the multi-entry signal; `sample != null` means
// "there is something to verify". The sample carries every mapped classifier
// column with a value — Step 1 lists them all; Step 2 renders only the
// multi-valued ones.
//
// Returns EMPTY_MULTI_ENTRY_SUMMARY itself (the shared constant, not a copy)
// when the mapping has no classifier columns.
export function detectMultiEntry(
rows: Array<Record<string, unknown>>,
columnMapping: Record<string, string>,
): MultiEntrySummary {
// [field, source header] pairs for the mapped classifier fields only.
const classifierCols = Object.entries(classifierMapping(columnMapping));
if (classifierCols.length === 0) return EMPTY_MULTI_ENTRY_SUMMARY;
// Fields that split into 2+ entries in at least one row.
const multiValued = new Set<string>();
// Row count per building-part count; only counts >= 2 are recorded.
const countDistribution: Record<string, number> = {};
// Stays 0 when no row is multi-entry.
let largestCount = 0;
let multiEntryRowIndex = -1;
// Fallback sample for Step 1 when no row is multi-entry: the first row that
// carries any classifier value.
let firstClassifiedRowIndex = -1;
// First row index seen at each distinct count ≥ 2 — one ordering sample per
// count (ADR-0004 amendment): each count needs its own confirmed permutation.
const sampleRowIndexByCount: Record<string, number> = {};
rows.forEach((row, index) => {
// A row's building-part count is the largest entry count across its classifier columns.
let rowMax = 0;
let hasValue = false;
for (const [field, header] of classifierCols) {
const n = splitEntries(row[header]).length;
if (n > 0) hasValue = true;
if (n > 1) multiValued.add(field);
if (n > rowMax) rowMax = n;
}
if (hasValue && firstClassifiedRowIndex === -1) firstClassifiedRowIndex = index;
if (rowMax >= 2) {
const key = String(rowMax);
countDistribution[key] = (countDistribution[key] ?? 0) + 1;
if (sampleRowIndexByCount[key] === undefined) sampleRowIndexByCount[key] = index;
// First row at a new maximum becomes the multi-entry sample; the strict `>`
// means a later row with the same count does not replace it.
if (rowMax > largestCount) {
largestCount = rowMax;
multiEntryRowIndex = index;
}
}
});
// Prefer the largest multi-entry row; otherwise fall back to the first classified row.
const sampleRowIndex =
multiEntryRowIndex !== -1 ? multiEntryRowIndex : firstClassifiedRowIndex;
// No row carries any classifier value: nothing to sample.
if (sampleRowIndex === -1) {
return {
multiValuedFields: [...multiValued],
countDistribution,
largestCount,
sample: null,
samplesByCount: {},
};
}
// One ordering sample per distinct count, so the UI can render a panel per
// count and the user confirms each independently.
const samplesByCount: Record<string, MultiEntrySample> = {};
for (const [count, rowIndex] of Object.entries(sampleRowIndexByCount)) {
samplesByCount[count] = sampleFromRow(rows[rowIndex], columnMapping, classifierCols, Number(count));
}
return {
multiValuedFields: [...multiValued],
countDistribution,
largestCount,
// The fallback (non-multi-entry) sample is reported with count 1.
sample: sampleFromRow(
rows[sampleRowIndex],
columnMapping,
classifierCols,
largestCount >= 2 ? largestCount : 1,
),
samplesByCount,
};
}
// Build the sample for one row: its display address, the caller-supplied
// building-part `count`, and every mapped classifier column that carries at
// least one entry (empty columns are left out). Step 1 lists all columns;
// Step 2's order table filters to the multi-valued ones (single-value columns
// are whole-dwelling facts).
function sampleFromRow(
  row: Record<string, unknown>,
  columnMapping: Record<string, string>,
  classifierCols: Array<[string, string]>,
  count: number,
): MultiEntrySample {
  const columns: MultiEntryColumn[] = [];
  for (const [field, header] of classifierCols) {
    const entries = splitEntries(row[header]);
    if (entries.length > 0) columns.push({ field, header, entries });
  }
  const address = buildAddress(row, columnMapping);
  return { address, count, columns };
}

View file

@ -0,0 +1,23 @@
// Shared S3 key conventions + the synthetic join-column name for bulk-upload
// artifacts. The finaliser join (ADR-0006) depends on the classifier CSV key
// being built *identically* in two places — where the CSV is written
// (start-address-matching) and where the finaliser is dispatched
// (dispatchFinaliser) — and on the `source_row_id` column appearing in both the
// address CSV and the classifier CSV. Keeping the convention here is the single
// source of truth that stops those two callers drifting.
// Root S3 prefix under which every bulk-upload input artifact is stored.
export const BULK_UPLOAD_INPUT_PREFIX = "bulk_onboarding_inputs";

// Shared layout for all bulk-upload artifacts: <prefix>/<portfolioId>/<fileName>.
function bulkUploadArtifactKey(portfolioId: string, fileName: string): string {
  return `${BULK_UPLOAD_INPUT_PREFIX}/${portfolioId}/${fileName}`;
}

// S3 key of the address CSV for one upload: <prefix>/<portfolioId>/<uploadId>.csv
export function addressCsvKey(portfolioId: string, uploadId: string): string {
  return bulkUploadArtifactKey(portfolioId, `${uploadId}.csv`);
}

// S3 key of the classifier CSV for one upload:
// <prefix>/<portfolioId>/<uploadId>-classifier.csv
export function classifierCsvKey(portfolioId: string, uploadId: string): string {
  return bulkUploadArtifactKey(portfolioId, `${uploadId}-classifier.csv`);
}
// The synthetic per-row UUID column. Minted at start-address-matching and
// emitted into both CSVs so the finaliser can join a row's identity (combiner
// output) to its raw descriptions (classifier CSV). The Model finaliser reads
// this exact header — keep the two in sync.
export const SOURCE_ROW_ID_COLUMN = "source_row_id";

View file

@ -1,9 +1,25 @@
import { db } from "@/app/db/db";
import { bulkAddressUploads } from "@/app/db/schema/bulk_address_uploads";
import {
landlordPropertyTypeOverrides,
landlordBuiltFormTypeOverrides,
landlordWallTypeOverrides,
landlordRoofTypeOverrides,
PropertyTypeValues,
BuiltFormTypeValues,
WallTypeValues,
RoofTypeValues,
} from "@/app/db/schema/landlord_overrides";
import { tasks } from "@/app/db/schema/tasks/tasks";
import { subTasks } from "@/app/db/schema/tasks/subtask";
import { count, desc, eq, sql } from "drizzle-orm";
import { and, count, desc, eq, inArray, sql } from "drizzle-orm";
import type { BulkUpload, BulkUploadStatus, ProgressView, TaskSummary } from "./types";
import { validateColumnMapping, classifierMapping } from "./columnFields";
import { classifierCsvKey } from "./s3Keys";
import { retrofitDataS3Bucket } from "@/app/utils/s3";
import { SUBTASK_SERVICE } from "./types";
import type { MultiEntrySummary } from "./multiEntry";
import { isPermutation } from "./multiEntry";
const REMAP_ALLOWED: ReadonlySet<BulkUploadStatus> = new Set([
"ready_for_processing",
@ -78,6 +94,12 @@ async function loadTaskSummary(taskId: string): Promise<TaskSummary | null> {
totalSubtasks: count(subTasks.id),
completedSubtasks: sql<number>`count(case when lower(${subTasks.status}) in ('completed', 'complete') then 1 end)::int`,
failedSubtasks: sql<number>`count(case when lower(${subTasks.status}) in ('failed', 'failure', 'error') then 1 end)::int`,
addressTotal: sql<number>`count(case when (${subTasks.service} = 'address2uprn' or ${subTasks.service} is null) and ${subTasks.id} is not null then 1 end)::int`,
addressCompleted: sql<number>`count(case when (${subTasks.service} = 'address2uprn' or ${subTasks.service} is null) and lower(${subTasks.status}) in ('completed', 'complete') then 1 end)::int`,
addressFailed: sql<number>`count(case when (${subTasks.service} = 'address2uprn' or ${subTasks.service} is null) and lower(${subTasks.status}) in ('failed', 'failure', 'error') then 1 end)::int`,
classifierTotal: sql<number>`count(case when ${subTasks.service} = 'landlord_description_overrides' then 1 end)::int`,
classifierCompleted: sql<number>`count(case when ${subTasks.service} = 'landlord_description_overrides' and lower(${subTasks.status}) in ('completed', 'complete') then 1 end)::int`,
classifierFailed: sql<number>`count(case when ${subTasks.service} = 'landlord_description_overrides' and lower(${subTasks.status}) in ('failed', 'failure', 'error') then 1 end)::int`,
})
.from(tasks)
.leftJoin(subTasks, eq(subTasks.taskId, tasks.id))
@ -94,11 +116,300 @@ export async function getProgressView(uploadId: string): Promise<ProgressView |
return { upload, task };
}
function validateMapping(mapping: Record<string, string>): string | null {
const values = Object.values(mapping);
if (!values.includes("address_1")) return "Mapping must include address_1.";
if (!values.includes("postcode")) return "Mapping must include postcode.";
return null;
// Stores the multi-entry building-part detection for one upload (ADR-0004).
// The summary is computed once at start-address-matching from the rows that
// were already parsed, and is read back on the awaiting_review surface. The
// write sets only `multiEntrySummary`, so a later status/taskId update does
// not disturb it.
export async function saveMultiEntrySummary(
  uploadId: string,
  summary: MultiEntrySummary,
): Promise<void> {
  const isThisUpload = eq(bulkAddressUploads.id, uploadId);
  const patch = { multiEntrySummary: summary };
  await db.update(bulkAddressUploads).set(patch).where(isThisUpload);
}
// Classifier results for the review samples. Read-only (ADR-0004, issue #298).
//   outer key   - classifier field (the `field` of a sample column)
//   inner key   - the normalized description, as the classifier persisted it
//   inner value - the enum the classifier resolved for that description
// An absent field, or an empty inner map, means "not classified (yet)".
export type SampleClassifications = Record<string, Record<string, string>>;
// Fetches the stored (description, value) pairs for one classifier field,
// limited to the given portfolio and the given descriptions. Each category is
// queried through its own table object so drizzle's per-table enum typing is
// preserved. Resolves to null when `field` is not one of the four classifier
// categories.
async function lookupOverrides(
  field: string,
  portfolioId: bigint,
  descriptions: string[],
): Promise<Array<{ description: string; value: string }> | null> {
  if (field === "property_type") {
    const table = landlordPropertyTypeOverrides;
    return db
      .select({ description: table.description, value: table.value })
      .from(table)
      .where(and(eq(table.portfolioId, portfolioId), inArray(table.description, descriptions)));
  }
  if (field === "built_form_type") {
    const table = landlordBuiltFormTypeOverrides;
    return db
      .select({ description: table.description, value: table.value })
      .from(table)
      .where(and(eq(table.portfolioId, portfolioId), inArray(table.description, descriptions)));
  }
  if (field === "wall_type") {
    const table = landlordWallTypeOverrides;
    return db
      .select({ description: table.description, value: table.value })
      .from(table)
      .where(and(eq(table.portfolioId, portfolioId), inArray(table.description, descriptions)));
  }
  if (field === "roof_type") {
    const table = landlordRoofTypeOverrides;
    return db
      .select({ description: table.description, value: table.value })
      .from(table)
      .where(and(eq(table.portfolioId, portfolioId), inArray(table.description, descriptions)));
  }
  return null;
}
// Resolves the classifier's enums for every entry shown in the review samples.
// Descriptions are matched exactly: the summary stores them the way the
// classifier persists them, so nothing is re-normalized here. Both the Step 1
// verify sample and every per-count ordering sample are covered, because a
// smaller-count panel can show descriptions the largest-count sample does not
// (ADR-0004 amendment). Read-only. Resolves to {} when the upload, its
// summary, or the summary's sample is missing.
export async function getSampleClassifications(
  uploadId: string,
): Promise<SampleClassifications> {
  const upload = await loadById(uploadId);
  if (!upload) return {};
  const summary = upload.multiEntrySummary;
  if (!summary || !summary.sample) return {};

  // Step 1: collect the distinct descriptions per field over all samples.
  const samples = [summary.sample, ...Object.values(summary.samplesByCount ?? {})];
  const wantedByField: Record<string, Set<string>> = {};
  for (const { columns } of samples) {
    for (const { field, entries } of columns) {
      let bucket = wantedByField[field];
      if (bucket == null) {
        bucket = new Set<string>();
        wantedByField[field] = bucket;
      }
      for (const entry of entries) bucket.add(entry.description);
    }
  }

  // Step 2: one lookup per field, skipping fields with nothing to look up and
  // fields that are not classifier categories (lookup resolves to null).
  const pid = BigInt(upload.portfolioId);
  const classifications: SampleClassifications = {};
  for (const [field, wanted] of Object.entries(wantedByField)) {
    const descriptions = Array.from(wanted);
    if (descriptions.length === 0) continue;
    const matches = await lookupOverrides(field, pid, descriptions);
    if (!matches) continue;
    const pairs = matches.map((match) => [match.description, match.value]);
    classifications[field] = Object.fromEntries(pairs);
  }
  return classifications;
}
// Descriptions still classified `Unknown` per field, portfolio-wide (ADR-0006).
// `Unknown` is the classifier's "couldn't decide" marker; v2 treats it as
// never-final, so the Finalise gate blocks until the user maps every one to a
// real value (and the finaliser fails loudly if any slips through). Portfolio-
// wide is the right scope under the one-real-upload assumption (ADR-0006).
// Shape: classifier field -> the descriptions whose stored value is `Unknown`.
// getUnknownOverrides only adds a field when it has at least one such
// description, so a field with none is absent rather than mapped to [].
export type UnknownOverrides = Record<string, string[]>;
// The literal compared against each override table's `value` column by
// unknownForField. The comparison is an exact equality match.
const UNKNOWN_VALUE = "Unknown";
// Lists the descriptions in one category table whose stored value is still
// UNKNOWN_VALUE for the given portfolio. Each category goes through its own
// table object; a field that is not a classifier category yields [].
async function unknownForField(field: string, portfolioId: bigint): Promise<string[]> {
  if (field === "property_type") {
    const table = landlordPropertyTypeOverrides;
    const rows = await db
      .select({ description: table.description })
      .from(table)
      .where(and(eq(table.portfolioId, portfolioId), eq(table.value, UNKNOWN_VALUE)));
    return rows.map((row) => row.description);
  }
  if (field === "built_form_type") {
    const table = landlordBuiltFormTypeOverrides;
    const rows = await db
      .select({ description: table.description })
      .from(table)
      .where(and(eq(table.portfolioId, portfolioId), eq(table.value, UNKNOWN_VALUE)));
    return rows.map((row) => row.description);
  }
  if (field === "wall_type") {
    const table = landlordWallTypeOverrides;
    const rows = await db
      .select({ description: table.description })
      .from(table)
      .where(and(eq(table.portfolioId, portfolioId), eq(table.value, UNKNOWN_VALUE)));
    return rows.map((row) => row.description);
  }
  if (field === "roof_type") {
    const table = landlordRoofTypeOverrides;
    const rows = await db
      .select({ description: table.description })
      .from(table)
      .where(and(eq(table.portfolioId, portfolioId), eq(table.value, UNKNOWN_VALUE)));
    return rows.map((row) => row.description);
  }
  return [];
}
// Collects, for each of the four classifier categories, the descriptions that
// are still `Unknown` in the given portfolio. Categories are queried one after
// another; a category with no unknown descriptions is left out of the result.
// `portfolioId` is converted with BigInt(), which throws on a non-numeric
// string.
export async function getUnknownOverrides(portfolioId: string): Promise<UnknownOverrides> {
  const pid = BigInt(portfolioId);
  const categories = ["property_type", "built_form_type", "wall_type", "roof_type"];
  const unknowns: UnknownOverrides = {};
  for (const category of categories) {
    const stillUnknown = await unknownForField(category, pid);
    if (stillUnknown.length === 0) continue;
    unknowns[category] = stillUnknown;
  }
  return unknowns;
}
// Valid enum values per classifier category, for validating a user edit.
// Keys are the four classifier field names; setClassificationOverride treats
// a field with no entry here as an unknown category and a value missing from
// the field's list as invalid.
// NOTE(review): this is a plain object, so indexing it with an arbitrary
// string also resolves inherited keys such as `constructor`; look fields up
// with an own-property check.
const CATEGORY_VALUES: Record<string, readonly string[]> = {
property_type: PropertyTypeValues,
built_form_type: BuiltFormTypeValues,
wall_type: WallTypeValues,
roof_type: RoofTypeValues,
};
// Writes one user override (source='user') into the table for `field`,
// inserting a new row or overwriting the existing one. The conflict target is
// the unique (portfolio_id, description) pair. Each category uses its own
// table object so drizzle's per-table typing stays intact. Stamping
// source='user' means the classifier's
// `ON CONFLICT … WHERE source='classifier'` never re-clobbers the row.
// A `field` outside the four categories writes nothing.
async function upsertUserOverride(
  field: string,
  portfolioId: bigint,
  description: string,
  value: string,
): Promise<void> {
  const stamp = new Date();
  if (field === "property_type") {
    const table = landlordPropertyTypeOverrides;
    await db
      .insert(table)
      .values({ portfolioId, description, value, source: "user" })
      .onConflictDoUpdate({
        target: [table.portfolioId, table.description],
        set: { value, source: "user", updatedAt: stamp },
      });
  } else if (field === "built_form_type") {
    const table = landlordBuiltFormTypeOverrides;
    await db
      .insert(table)
      .values({ portfolioId, description, value, source: "user" })
      .onConflictDoUpdate({
        target: [table.portfolioId, table.description],
        set: { value, source: "user", updatedAt: stamp },
      });
  } else if (field === "wall_type") {
    const table = landlordWallTypeOverrides;
    await db
      .insert(table)
      .values({ portfolioId, description, value, source: "user" })
      .onConflictDoUpdate({
        target: [table.portfolioId, table.description],
        set: { value, source: "user", updatedAt: stamp },
      });
  } else if (field === "roof_type") {
    const table = landlordRoofTypeOverrides;
    await db
      .insert(table)
      .values({ portfolioId, description, value, source: "user" })
      .onConflictDoUpdate({
        target: [table.portfolioId, table.description],
        set: { value, source: "user", updatedAt: stamp },
      });
  }
}
// Result of setClassificationOverride:
//  - ok: the override was written.
//  - invalid: an input failed validation (unknown category, value not allowed
//    for the category, or empty description); `reason` says which.
export type SetClassificationOutcome =
| { kind: "ok" }
| { kind: "invalid"; reason: string };
// Correct one classification, persisted as a user override (ADR-0004, issue
// #299). Keyed by (portfolio, description), so it changes the mapping for every
// row with that description portfolio-wide. The description is trimmed and
// lower-cased to match the classifier's stored key.
//
// Every validation failure is reported as `{ kind: "invalid" }` rather than
// thrown:
//  - `field` must be an OWN key of CATEGORY_VALUES. A bare index lookup would
//    also resolve inherited keys ("constructor", "toString", "__proto__") and
//    then crash on `.includes`.
//  - `value` must be one of that category's enum values.
//  - `description` must be non-empty after normalization.
//  - `portfolioId` must be a plain decimal string. BigInt() would otherwise
//    throw on junk and silently turn "" into 0n.
export async function setClassificationOverride(
  portfolioId: string,
  field: string,
  description: string,
  value: string,
): Promise<SetClassificationOutcome> {
  const isKnownCategory = Object.prototype.hasOwnProperty.call(CATEGORY_VALUES, field);
  const allowed = isKnownCategory ? CATEGORY_VALUES[field] : undefined;
  if (!allowed) return { kind: "invalid", reason: `Unknown category '${field}'` };
  if (!allowed.includes(value))
    return { kind: "invalid", reason: `'${value}' is not a valid ${field}` };

  const normalized = description.trim().toLowerCase();
  if (!normalized) return { kind: "invalid", reason: "Empty description" };

  // Same numeric-string rule the landlord-override reader applies.
  if (!/^\d+$/.test(portfolioId))
    return { kind: "invalid", reason: "Invalid portfolio id" };

  await upsertUserOverride(field, BigInt(portfolioId), normalized, value);
  return { kind: "ok" };
}
// Result of setMultiEntryOrdering:
//  - ok: the ordering was stored; `upload` is the updated row.
//  - not_found: no upload with that id, or the update returned no row.
//  - wrong_state: the upload is not at awaiting_review; `current` is its status.
//  - not_multi_entry: the upload has no summary, or its largestCount is below 2.
//  - invalid_ordering: a supplied permutation failed validation; `reason` is a
//    user-readable message.
export type SetOrderingOutcome =
| { kind: "ok"; upload: BulkUpload }
| { kind: "not_found" }
| { kind: "wrong_state"; current: string }
| { kind: "not_multi_entry" }
| { kind: "invalid_ordering"; reason: string };
// Persist the user-confirmed building-part ordering (ADR-0004, amended
// 2026-06-05). Allowed only at awaiting_review and only when the upload is
// multi-entry. Each distinct count ≥ 2 needs its own permutation; the UI confirms
// one count at a time, so we MERGE the supplied permutations into any already
// stored, validate each is a bijection, and only mark `confirmed` once EVERY
// detected count has a permutation (which gates Finalise).
//
// A supplied permutation is rejected as `invalid_ordering` when:
//  - its key is not one of the detected counts (the keys of
//    countDistribution). This also rejects non-canonical keys such as " 2" or
//    "2.0", which pass Number() but would be stored under a key that can never
//    satisfy a required count;
//  - it is not an array, its length differs from the count, or it is not a
//    permutation.
export async function setMultiEntryOrdering(
  uploadId: string,
  permutations: Record<string, number[]>,
): Promise<SetOrderingOutcome> {
  const upload = await loadById(uploadId);
  if (!upload) return { kind: "not_found" };
  if (upload.status !== "awaiting_review")
    return { kind: "wrong_state", current: upload.status };

  const summary = upload.multiEntrySummary;
  // A sample now exists for non-multi-entry uploads too (Step 1's verify
  // sample), so "is multi-entry" is largestCount >= 2, not "has a sample".
  if (!summary || summary.largestCount < 2)
    return { kind: "not_multi_entry" };

  // Every detected count (the keys of countDistribution, all ≥ 2).
  const requiredCounts = Object.keys(summary.countDistribution);
  const detectedCounts = new Set(requiredCounts);

  for (const [count, permutation] of Object.entries(permutations)) {
    if (!detectedCounts.has(count))
      return { kind: "invalid_ordering", reason: `No entries with ${count} parts were detected in this upload.` };
    if (
      !Array.isArray(permutation) ||
      permutation.length !== Number(count) ||
      !isPermutation(permutation)
    )
      return { kind: "invalid_ordering", reason: `Ordering for ${count} parts is not a valid arrangement.` };
  }

  // Merge with any counts confirmed earlier, then decide whether every detected
  // count now has a permutation of the right length.
  const merged = { ...(upload.multiEntryOrdering?.permutations ?? {}), ...permutations };
  const confirmed = requiredCounts.every(
    (c) => Array.isArray(merged[c]) && merged[c].length === Number(c),
  );

  const [updated] = await db
    .update(bulkAddressUploads)
    .set({ multiEntryOrdering: { permutations: merged, confirmed } })
    .where(eq(bulkAddressUploads.id, uploadId))
    .returning();
  if (!updated) return { kind: "not_found" };
  return { kind: "ok", upload: updated };
}
// Result of setVerifyAck:
//  - ok: the acknowledgement was stored; `upload` is the updated row.
//  - not_found: no upload with that id, or the update returned no row.
//  - wrong_state: the upload is not at awaiting_review; `current` is its status.
export type SetVerifyAckOutcome =
| { kind: "ok"; upload: BulkUpload }
| { kind: "not_found" }
| { kind: "wrong_state"; current: string };
// Stores the user's "Verify classification" acknowledgement (ADR-0004 Step 1)
// by setting `verifyAck` to true. Only permitted while the upload is at
// awaiting_review. The acknowledgement gates Finalise whenever the upload has
// classifier columns, whether or not it is multi-entry. That is why it has its
// own column instead of being a flag on multiEntryOrdering.
export async function setVerifyAck(uploadId: string): Promise<SetVerifyAckOutcome> {
  const existing = await loadById(uploadId);
  if (!existing) return { kind: "not_found" };

  const { status } = existing;
  if (status !== "awaiting_review") return { kind: "wrong_state", current: status };

  const isThisUpload = eq(bulkAddressUploads.id, uploadId);
  const [acknowledged] = await db
    .update(bulkAddressUploads)
    .set({ verifyAck: true })
    .where(isThisUpload)
    .returning();

  if (acknowledged) return { kind: "ok", upload: acknowledged };
  return { kind: "not_found" };
}
export type SetMappingOutcome =
@ -116,7 +427,7 @@ export async function setColumnMapping(
if (!REMAP_ALLOWED.has(upload.status as BulkUploadStatus))
return { kind: "invalid_status", current: upload.status };
const reason = validateMapping(mapping);
const reason = validateColumnMapping(mapping);
if (reason) return { kind: "invalid_mapping", reason };
const [updated] = await db
@ -174,6 +485,7 @@ export async function triggerAddressMatching(args: {
.values({
taskId: task.id,
status: "waiting",
service: SUBTASK_SERVICE.address,
inputs: JSON.stringify({ bulk_upload_id: args.uploadId }),
})
.returning();
@ -209,9 +521,69 @@ export async function triggerAddressMatching(args: {
return { kind: "ok", taskId: task.id };
}
// Fires the landlord classifier as an extra subtask under the address task
// (ADR-0003). `s3Uri` points at a dedicated classifier CSV: the
// start-address-matching route converts the classifier columns from the
// upload, because the address-matching CSV strips the description columns. So
// the lambda always parses a real CSV, even for .xlsx/.xls uploads.
// Non-blocking: a failed trigger marks only the classifier subtask as failed,
// leaving address matching untouched. Does nothing when the upload is missing,
// has no column mapping, or maps no classifier columns.
export async function triggerClassifier(args: {
  taskId: string;
  uploadId: string;
  s3Uri: string;
  sessionToken: string | undefined;
}): Promise<void> {
  const upload = await loadById(args.uploadId);
  if (!upload) return;
  if (!upload.columnMapping) return;

  const classifierColumns = classifierMapping(upload.columnMapping);
  const mappedCount = Object.keys(classifierColumns).length;
  if (mappedCount === 0) return;

  const [subTask] = await db
    .insert(subTasks)
    .values({
      taskId: args.taskId,
      status: "waiting",
      service: SUBTASK_SERVICE.classifier,
      inputs: JSON.stringify({ bulk_upload_id: args.uploadId }),
    })
    .returning();

  const request = {
    task_id: args.taskId,
    sub_task_id: subTask.id,
    s3_uri: args.s3Uri,
    portfolio_id: Number(upload.portfolioId),
    column_mapping: classifierColumns,
  };
  const response = await triggerFastApiPipeline({
    endpoint: "/v1/bulk-uploads/trigger-landlord-overrides",
    payload: request,
    sessionToken: args.sessionToken,
  });

  if (response.ok) {
    // Accepted: record the full request as the subtask's inputs.
    await db
      .update(subTasks)
      .set({ status: "in progress", inputs: JSON.stringify(request) })
      .where(eq(subTasks.id, subTask.id));
    return;
  }

  // Rejected: fail only this subtask and keep the error for inspection.
  await db
    .update(subTasks)
    .set({
      status: "failed",
      outputs: JSON.stringify({ error: response.message }),
    })
    .where(eq(subTasks.id, subTask.id));
}
// Result of requestCombineRetrigger. From the parts of that function visible
// here:
//  - already_combined: the upload already has a combinedOutputS3Uri.
//  - already_dispatched: the `processing → combining` claim updated no rows.
//  - missing_task: the upload has no taskId.
//  - trigger_failed: the trigger call reported failure; carries its status and
//    message.
// `triggered` and `not_found` are produced by code outside this view —
// presumably a successful dispatch and an unknown upload id; confirm there.
export type CombineRetriggerOutcome =
| { kind: "triggered"; taskId: string; subTaskId: string }
| { kind: "already_combined" }
| { kind: "already_dispatched" }
| { kind: "not_found" }
| { kind: "missing_task" }
| { kind: "trigger_failed"; status: number; message: string };
@ -225,6 +597,24 @@ export async function requestCombineRetrigger(args: {
if (!upload.taskId) return { kind: "missing_task" };
if (upload.combinedOutputS3Uri) return { kind: "already_combined" };
// CAS: atomically claim `processing → combining` — the double-dispatch guard
// (mirrors dispatchFinaliser's awaiting_review → finalising, ADR-0005). Of two
// rapid "Run Combiner" clicks exactly one flips the row; the loser updates 0
// rows and bails, so only one combiner subtask is ever dispatched. It also
// closes the window where status is still `processing` because the backend
// hasn't written `combining` yet.
const claimed = await db
.update(bulkAddressUploads)
.set({ status: "combining" })
.where(
and(
eq(bulkAddressUploads.id, args.uploadId),
eq(bulkAddressUploads.status, "processing"),
),
)
.returning();
if (claimed.length === 0) return { kind: "already_dispatched" };
const [subTask] = await db
.insert(subTasks)
.values({ taskId: upload.taskId, status: "waiting" })
@ -237,8 +627,23 @@ export async function requestCombineRetrigger(args: {
payload,
sessionToken: args.sessionToken,
});
if (!trigger.ok)
if (!trigger.ok) {
// Roll the claim back so the user can retry, and fail the subtask.
await Promise.all([
db
.update(bulkAddressUploads)
.set({ status: "processing" })
.where(eq(bulkAddressUploads.id, args.uploadId)),
db
.update(subTasks)
.set({
status: "failed",
outputs: JSON.stringify({ error: trigger.message }),
})
.where(eq(subTasks.id, subTask.id)),
]);
return { kind: "trigger_failed", status: trigger.status, message: trigger.message };
}
await db
.update(subTasks)
@ -265,9 +670,125 @@ export async function loadForFinalize(uploadId: string): Promise<LoadForFinalize
return { kind: "ready", upload };
}
export async function markFinalized(uploadId: string): Promise<void> {
await db
// Result of dispatchFinaliser:
//  - ok: the trigger was accepted; carries the task id and the new finaliser
//    subtask id.
//  - not_found / not_yet_combined: passed through from loadForFinalize.
//  - already_finalized / wrong_state: passed through from loadForFinalize, or
//    reported when the `awaiting_review → finalising` claim updates no rows
//    (`current` is the status found on re-read, "unknown" if the row is gone).
//  - missing_task: the upload has no taskId.
//  - trigger_failed: the trigger call reported failure; carries its status and
//    message. The claim has been rolled back by then.
export type DispatchFinaliserOutcome =
| { kind: "ok"; taskId: string; subTaskId: string }
| { kind: "not_found" }
| { kind: "already_finalized" }
| { kind: "not_yet_combined" }
| { kind: "wrong_state"; current: string }
| { kind: "missing_task" }
| { kind: "trigger_failed"; status: number; message: string };
// Dispatch the async bulk_upload_finaliser (ADR-0005). Replaces the old
// synchronous property insert + markFinalized. Order matters:
// 1. loadForFinalize — rich guards (combined output present, awaiting_review).
// 2. CAS claim `awaiting_review → finalising` — the double-dispatch guard:
// of two simultaneous clicks exactly one updates a row; the loser gets 409.
// 3. create the finaliser subtask under the upload's existing Task + POST the
// trigger. On trigger failure, revert the status so the user can retry and
// mark the subtask failed. The backend writes the terminal complete/failed.
// Claims the upload for finalisation and fires the finaliser trigger.
//
// args.uploadId: id of the bulk upload to finalise.
// args.sessionToken: passed through unchanged to triggerFastApiPipeline.
//
// Returns `ok` (task id + new subtask id) once the trigger is accepted.
// Otherwise returns the non-ready outcome from loadForFinalize, `missing_task`
// when the upload has no taskId, `already_finalized`/`wrong_state` when the
// claim updates no rows, or `trigger_failed` after rolling the claim back.
export async function dispatchFinaliser(args: {
uploadId: string;
sessionToken: string | undefined;
}): Promise<DispatchFinaliserOutcome> {
const guarded = await loadForFinalize(args.uploadId);
// Pass every non-ready guard outcome straight through. There is no case for
// the remaining kind, so execution falls out of the switch with
// `guarded.upload` available.
switch (guarded.kind) {
case "not_found":
return { kind: "not_found" };
case "already_finalized":
return { kind: "already_finalized" };
case "not_yet_combined":
return { kind: "not_yet_combined" };
case "wrong_state":
return { kind: "wrong_state", current: guarded.current };
}
const upload = guarded.upload;
// The finaliser subtask is created under the upload's existing task, so an
// upload without one cannot be dispatched.
if (!upload.taskId) return { kind: "missing_task" };
// CAS: atomically claim the dispatch. Only the request that flips
// awaiting_review → finalising proceeds; a concurrent one updates 0 rows.
const claimed = await db
.update(bulkAddressUploads)
// NOTE(review): the next two lines (status "complete", bare `uploadId`) look
// like leftovers of the removed markFinalized body interleaved by the diff
// view; the intended claim is the `finalising` update that follows — confirm.
.set({ status: "complete" })
.where(eq(bulkAddressUploads.id, uploadId));
.set({ status: "finalising" })
.where(
and(
eq(bulkAddressUploads.id, args.uploadId),
eq(bulkAddressUploads.status, "awaiting_review"),
),
)
.returning();
if (claimed.length === 0) {
// The claim matched no row: the status is no longer awaiting_review (or the
// row is gone). Re-read to report the most specific outcome.
const current = await loadById(args.uploadId);
if (current?.status === "complete") return { kind: "already_finalized" };
return { kind: "wrong_state", current: current?.status ?? "unknown" };
}
// Create the subtask first so its id can be sent in the trigger payload.
const [subTask] = await db
.insert(subTasks)
.values({
taskId: upload.taskId,
status: "waiting",
service: SUBTASK_SERVICE.finaliser,
inputs: JSON.stringify({ bulk_upload_id: args.uploadId }),
})
.returning();
// v2 (ADR-0006): the finaliser also writes property_overrides for UPRN-matched
// rows, which needs the classifier CSV (raw descriptions, joined to the
// combiner output by `source_row_id`) and the confirmed building-part ordering.
// Both are derivable here — we already hold the upload row, and dispatch runs
// after the user confirms ordering, so the value is final.
// - classifier_s3_uri: null when no classifier columns were mapped (no
// classifier CSV was written; the finaliser then writes no overrides).
// - multi_entry_ordering: permutations keyed by entry-count; {} when the
// upload is not multi-entry (every cell is a single building part → part 0).
const classifierMap = classifierMapping(upload.columnMapping ?? {});
const classifierS3Uri =
Object.keys(classifierMap).length > 0
? `s3://${retrofitDataS3Bucket()}/${classifierCsvKey(upload.portfolioId, args.uploadId)}`
: null;
const payload = {
task_id: upload.taskId,
sub_task_id: subTask.id,
s3_uri: upload.combinedOutputS3Uri,
portfolio_id: Number(upload.portfolioId),
bulk_upload_id: args.uploadId,
classifier_s3_uri: classifierS3Uri,
multi_entry_ordering: upload.multiEntryOrdering?.permutations ?? {},
// classifier category → source CSV header, so the finaliser knows which
// classifier-CSV column feeds each override_component (ADR-0006).
column_mapping: classifierMap,
};
const trigger = await triggerFastApiPipeline({
endpoint: "/v1/bulk-uploads/trigger-finaliser",
payload,
sessionToken: args.sessionToken,
});
if (!trigger.ok) {
// Roll the claim back so the user can retry, and fail the subtask.
// The two writes touch different tables and run concurrently.
await Promise.all([
db
.update(bulkAddressUploads)
.set({ status: "awaiting_review" })
.where(eq(bulkAddressUploads.id, args.uploadId)),
db
.update(subTasks)
.set({
status: "failed",
outputs: JSON.stringify({ error: trigger.message }),
})
.where(eq(subTasks.id, subTask.id)),
]);
return { kind: "trigger_failed", status: trigger.status, message: trigger.message };
}
// Trigger accepted: mark the subtask in progress and replace its inputs with
// the full payload that was sent.
await db
.update(subTasks)
.set({ status: "in progress", inputs: JSON.stringify(payload) })
.where(eq(subTasks.id, subTask.id));
return { kind: "ok", taskId: upload.taskId, subTaskId: subTask.id };
}

View file

@ -6,6 +6,10 @@ export const BULK_UPLOAD_STATUSES = [
"processing",
"combining",
"awaiting_review",
// In-flight state of the async finaliser (ADR-0005); mirrors `combining`. The
// status column is free text, so no enum migration is needed. UI renders this
// as "Uploading to ARA" — see STATUS_LABELS.
"finalising",
"complete",
"failed",
] as const;
@ -14,6 +18,32 @@ export type BulkUploadStatus = (typeof BULK_UPLOAD_STATUSES)[number];
// Row type drizzle infers for a select from the bulkAddressUploads table.
export type BulkUpload = typeof bulkAddressUploads.$inferSelect;
// sub_task.service values. NULL is treated as address (legacy rows + the
// backend-spawned postcode-split children, which don't set it). See ADR-0003.
// The address and classifier strings also appear as SQL literals in the
// per-pipeline subtask counts of the task-summary query, so a rename here must
// be mirrored there.
export const SUBTASK_SERVICE = {
// Address-matching subtasks (set by triggerAddressMatching).
address: "address2uprn",
// Landlord description classifier subtasks (set by triggerClassifier).
classifier: "landlord_description_overrides",
// Async finaliser subtasks (set by dispatchFinaliser).
finaliser: "bulk_upload_finaliser",
} as const;
// User-facing label for a BulkUpload status. The persisted enum value stays
// canonical (`finalising`); the product surface for that state is "Uploading to
// ARA" (ADR-0005 — a display-layer label, never the enum name).
export const STATUS_LABELS: Record<string, string> = {
  ready_for_processing: "Ready for processing",
  mapping_complete: "Mapping complete",
  processing: "Processing",
  combining: "Combining",
  awaiting_review: "Awaiting review",
  finalising: "Uploading to ARA",
  complete: "Complete",
  failed: "Failed",
};

// Returns the display label for `status`, or `status` itself when no label is
// defined. The status column is free text, so any string can arrive here. The
// lookup is restricted to STATUS_LABELS' own keys: a bare index would resolve
// inherited Object.prototype members for statuses such as "constructor" or
// "toString" and return a function instead of a string.
export function statusLabel(status: string): string {
  if (!Object.prototype.hasOwnProperty.call(STATUS_LABELS, status)) return status;
  return STATUS_LABELS[status] ?? status;
}
export type TaskSummary = {
id: string;
taskSource: string;
@ -25,6 +55,14 @@ export type TaskSummary = {
totalSubtasks: number;
completedSubtasks: number;
failedSubtasks: number;
// Per-pipeline breakdown so onboarding progress can separate address
// matching from landlord classification (ADR-0003).
addressTotal: number;
addressCompleted: number;
addressFailed: number;
classifierTotal: number;
classifierCompleted: number;
classifierFailed: number;
};
export type ProgressView = {

View file

@ -0,0 +1,79 @@
import { db } from "@/app/db/db";
import {
landlordPropertyTypeOverrides,
landlordBuiltFormTypeOverrides,
landlordWallTypeOverrides,
landlordRoofTypeOverrides,
} from "@/app/db/schema/landlord_overrides";
import { asc, eq } from "drizzle-orm";
// One row of a landlord_*_overrides table, as selected by getLandlordOverrides.
export interface OverrideRow {
// The landlord's description text that the override is keyed on.
description: string;
// The enum value the description is mapped to.
value: string;
// Who wrote the row — presumably "classifier" or "user"; confirm against the
// schema.
source: string;
}
// The four override categories; each corresponds to one landlord_*_overrides
// table read by getLandlordOverrides.
export type LandlordOverrideCategory =
| "property_type"
| "built_form_type"
| "wall_type"
| "roof_type";
// Override rows grouped by category. Every category key is always present;
// a category with no rows maps to an empty array.
export type LandlordOverrideResults = Record<LandlordOverrideCategory, OverrideRow[]>;
// Builds a fresh empty result. A new object with new arrays is created on each
// call so a caller that mutates its result cannot affect what later callers
// receive (a shared module-level constant would leak those mutations).
function emptyOverrides(): LandlordOverrideResults {
  return {
    property_type: [],
    built_form_type: [],
    wall_type: [],
    roof_type: [],
  };
}

// Reads the four landlord_*_overrides tables for a portfolio. The portfolio id
// is a bigint FK; the bulk-upload flow carries it as a numeric string.
//
// Returns an empty result (all four categories present, each []) when
// `portfolioId` is not a plain decimal string, instead of letting BigInt()
// throw. Otherwise runs the four selects concurrently and returns each
// category's rows ordered by description ascending.
export async function getLandlordOverrides(
  portfolioId: string
): Promise<LandlordOverrideResults> {
  if (!/^\d+$/.test(portfolioId)) return emptyOverrides();
  const pid = BigInt(portfolioId);

  const [property_type, built_form_type, wall_type, roof_type] = await Promise.all([
    db
      .select({
        description: landlordPropertyTypeOverrides.description,
        value: landlordPropertyTypeOverrides.value,
        source: landlordPropertyTypeOverrides.source,
      })
      .from(landlordPropertyTypeOverrides)
      .where(eq(landlordPropertyTypeOverrides.portfolioId, pid))
      .orderBy(asc(landlordPropertyTypeOverrides.description)),
    db
      .select({
        description: landlordBuiltFormTypeOverrides.description,
        value: landlordBuiltFormTypeOverrides.value,
        source: landlordBuiltFormTypeOverrides.source,
      })
      .from(landlordBuiltFormTypeOverrides)
      .where(eq(landlordBuiltFormTypeOverrides.portfolioId, pid))
      .orderBy(asc(landlordBuiltFormTypeOverrides.description)),
    db
      .select({
        description: landlordWallTypeOverrides.description,
        value: landlordWallTypeOverrides.value,
        source: landlordWallTypeOverrides.source,
      })
      .from(landlordWallTypeOverrides)
      .where(eq(landlordWallTypeOverrides.portfolioId, pid))
      .orderBy(asc(landlordWallTypeOverrides.description)),
    db
      .select({
        description: landlordRoofTypeOverrides.description,
        value: landlordRoofTypeOverrides.value,
        source: landlordRoofTypeOverrides.source,
      })
      .from(landlordRoofTypeOverrides)
      .where(eq(landlordRoofTypeOverrides.portfolioId, pid))
      .orderBy(asc(landlordRoofTypeOverrides.description)),
  ]);

  return { property_type, built_form_type, wall_type, roof_type };
}