Merge branch 'main' into feature/frontend_landlord_overrides

This commit is contained in:
Jun-te Kim 2026-06-08 14:00:20 +01:00 committed by GitHub
commit cc134f8ce9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 33134 additions and 61 deletions

View file

@ -107,6 +107,8 @@ See [ADR-0001](./docs/adr/0001-bulk-upload-state-machine.md) for the deliberate
- A **BulkUpload** produces zero or more **Properties** when finalised.
- A **BulkUpload** has at most one **Task** (the orchestration handle for the FastAPI pipeline run); a Task has many **SubTasks** (one per pipeline stage: address matching, combiner).
- A **Portfolio** has many **VocabularyMappings** — one row per `(category, description)` it has ever encountered across all its BulkUploads. See [ADR-0002](./docs/adr/0002-landlord-override-vocabulary.md).
- A **Recommendation** belongs to exactly one **Plan**. Denormalised onto `recommendation.plan_id`; the `plan_recommendations` join table is being retired.
- A **Recommendation** has at most one **Material**. Denormalised onto `recommendation.material_id` (+ `material_quantity`, `material_quantity_unit`, `material_depth`). Historically (pre-~2023) a recommendation could carry multiple materials; ~128 such legacy rows were reconciled to one each on 2026-06-07. The cardinality guard in the backfill enforces this going forward.
### Baseline performance

View file

@ -13,7 +13,8 @@
"test:e2e:run": "cypress run",
"migration:generate": "drizzle-kit generate",
"migration:migrate": "drizzle-kit migrate",
"create_user": "tsx src/app/db/create_user.ts"
"create_user": "tsx src/app/db/create_user.ts",
"backfill:recommendation-denormalization": "tsx src/app/db/backfill-recommendation-denormalization.ts"
},
"dependencies": {
"@aws-sdk/client-s3": "^3.971.0",

View file

@ -0,0 +1,226 @@
/**
* Backfills the denormalised plan_id / material_* columns on `recommendation`
* from the plan_recommendations and recommendation_materials join tables, then
* builds the supporting indexes and validates the foreign keys all ONLINE.
*
* Why this lives outside drizzle:
* drizzle-kit migrate wraps every pending migration in ONE transaction, so it
* cannot COMMIT between batches, cannot run CREATE INDEX CONCURRENTLY, and
* holds the ADD COLUMN's AccessExclusiveLock for the whole run. This script
* instead commits each batch, keeping locks tiny, WAL/bloat bounded, EBS IO
* burst balance healthy, and progress visible + resumable.
* See docs/adr/0001-data-backfills-outside-drizzle.md
*
* Run AFTER `npm run migration:migrate` has applied 0222/0224 (the column adds):
* npm run backfill:recommendation-denormalization
*
* Safe to re-run: every step is idempotent (IS NULL guards, IF NOT EXISTS,
* VALIDATE on an already-valid constraint is a no-op). If interrupted, just run
* it again it resumes from wherever it left off.
*
* Tunables via env:
* BACKFILL_BATCH_SIZE rows scanned per committed batch (default 25000)
* BACKFILL_SLEEP_MS pause between batches, eases IO pressure (default 50)
*/
import dotenv from "dotenv";
import { Pool, type PoolClient } from "pg";
dotenv.config({ path: ".env.local" });
const BATCH_SIZE = Number(process.env.BACKFILL_BATCH_SIZE ?? 25_000);
const SLEEP_MS = Number(process.env.BACKFILL_SLEEP_MS ?? 50);
const pool = new Pool({
host: process.env.DB_HOST,
port: Number(process.env.DB_PORT),
user: process.env.DB_USERNAME,
password: process.env.DB_PASSWORD,
database: process.env.DB_NAME,
});
const sleep = (ms: number) => new Promise((r) => setTimeout(r, ms));
/** Abort before touching data if the 1:1 assumption is violated. */
async function assertSingleCardinality(
client: PoolClient,
joinTable: string,
): Promise<void> {
const { rows } = await client.query(
`SELECT count(*)::int AS violations
FROM (
SELECT recommendation_id
FROM ${joinTable}
GROUP BY recommendation_id
HAVING count(*) > 1
) dupes`,
);
const violations = rows[0].violations as number;
if (violations > 0) {
throw new Error(
`Aborting: ${violations} recommendation(s) map to multiple ${joinTable} ` +
`rows. The backfill cannot pick deterministically. Resolve cardinality first.`,
);
}
}
/**
* Keyset-paginate the whole `recommendation` table by id and, for each batch,
* set the target columns from the join table where a match exists. Each batch
* is its own autocommitted statement (no surrounding BEGIN), so locks are held
* only for the rows in that batch.
*
* We scan by id (not `WHERE col IS NULL LIMIT n`) so rows with no match don't
* get re-selected forever they're simply left NULL and we move past them.
*/
async function backfillColumns(
client: PoolClient,
label: string,
updateSql: (idFrom: string, limit: string) => string,
): Promise<void> {
let lastId = "0";
let scannedTotal = 0;
let updatedTotal = 0;
const startedAt = Date.now();
for (;;) {
const { rows } = await client.query(updateSql("$1", "$2"), [
lastId,
BATCH_SIZE,
]);
const { max_id, scanned, updated } = rows[0] as {
max_id: string | null;
scanned: number;
updated: number;
};
if (!scanned || max_id === null) break;
scannedTotal += Number(scanned);
updatedTotal += Number(updated);
lastId = max_id;
const rate = Math.round(scannedTotal / ((Date.now() - startedAt) / 1000));
console.log(
`[${label}] up to id ${lastId} — scanned ${scannedTotal.toLocaleString()}, ` +
`updated ${updatedTotal.toLocaleString()} (${rate.toLocaleString()} rows/s scan)`,
);
if (SLEEP_MS > 0) await sleep(SLEEP_MS);
}
console.log(
`[${label}] done: scanned ${scannedTotal.toLocaleString()}, ` +
`updated ${updatedTotal.toLocaleString()} in ` +
`${Math.round((Date.now() - startedAt) / 1000)}s`,
);
}
async function main(): Promise<void> {
const client = await pool.connect();
try {
console.log(
`Config: batch=${BATCH_SIZE.toLocaleString()} rows, sleep=${SLEEP_MS}ms\n`,
);
// 1. Guard the 1:1 assumption before mutating anything.
console.log("Checking cardinality...");
await assertSingleCardinality(client, "plan_recommendations");
await assertSingleCardinality(client, "recommendation_materials");
// 2. Backfill plan_id.
await backfillColumns(
client,
"plan_id",
(idFrom, limit) => `
WITH batch AS (
SELECT id FROM recommendation
WHERE id > ${idFrom}
ORDER BY id
LIMIT ${limit}
),
upd AS (
UPDATE recommendation r
SET plan_id = pr.plan_id
FROM batch b
JOIN plan_recommendations pr ON pr.recommendation_id = b.id
WHERE r.id = b.id
AND r.plan_id IS NULL
RETURNING r.id
)
SELECT (SELECT max(id) FROM batch) AS max_id,
(SELECT count(*) FROM batch) AS scanned,
(SELECT count(*) FROM upd) AS updated`,
);
// 3. Backfill the four material_* columns in one pass.
await backfillColumns(
client,
"material",
(idFrom, limit) => `
WITH batch AS (
SELECT id FROM recommendation
WHERE id > ${idFrom}
ORDER BY id
LIMIT ${limit}
),
upd AS (
UPDATE recommendation r
SET material_id = rm.material_id,
material_quantity = rm.quantity,
material_quantity_unit = rm.quantity_unit,
material_depth = rm.depth
FROM batch b
JOIN recommendation_materials rm ON rm.recommendation_id = b.id
WHERE r.id = b.id
AND r.material_id IS NULL
RETURNING r.id
)
SELECT (SELECT max(id) FROM batch) AS max_id,
(SELECT count(*) FROM batch) AS scanned,
(SELECT count(*) FROM upd) AS updated`,
);
// 4. Build indexes CONCURRENTLY (no write lock). Must NOT be in a txn —
// a pooled client with no BEGIN runs each statement autocommitted.
console.log("Creating indexes concurrently...");
await client.query(
`CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_recommendation_plan_id
ON recommendation USING btree (plan_id)`,
);
await client.query(
`CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_recommendation_material_id
ON recommendation USING btree (material_id)`,
);
// 5. Validate the FKs online (ShareUpdateExclusiveLock — allows reads/writes).
console.log("Validating foreign keys...");
await client.query(
`ALTER TABLE recommendation
VALIDATE CONSTRAINT recommendation_plan_id_plan_id_fk`,
);
await client.query(
`ALTER TABLE recommendation
VALIDATE CONSTRAINT recommendation_material_id_material_id_fk`,
);
// 6. Report any rows still unlinked (expected for material; investigate for plan).
const { rows } = await client.query(
`SELECT count(*) FILTER (WHERE plan_id IS NULL) AS plan_null,
count(*) FILTER (WHERE material_id IS NULL) AS material_null
FROM recommendation`,
);
console.log(
`\nRemaining NULLs — plan_id: ${Number(rows[0].plan_null).toLocaleString()}, ` +
`material_id: ${Number(rows[0].material_null).toLocaleString()}`,
);
console.log("Backfill complete.");
} finally {
client.release();
await pool.end();
}
}
main().catch((err) => {
console.error("Backfill failed:", err);
process.exit(1);
});

View file

@ -1,3 +1,7 @@
-- Metadata-only DDL: instant, holds AccessExclusiveLock on "recommendation"
-- only momentarily. The FK is added NOT VALID so no full-table validation scan
-- happens here; it is validated online by the standalone backfill script. The
-- index is created CONCURRENTLY by that script too, after the column is filled.
-- See docs/adr/0001-data-backfills-outside-drizzle.md
ALTER TABLE "recommendation" ADD COLUMN "plan_id" bigint;--> statement-breakpoint
ALTER TABLE "recommendation" ADD CONSTRAINT "recommendation_plan_id_plan_id_fk" FOREIGN KEY ("plan_id") REFERENCES "public"."plan"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
CREATE INDEX "idx_recommendation_plan_id" ON "recommendation" USING btree ("plan_id");
ALTER TABLE "recommendation" ADD CONSTRAINT "recommendation_plan_id_plan_id_fk" FOREIGN KEY ("plan_id") REFERENCES "public"."plan"("id") ON DELETE cascade ON UPDATE no action NOT VALID;

View file

@ -1,23 +1,13 @@
-- Guard: fail if any recommendation is linked to more than one plan.
-- The spec asserts each recommendation belongs to exactly one plan; if real
-- data violates that the backfill cannot pick deterministically and must not
-- proceed silently.
DO $$
BEGIN
IF EXISTS (
SELECT 1
FROM plan_recommendations
GROUP BY recommendation_id
HAVING count(*) > 1
) THEN
RAISE EXCEPTION
'plan_id backfill aborted: one or more recommendations appear in multiple '
'plan_recommendations rows. Resolve cardinality before re-running.';
END IF;
END $$;
UPDATE recommendation r
SET plan_id = pr.plan_id
FROM plan_recommendations pr
WHERE pr.recommendation_id = r.id
AND r.plan_id IS NULL;
-- Intentionally a no-op.
--
-- The plan_id backfill used to live here as a single full-table UPDATE inside
-- drizzle's migration transaction. Because drizzle wraps ALL pending migrations
-- in one transaction, that UPDATE held an AccessExclusiveLock on "recommendation"
-- (from the ADD COLUMN in 0222) for the entire multi-hour run, blocked unrelated
-- migrations behind it, exhausted EBS IO burst balance, and could not report
-- progress or be resumed.
--
-- The backfill now runs OUTSIDE drizzle, in committed batches, via:
-- npm run backfill:recommendation-denormalization
-- See docs/adr/0001-data-backfills-outside-drizzle.md
SELECT 1;

View file

@ -1,6 +1,10 @@
-- Metadata-only DDL: adding nullable columns with no default is an instant
-- catalog change. The FK is added NOT VALID (no scan); it is validated online
-- and the index built CONCURRENTLY by the standalone backfill script, after the
-- columns are populated.
-- See docs/adr/0001-data-backfills-outside-drizzle.md
ALTER TABLE "recommendation" ADD COLUMN "material_id" bigint;--> statement-breakpoint
ALTER TABLE "recommendation" ADD COLUMN "material_quantity" real;--> statement-breakpoint
ALTER TABLE "recommendation" ADD COLUMN "material_quantity_unit" "unit_quantity";--> statement-breakpoint
ALTER TABLE "recommendation" ADD COLUMN "material_depth" real;--> statement-breakpoint
ALTER TABLE "recommendation" ADD CONSTRAINT "recommendation_material_id_material_id_fk" FOREIGN KEY ("material_id") REFERENCES "public"."material"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
CREATE INDEX "idx_recommendation_material_id" ON "recommendation" USING btree ("material_id");
ALTER TABLE "recommendation" ADD CONSTRAINT "recommendation_material_id_material_id_fk" FOREIGN KEY ("material_id") REFERENCES "public"."material"("id") ON DELETE set null ON UPDATE no action NOT VALID;

View file

@ -1,26 +1,12 @@
-- Guard: fail if any recommendation maps to more than one material row.
-- The modelled fabric measures never produce this; if real data violates it
-- the backfill cannot pick deterministically and must not proceed silently.
DO $$
BEGIN
IF EXISTS (
SELECT 1
FROM recommendation_materials
GROUP BY recommendation_id
HAVING count(*) > 1
) THEN
RAISE EXCEPTION
'recommendation_material_id backfill aborted: one or more recommendations '
'have multiple recommendation_materials rows. Resolve cardinality before re-running.';
END IF;
END $$;
-- Backfill all four columns in a single pass.
UPDATE recommendation r
SET material_id = rm.material_id,
material_quantity = rm.quantity,
material_quantity_unit = rm.quantity_unit,
material_depth = rm.depth
FROM recommendation_materials rm
WHERE rm.recommendation_id = r.id
AND r.material_id IS NULL;
-- Intentionally a no-op.
--
-- The materials backfill (material_id + quantity/unit/depth) used to live here as
-- a single full-table UPDATE inside drizzle's migration transaction, with the
-- material_id index created BEFORE the backfill in 0224 — so every updated row
-- also had to maintain that index. It moved out for the same reasons as the
-- plan_id backfill (see 0223).
--
-- The backfill now runs OUTSIDE drizzle, in committed batches, via:
-- npm run backfill:recommendation-denormalization
-- See docs/adr/0001-data-backfills-outside-drizzle.md
SELECT 1;

View file

@ -0,0 +1,21 @@
CREATE TABLE "magic_plan_door_ventilation" (
"id" bigserial PRIMARY KEY NOT NULL,
"magic_plan_door_id" bigint NOT NULL,
"undercut_mm" real,
CONSTRAINT "magic_plan_door_ventilation_magic_plan_door_id_unique" UNIQUE("magic_plan_door_id")
);
--> statement-breakpoint
CREATE TABLE "magic_plan_window_ventilation" (
"id" bigserial PRIMARY KEY NOT NULL,
"magic_plan_window_id" bigint NOT NULL,
"opening_type" text,
"num_openings" integer,
"pct_openable" integer,
"trickle_vent_area_mm2" integer,
"num_trickle_vents" integer,
CONSTRAINT "magic_plan_window_ventilation_magic_plan_window_id_unique" UNIQUE("magic_plan_window_id")
);
--> statement-breakpoint
ALTER TABLE "magic_plan_door" ADD COLUMN "height_mm" real;--> statement-breakpoint
ALTER TABLE "magic_plan_door_ventilation" ADD CONSTRAINT "magic_plan_door_ventilation_magic_plan_door_id_magic_plan_door_id_fk" FOREIGN KEY ("magic_plan_door_id") REFERENCES "public"."magic_plan_door"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "magic_plan_window_ventilation" ADD CONSTRAINT "magic_plan_window_ventilation_magic_plan_window_id_magic_plan_window_id_fk" FOREIGN KEY ("magic_plan_window_id") REFERENCES "public"."magic_plan_window"("id") ON DELETE cascade ON UPDATE no action;

View file

@ -0,0 +1,10 @@
-- Backfill opening_type into magic_plan_window_ventilation from magic_plan_window.
-- Only rows with a non-null opening_type are backfilled; no ventilation row is
-- created for windows that never had one. The unique constraint on
-- magic_plan_window_id prevents duplicate rows if this runs more than once.
INSERT INTO magic_plan_window_ventilation (magic_plan_window_id, opening_type)
SELECT id, opening_type
FROM magic_plan_window
WHERE opening_type IS NOT NULL
ON CONFLICT (magic_plan_window_id) DO NOTHING;

View file

@ -0,0 +1 @@
ALTER TABLE "magic_plan_window" DROP COLUMN "opening_type";

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1513,6 +1513,90 @@
"when": 1779992128370,
"tag": "0216_add_subtask_service",
"breakpoints": true
},
{
"idx": 217,
"version": "7",
"when": 1780404222902,
"tag": "0217_gray_hellion",
"breakpoints": true
},
{
"idx": 218,
"version": "7",
"when": 1780408378351,
"tag": "0218_natural_umar",
"breakpoints": true
},
{
"idx": 219,
"version": "7",
"when": 1780419959831,
"tag": "0219_add_verify_ack",
"breakpoints": true
},
{
"idx": 220,
"version": "7",
"when": 1780491109956,
"tag": "0220_round_retro_girl",
"breakpoints": true
},
{
"idx": 221,
"version": "7",
"when": 1780566543108,
"tag": "0221_nice_sumo",
"breakpoints": true
},
{
"idx": 222,
"version": "7",
"when": 1780647165601,
"tag": "0222_nifty_hellcat",
"breakpoints": true
},
{
"idx": 223,
"version": "7",
"when": 1780647248894,
"tag": "0223_recommendation_plan_id_backfill",
"breakpoints": true
},
{
"idx": 224,
"version": "7",
"when": 1780653770494,
"tag": "0224_busy_nitro",
"breakpoints": true
},
{
"idx": 225,
"version": "7",
"when": 1780654800000,
"tag": "0225_recommendation_material_id_backfill",
"breakpoints": true
},
{
"idx": 226,
"version": "7",
"when": 1780910378978,
"tag": "0226_massive_guardian",
"breakpoints": true
},
{
"idx": 227,
"version": "7",
"when": 1780910400000,
"tag": "0227_magic_plan_window_ventilation_backfill",
"breakpoints": true
},
{
"idx": 228,
"version": "7",
"when": 1780910657380,
"tag": "0228_deep_betty_ross",
"breakpoints": true
}
]
}

View file

@ -9,6 +9,7 @@ export const magicPlanDoor = pgTable(
.notNull()
.references(() => magicPlanRoom.id),
widthMm: real("width_mm"),
heightMm: real("height_mm"),
type: text("type"),
},
);

View file

@ -0,0 +1,14 @@
import { pgTable, bigserial, bigint, real } from "drizzle-orm/pg-core";
import { magicPlanDoor } from "./door";
export const magicPlanDoorVentilation = pgTable(
"magic_plan_door_ventilation",
{
id: bigserial("id", { mode: "bigint" }).primaryKey(),
doorId: bigint("magic_plan_door_id", { mode: "bigint" })
.notNull()
.unique()
.references(() => magicPlanDoor.id, { onDelete: "cascade" }),
undercutMm: real("undercut_mm"),
},
);

View file

@ -1,16 +1,15 @@
import { pgTable, bigserial, bigint, text, real } from "drizzle-orm/pg-core";
import { pgTable, bigserial, bigint, real } from "drizzle-orm/pg-core";
import { magicPlanRoom } from "./room";
export const magicPlanWindow = pgTable(
"magic_plan_window",
{
id: bigserial("id", { mode: "bigint" }).primaryKey(),
roomId: bigint("magic_plan_room_id", { mode: "bigint" })
id: bigserial("id", { mode: "bigint" }).primaryKey(),
roomId: bigint("magic_plan_room_id", { mode: "bigint" })
.notNull()
.references(() => magicPlanRoom.id),
widthM: real("width_m"),
heightM: real("height_m"),
areaM2: real("area_m2"),
openingType: text("opening_type"),
widthM: real("width_m"),
heightM: real("height_m"),
areaM2: real("area_m2"),
},
);

View file

@ -0,0 +1,18 @@
import { pgTable, bigserial, bigint, text, integer } from "drizzle-orm/pg-core";
import { magicPlanWindow } from "./window";
export const magicPlanWindowVentilation = pgTable(
"magic_plan_window_ventilation",
{
id: bigserial("id", { mode: "bigint" }).primaryKey(),
windowId: bigint("magic_plan_window_id", { mode: "bigint" })
.notNull()
.unique()
.references(() => magicPlanWindow.id, { onDelete: "cascade" }),
openingType: text("opening_type"),
numOpenings: integer("num_openings"),
pctOpenable: integer("pct_openable"),
trickleVentAreaMm2: integer("trickle_vent_area_mm2"),
numTrickleVents: integer("num_trickle_vents"),
},
);