From 4cdb21fbbc5c91de0c7d0c7be16d811ab4105f3c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 23 Apr 2026 12:01:17 +0000 Subject: [PATCH] save current changes --- .claude/settings.local.json | 4 +- .devcontainer/docker-compose.yml | 1 + .../[uploadId]/combined-results/route.ts | 173 ++++++++++++++---- .../[uploadId]/OnboardingProgress.tsx | 9 - .../[uploadId]/confirm-matches/page.tsx | 8 +- 5 files changed, 146 insertions(+), 49 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index dddaa7e4..6aad4180 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -14,7 +14,9 @@ "Read(//workspaces/home/github/Model/**)", "Bash(pytest backend/tests/test_bulk_combiner_status.py -v --no-cov)", "Bash(echo \"EXIT: $?\")", - "mcp__backlog__task_list" + "mcp__backlog__task_list", + "Bash(grep -E \"\\\\.\\(prisma|sql|ts\\)$\")", + "Bash(xargs cat *)" ] }, "enabledMcpjsonServers": [ diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml index 2477976d..e42032ba 100644 --- a/.devcontainer/docker-compose.yml +++ b/.devcontainer/docker-compose.yml @@ -35,3 +35,4 @@ networks: driver: bridge shared-dev: external: true + name: shared-dev diff --git a/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/combined-results/route.ts b/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/combined-results/route.ts index 44f74fa2..d0cf1ca0 100644 --- a/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/combined-results/route.ts +++ b/src/app/api/portfolio/[portfolioId]/bulk-uploads/[uploadId]/combined-results/route.ts @@ -4,6 +4,50 @@ import { eq } from "drizzle-orm"; import { NextRequest, NextResponse } from "next/server"; import { getServerSession } from "next-auth"; import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions"; +import S3 from "aws-sdk/clients/s3"; +import * as XLSX from "xlsx"; + +const ADDRESS_COLS = ["Address 1", "Address 2", "Address 3", "postcode"] as const; +const INTERNAL_REF_COL = "Internal Reference"; +const UPRN_COL = "address2uprn_uprn"; +const MATCHED_ADDRESS_COL = "address2uprn_address"; +const LEXISCORE_COL = "address2uprn_lexiscore"; +const MISSING_SENTINEL = "invalid postcode"; +const HIGH_THRESHOLD = 0.85; +const MED_THRESHOLD = 0.65; + +type ScoreBucket = "high" | "med" | "low" | null; + +function scoreBucket(score: number | null): ScoreBucket { + if (score === null) return null; + if (score >= HIGH_THRESHOLD) return "high"; + if (score >= MED_THRESHOLD) return "med"; + return "low"; +} + +function normalize(v: unknown): string { + if (v === null || v === undefined) return ""; + return String(v).trim(); +} + +function isMissingUprn(uprn: string): boolean { + return uprn === "" || uprn.toLowerCase() === MISSING_SENTINEL; +} + +function parseLexiscore(raw: unknown): number | null { + const val = normalize(raw); + if (!val || val.toLowerCase() === MISSING_SENTINEL) return null; + const n = Number(val); + return Number.isFinite(n) ? n : null; +} + +function parseS3Uri(uri: string): { bucket: string; key: string } | null { + if (!uri.startsWith("s3://")) return null; + const rest = uri.slice(5); + const slash = rest.indexOf("/"); + if (slash < 0) return null; + return { bucket: rest.slice(0, slash), key: rest.slice(slash + 1) }; +} export async function GET( request: NextRequest, @@ -15,53 +59,108 @@ export async function GET( const { uploadId } = await params; const [upload] = await db - .select({ taskId: bulkAddressUploads.taskId }) + .select({ + combinedOutputS3Uri: bulkAddressUploads.combinedOutputS3Uri, + }) .from(bulkAddressUploads) .where(eq(bulkAddressUploads.id, uploadId)) .limit(1); if (!upload) return NextResponse.json({ error: "Not found" }, { status: 404 }); - if (!upload.taskId) return NextResponse.json({ error: "Task not started" }, { status: 409 }); + if (!upload.combinedOutputS3Uri) + return NextResponse.json({ error: "Combiner not finished" }, { status: 409 }); - const fastapiUrl = process.env.FASTAPI_API_URL; - const fastapiKey = process.env.FASTAPI_API_KEY; - if (!fastapiUrl || !fastapiKey) { - console.error("FASTAPI_API_URL or FASTAPI_API_KEY not set"); - return NextResponse.json({ error: "Server misconfiguration" }, { status: 500 }); - } - - const sessionToken = - request.cookies.get("__Secure-next-auth.session-token")?.value ?? - request.cookies.get("next-auth.session-token")?.value; + const parsed = parseS3Uri(upload.combinedOutputS3Uri); + if (!parsed) + return NextResponse.json({ error: "Invalid combined output S3 URI" }, { status: 500 }); const { searchParams } = new URL(request.url); - const offset = searchParams.get("offset") ?? "0"; - const limit = searchParams.get("limit") ?? "500"; + const offset = Math.max(0, parseInt(searchParams.get("offset") ?? "0", 10) || 0); + const limit = Math.max(1, Math.min(5000, parseInt(searchParams.get("limit") ?? "500", 10) || 500)); + const s3 = new S3({ + region: process.env.RETROFIT_DATA_DEV_REGION, + accessKeyId: process.env.RETROFIT_DATA_DEV_ACCESS_KEY, + secretAccessKey: process.env.RETROFIT_DATA_DEV_SECRET_KEY, + }); + + let rawRows: Record[]; try { - const res = await fetch( - `${fastapiUrl}/v1/bulk-uploads/${upload.taskId}/combined-results?offset=${offset}&limit=${limit}`, - { - headers: { - "x-api-key": fastapiKey, - Authorization: `Bearer ${sessionToken}`, - }, - } - ); - - if (!res.ok) { - const errText = await res.text().catch(() => ""); - console.error("Backend combined-results failed:", res.status, errText); - return NextResponse.json( - { error: res.status === 409 ? "Combiner not finished" : "Failed to fetch results" }, - { status: res.status === 409 ? 409 : 502 } - ); - } - - const data = await res.json(); - return NextResponse.json(data, { status: 200 }); + const obj = await s3 + .getObject({ Bucket: parsed.bucket, Key: parsed.key }) + .promise(); + const buf = Buffer.from(obj.Body as Uint8Array); + const wb = XLSX.read(buf, { type: "buffer" }); + const sheet = wb.Sheets[wb.SheetNames[0]]; + rawRows = XLSX.utils.sheet_to_json>(sheet, { defval: "" }); } catch (err) { - console.error("Failed to reach backend combined-results:", err); - return NextResponse.json({ error: "Failed to fetch results" }, { status: 502 }); + console.error("Failed to read combined CSV from S3:", err); + return NextResponse.json({ error: "Failed to read combined CSV" }, { status: 502 }); } + + const uprnValues = rawRows.map((r) => normalize(r[UPRN_COL])); + const uprnCounts = new Map(); + for (const u of uprnValues) { + if (isMissingUprn(u)) continue; + uprnCounts.set(u, (uprnCounts.get(u) ?? 0) + 1); + } + const duplicateUprns = new Set( + Array.from(uprnCounts.entries()) + .filter(([, c]) => c >= 2) + .map(([u]) => u) + ); + + const missingCount = uprnValues.filter(isMissingUprn).length; + const duplicateCount = uprnValues.filter((u) => duplicateUprns.has(u)).length; + const matchedCount = rawRows.length - missingCount; + + const page = rawRows.slice(offset, offset + limit); + const rows = page.map((raw, i) => { + const rowIndex = offset + i; + const addressParts = ADDRESS_COLS.map((c) => normalize(raw[c])).filter(Boolean); + const inputAddress = addressParts.join(", "); + const internalRef = normalize(raw[INTERNAL_REF_COL]) || null; + + const uprnRaw = normalize(raw[UPRN_COL]); + const uprn = isMissingUprn(uprnRaw) ? null : uprnRaw; + + const matchedAddressRaw = normalize(raw[MATCHED_ADDRESS_COL]); + const matchedAddress = + !matchedAddressRaw || matchedAddressRaw.toLowerCase() === MISSING_SENTINEL + ? null + : matchedAddressRaw; + + const lexiscore = parseLexiscore(raw[LEXISCORE_COL]); + + const flags: ("duplicate" | "missing")[] = []; + if (uprn === null) flags.push("missing"); + else if (duplicateUprns.has(uprn)) flags.push("duplicate"); + + return { + row_index: rowIndex, + input_address: inputAddress, + internal_reference: internalRef, + uprn, + matched_address: matchedAddress, + lexiscore, + score_bucket: scoreBucket(lexiscore), + flags, + }; + }); + + return NextResponse.json( + { + task_id: uploadId, + total: rawRows.length, + offset, + limit, + flags_summary: { + duplicates: duplicateCount, + missing: missingCount, + matched: matchedCount, + }, + rows, + }, + { status: 200 } + ); } diff --git a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx index 7effd7f7..2eae693a 100644 --- a/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx +++ b/src/app/portfolio/[slug]/(portfolio)/bulk-upload/[uploadId]/OnboardingProgress.tsx @@ -149,15 +149,6 @@ export default function OnboardingProgress({ )} - {isAwaitingReview && ( - - Review matches - - )} - {isDomnaUser && ( ({})); + const upstreamStatus = body?.upstreamStatus; + const upstreamBody = body?.upstreamBody; + fetchError = `Failed to load results (${res.status})${upstreamStatus ? ` · upstream ${upstreamStatus}` : ""}${upstreamBody ? ` · ${upstreamBody}` : ""}`; + console.error("Confirm-matches fetch error:", { status: res.status, body }); } else { data = (await res.json()) as CombinedResultsResponse; } } catch (err) { console.error("Failed to fetch combined-results:", err); - fetchError = "Failed to load results"; + fetchError = `Failed to load results · ${err instanceof Error ? err.message : String(err)}`; } return (