save current changes

This commit is contained in:
Jun-te Kim 2026-04-23 12:01:17 +00:00
parent bee57a56b5
commit 4cdb21fbbc
5 changed files with 146 additions and 49 deletions

View file

@ -14,7 +14,9 @@
"Read(//workspaces/home/github/Model/**)",
"Bash(pytest backend/tests/test_bulk_combiner_status.py -v --no-cov)",
"Bash(echo \"EXIT: $?\")",
"mcp__backlog__task_list"
"mcp__backlog__task_list",
"Bash(grep -E \"\\\\.\\(prisma|sql|ts\\)$\")",
"Bash(xargs cat *)"
]
},
"enabledMcpjsonServers": [

View file

@ -35,3 +35,4 @@ networks:
driver: bridge
shared-dev:
external: true
name: shared-dev

View file

@ -4,6 +4,50 @@ import { eq } from "drizzle-orm";
import { NextRequest, NextResponse } from "next/server";
import { getServerSession } from "next-auth";
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
import S3 from "aws-sdk/clients/s3";
import * as XLSX from "xlsx";
// Row keys whose non-empty, trimmed values are joined with ", " to build each
// row's `input_address`.
const ADDRESS_COLS = ["Address 1", "Address 2", "Address 3", "postcode"] as const;
// Row key read for `internal_reference`; an empty value is reported as null.
const INTERNAL_REF_COL = "Internal Reference";
// Row key holding the matched UPRN; also used to count duplicates and missing rows.
const UPRN_COL = "address2uprn_uprn";
// Row key holding the matched address text.
const MATCHED_ADDRESS_COL = "address2uprn_address";
// Row key holding the match score, parsed as a number.
const LEXISCORE_COL = "address2uprn_lexiscore";
// Placeholder text that is compared case-insensitively against the UPRN,
// matched-address and score cells; a cell equal to it is treated as having no value.
const MISSING_SENTINEL = "invalid postcode";
// Scores at or above this value are bucketed as "high".
const HIGH_THRESHOLD = 0.85;
// Scores at or above this value (but below HIGH_THRESHOLD) are bucketed as "med";
// anything lower is "low".
const MED_THRESHOLD = 0.65;
// Confidence bucket for a score; null means no score was available.
type ScoreBucket = "high" | "med" | "low" | null;
/**
 * Classifies a match score into a coarse confidence bucket.
 *
 * @param score - Parsed score, or `null` when no score is available.
 * @returns `null` for a `null` score; `"high"` when the score is at or above
 *   `HIGH_THRESHOLD`; `"med"` when it is at or above `MED_THRESHOLD`;
 *   otherwise `"low"`.
 */
function scoreBucket(score: number | null): ScoreBucket {
  if (score === null) {
    return null;
  }
  // Thresholds are tested from highest to lowest so the first match wins.
  const tier = score >= HIGH_THRESHOLD ? "high" : score >= MED_THRESHOLD ? "med" : "low";
  return tier;
}
/**
 * Converts an arbitrary cell value to a trimmed string.
 *
 * @param v - Any value; `null` and `undefined` are treated as empty.
 * @returns `""` for `null`/`undefined`, otherwise `String(v)` with leading and
 *   trailing whitespace removed.
 */
function normalize(v: unknown): string {
  // Substitute an empty string for nullish input; every other value
  // (including 0 and false) is stringified as-is.
  const value = v ?? "";
  return String(value).trim();
}
/**
 * Reports whether a UPRN cell carries no usable value.
 *
 * @param uprn - The UPRN text to inspect.
 * @returns `true` when the text is empty or equals `MISSING_SENTINEL`
 *   ignoring case; `false` otherwise.
 */
function isMissingUprn(uprn: string): boolean {
  if (uprn === "") {
    return true;
  }
  return uprn.toLowerCase() === MISSING_SENTINEL;
}
/**
 * Parses a raw score cell into a finite number.
 *
 * @param raw - The raw cell value.
 * @returns The numeric score, or `null` when the cell is empty, equals
 *   `MISSING_SENTINEL` ignoring case, or does not convert to a finite number.
 */
function parseLexiscore(raw: unknown): number | null {
  const text = normalize(raw);
  if (text === "" || text.toLowerCase() === MISSING_SENTINEL) {
    return null;
  }
  const parsed = Number(text);
  if (!Number.isFinite(parsed)) {
    // Covers NaN (non-numeric text) as well as +/-Infinity.
    return null;
  }
  return parsed;
}
/**
 * Splits an `s3://bucket/key` URI into its bucket and object key.
 *
 * @param uri - The URI to parse.
 * @returns The bucket and key, or `null` when the URI does not start with
 *   `s3://`, has no `/` after the bucket, or has an empty bucket or empty key.
 *   The key is everything after the first `/` and may itself contain slashes.
 */
function parseS3Uri(uri: string): { bucket: string; key: string } | null {
  const scheme = "s3://";
  if (!uri.startsWith(scheme)) return null;
  const rest = uri.slice(scheme.length);
  const slash = rest.indexOf("/");
  // slash < 0: no separator at all; slash === 0: empty bucket ("s3:///key").
  if (slash <= 0) return null;
  const key = rest.slice(slash + 1);
  // "s3://bucket/" names a bucket but no object, so it cannot be fetched.
  if (key === "") return null;
  return { bucket: rest.slice(0, slash), key };
}
export async function GET(
request: NextRequest,
@ -15,53 +59,108 @@ export async function GET(
const { uploadId } = await params;
const [upload] = await db
.select({ taskId: bulkAddressUploads.taskId })
.select({
combinedOutputS3Uri: bulkAddressUploads.combinedOutputS3Uri,
})
.from(bulkAddressUploads)
.where(eq(bulkAddressUploads.id, uploadId))
.limit(1);
if (!upload) return NextResponse.json({ error: "Not found" }, { status: 404 });
if (!upload.taskId) return NextResponse.json({ error: "Task not started" }, { status: 409 });
if (!upload.combinedOutputS3Uri)
return NextResponse.json({ error: "Combiner not finished" }, { status: 409 });
const fastapiUrl = process.env.FASTAPI_API_URL;
const fastapiKey = process.env.FASTAPI_API_KEY;
if (!fastapiUrl || !fastapiKey) {
console.error("FASTAPI_API_URL or FASTAPI_API_KEY not set");
return NextResponse.json({ error: "Server misconfiguration" }, { status: 500 });
}
const sessionToken =
request.cookies.get("__Secure-next-auth.session-token")?.value ??
request.cookies.get("next-auth.session-token")?.value;
const parsed = parseS3Uri(upload.combinedOutputS3Uri);
if (!parsed)
return NextResponse.json({ error: "Invalid combined output S3 URI" }, { status: 500 });
const { searchParams } = new URL(request.url);
const offset = searchParams.get("offset") ?? "0";
const limit = searchParams.get("limit") ?? "500";
const offset = Math.max(0, parseInt(searchParams.get("offset") ?? "0", 10) || 0);
const limit = Math.max(1, Math.min(5000, parseInt(searchParams.get("limit") ?? "500", 10) || 500));
const s3 = new S3({
region: process.env.RETROFIT_DATA_DEV_REGION,
accessKeyId: process.env.RETROFIT_DATA_DEV_ACCESS_KEY,
secretAccessKey: process.env.RETROFIT_DATA_DEV_SECRET_KEY,
});
let rawRows: Record<string, unknown>[];
try {
const res = await fetch(
`${fastapiUrl}/v1/bulk-uploads/${upload.taskId}/combined-results?offset=${offset}&limit=${limit}`,
{
headers: {
"x-api-key": fastapiKey,
Authorization: `Bearer ${sessionToken}`,
},
}
);
if (!res.ok) {
const errText = await res.text().catch(() => "");
console.error("Backend combined-results failed:", res.status, errText);
return NextResponse.json(
{ error: res.status === 409 ? "Combiner not finished" : "Failed to fetch results" },
{ status: res.status === 409 ? 409 : 502 }
);
}
const data = await res.json();
return NextResponse.json(data, { status: 200 });
const obj = await s3
.getObject({ Bucket: parsed.bucket, Key: parsed.key })
.promise();
const buf = Buffer.from(obj.Body as Uint8Array);
const wb = XLSX.read(buf, { type: "buffer" });
const sheet = wb.Sheets[wb.SheetNames[0]];
rawRows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: "" });
} catch (err) {
console.error("Failed to reach backend combined-results:", err);
return NextResponse.json({ error: "Failed to fetch results" }, { status: 502 });
console.error("Failed to read combined CSV from S3:", err);
return NextResponse.json({ error: "Failed to read combined CSV" }, { status: 502 });
}
const uprnValues = rawRows.map((r) => normalize(r[UPRN_COL]));
const uprnCounts = new Map<string, number>();
for (const u of uprnValues) {
if (isMissingUprn(u)) continue;
uprnCounts.set(u, (uprnCounts.get(u) ?? 0) + 1);
}
const duplicateUprns = new Set(
Array.from(uprnCounts.entries())
.filter(([, c]) => c >= 2)
.map(([u]) => u)
);
const missingCount = uprnValues.filter(isMissingUprn).length;
const duplicateCount = uprnValues.filter((u) => duplicateUprns.has(u)).length;
const matchedCount = rawRows.length - missingCount;
const page = rawRows.slice(offset, offset + limit);
const rows = page.map((raw, i) => {
const rowIndex = offset + i;
const addressParts = ADDRESS_COLS.map((c) => normalize(raw[c])).filter(Boolean);
const inputAddress = addressParts.join(", ");
const internalRef = normalize(raw[INTERNAL_REF_COL]) || null;
const uprnRaw = normalize(raw[UPRN_COL]);
const uprn = isMissingUprn(uprnRaw) ? null : uprnRaw;
const matchedAddressRaw = normalize(raw[MATCHED_ADDRESS_COL]);
const matchedAddress =
!matchedAddressRaw || matchedAddressRaw.toLowerCase() === MISSING_SENTINEL
? null
: matchedAddressRaw;
const lexiscore = parseLexiscore(raw[LEXISCORE_COL]);
const flags: ("duplicate" | "missing")[] = [];
if (uprn === null) flags.push("missing");
else if (duplicateUprns.has(uprn)) flags.push("duplicate");
return {
row_index: rowIndex,
input_address: inputAddress,
internal_reference: internalRef,
uprn,
matched_address: matchedAddress,
lexiscore,
score_bucket: scoreBucket(lexiscore),
flags,
};
});
return NextResponse.json(
{
task_id: uploadId,
total: rawRows.length,
offset,
limit,
flags_summary: {
duplicates: duplicateCount,
missing: missingCount,
matched: matchedCount,
},
rows,
},
{ status: 200 }
);
}

View file

@ -149,15 +149,6 @@ export default function OnboardingProgress({
)}
</div>
{isAwaitingReview && (
<Link
href={`/portfolio/${portfolioSlug}/bulk-upload/${uploadId}/confirm-matches`}
className="inline-flex items-center gap-2 px-5 py-2 rounded-xl bg-gradient-to-br from-[#14163d] to-[#15173e] text-white text-sm font-bold hover:opacity-90 transition-opacity"
>
Review matches
</Link>
)}
{isDomnaUser && (
<Link
href={`/portfolio/${portfolioSlug}/settings/logs`}

View file

@ -52,13 +52,17 @@ export default async function ConfirmMatchesPage(props: {
try {
const res = await fetch(url, { headers: { Cookie: cookieHeader }, cache: "no-store" });
if (!res.ok) {
fetchError = `Failed to load results (${res.status})`;
const body = await res.json().catch(() => ({}));
const upstreamStatus = body?.upstreamStatus;
const upstreamBody = body?.upstreamBody;
fetchError = `Failed to load results (${res.status})${upstreamStatus ? ` · upstream ${upstreamStatus}` : ""}${upstreamBody ? ` · ${upstreamBody}` : ""}`;
console.error("Confirm-matches fetch error:", { status: res.status, body });
} else {
data = (await res.json()) as CombinedResultsResponse;
}
} catch (err) {
console.error("Failed to fetch combined-results:", err);
fetchError = "Failed to load results";
fetchError = `Failed to load results · ${err instanceof Error ? err.message : String(err)}`;
}
return (