mirror of
https://github.com/Hestia-Homes/assessment-model.git
synced 2026-06-30 12:55:02 +00:00
save current changes
This commit is contained in:
parent
bee57a56b5
commit
4cdb21fbbc
5 changed files with 146 additions and 49 deletions
|
|
@ -14,7 +14,9 @@
|
|||
"Read(//workspaces/home/github/Model/**)",
|
||||
"Bash(pytest backend/tests/test_bulk_combiner_status.py -v --no-cov)",
|
||||
"Bash(echo \"EXIT: $?\")",
|
||||
"mcp__backlog__task_list"
|
||||
"mcp__backlog__task_list",
|
||||
"Bash(grep -E \"\\\\.\\(prisma|sql|ts\\)$\")",
|
||||
"Bash(xargs cat *)"
|
||||
]
|
||||
},
|
||||
"enabledMcpjsonServers": [
|
||||
|
|
|
|||
|
|
@ -35,3 +35,4 @@ networks:
|
|||
driver: bridge
|
||||
shared-dev:
|
||||
external: true
|
||||
name: shared-dev
|
||||
|
|
|
|||
|
|
@ -4,6 +4,50 @@ import { eq } from "drizzle-orm";
|
|||
import { NextRequest, NextResponse } from "next/server";
|
||||
import { getServerSession } from "next-auth";
|
||||
import { AuthOptions } from "@/app/api/auth/[...nextauth]/authOptions";
|
||||
import S3 from "aws-sdk/clients/s3";
|
||||
import * as XLSX from "xlsx";
|
||||
|
||||
/** Spreadsheet columns whose non-empty values are joined with ", " to form a row's input address. */
const ADDRESS_COLS = ["Address 1", "Address 2", "Address 3", "postcode"] as const;

/** Column read for a row's internal reference. */
const INTERNAL_REF_COL = "Internal Reference";

/** Column read for the matched UPRN; also used to count duplicate and missing matches. */
const UPRN_COL = "address2uprn_uprn";

/** Column read for the matched address text. */
const MATCHED_ADDRESS_COL = "address2uprn_address";

/** Column read for the match score. */
const LEXISCORE_COL = "address2uprn_lexiscore";

/**
 * Placeholder text that is treated as "no value" (compared case-insensitively)
 * when it appears in the UPRN, matched-address or score columns.
 */
const MISSING_SENTINEL = "invalid postcode";

/** Scores at or above this value fall in the "high" bucket. */
const HIGH_THRESHOLD = 0.85;

/** Scores at or above this value (and below HIGH_THRESHOLD) fall in the "med" bucket. */
const MED_THRESHOLD = 0.65;

/** Coarse confidence bucket for a match score; `null` means the row has no score. */
type ScoreBucket = "high" | "med" | "low" | null;
|
||||
|
||||
/**
 * Maps a match score onto a coarse confidence bucket.
 *
 * @param score - The parsed score, or `null` when the row has none.
 * @returns `"high"` at or above HIGH_THRESHOLD, `"med"` at or above
 *   MED_THRESHOLD, `"low"` for any other number, and `null` when there is no
 *   usable score.
 */
function scoreBucket(score: number | null): ScoreBucket {
  // NaN compares false against every threshold, so without this guard it
  // would be reported as a genuine "low" score rather than as unscored.
  if (score === null || Number.isNaN(score)) return null;
  if (score >= HIGH_THRESHOLD) return "high";
  if (score >= MED_THRESHOLD) return "med";
  return "low";
}
|
||||
|
||||
/**
 * Converts an arbitrary cell value to a trimmed string.
 *
 * @param v - Any value; `null` and `undefined` are treated as empty.
 * @returns `""` for `null`/`undefined`, otherwise `String(v)` with leading
 *   and trailing whitespace removed.
 */
function normalize(v: unknown): string {
  // `== null` deliberately matches both null and undefined.
  if (v == null) return "";
  return String(v).trim();
}
|
||||
|
||||
/**
 * Reports whether a UPRN cell holds no usable value.
 *
 * @param uprn - The cell text; it is compared as given, without trimming.
 * @returns `true` when the text is empty or equals MISSING_SENTINEL
 *   (case-insensitively), otherwise `false`.
 */
function isMissingUprn(uprn: string): boolean {
  if (uprn === "") return true;
  return uprn.toLowerCase() === MISSING_SENTINEL;
}
|
||||
|
||||
/**
 * Parses a raw score cell into a finite number.
 *
 * @param raw - The raw cell value.
 * @returns The numeric score, or `null` when the cell is empty, equals
 *   MISSING_SENTINEL (case-insensitively), or does not convert to a finite
 *   number.
 */
function parseLexiscore(raw: unknown): number | null {
  const text = normalize(raw);
  if (text === "" || text.toLowerCase() === MISSING_SENTINEL) return null;

  const parsed = Number(text);
  // Number() yields NaN for non-numeric text and ±Infinity for "Infinity";
  // neither is a usable score.
  return Number.isFinite(parsed) ? parsed : null;
}
|
||||
|
||||
/**
 * Splits an `s3://bucket/key` URI into its bucket and object key.
 *
 * @param uri - The URI to parse.
 * @returns The bucket and key, or `null` when the URI does not start with
 *   `s3://`, has no `/` after the bucket, or has an empty bucket or key.
 */
function parseS3Uri(uri: string): { bucket: string; key: string } | null {
  const scheme = "s3://";
  if (!uri.startsWith(scheme)) return null;

  const rest = uri.slice(scheme.length);
  const slash = rest.indexOf("/");
  // slash < 0: no key separator at all; slash === 0: empty bucket name.
  if (slash <= 0) return null;

  const key = rest.slice(slash + 1);
  // A trailing slash with nothing after it names no object.
  if (key === "") return null;

  return { bucket: rest.slice(0, slash), key };
}
|
||||
|
||||
export async function GET(
|
||||
request: NextRequest,
|
||||
|
|
@ -15,53 +59,108 @@ export async function GET(
|
|||
const { uploadId } = await params;
|
||||
|
||||
const [upload] = await db
|
||||
.select({ taskId: bulkAddressUploads.taskId })
|
||||
.select({
|
||||
combinedOutputS3Uri: bulkAddressUploads.combinedOutputS3Uri,
|
||||
})
|
||||
.from(bulkAddressUploads)
|
||||
.where(eq(bulkAddressUploads.id, uploadId))
|
||||
.limit(1);
|
||||
|
||||
if (!upload) return NextResponse.json({ error: "Not found" }, { status: 404 });
|
||||
if (!upload.taskId) return NextResponse.json({ error: "Task not started" }, { status: 409 });
|
||||
if (!upload.combinedOutputS3Uri)
|
||||
return NextResponse.json({ error: "Combiner not finished" }, { status: 409 });
|
||||
|
||||
const fastapiUrl = process.env.FASTAPI_API_URL;
|
||||
const fastapiKey = process.env.FASTAPI_API_KEY;
|
||||
if (!fastapiUrl || !fastapiKey) {
|
||||
console.error("FASTAPI_API_URL or FASTAPI_API_KEY not set");
|
||||
return NextResponse.json({ error: "Server misconfiguration" }, { status: 500 });
|
||||
}
|
||||
|
||||
const sessionToken =
|
||||
request.cookies.get("__Secure-next-auth.session-token")?.value ??
|
||||
request.cookies.get("next-auth.session-token")?.value;
|
||||
const parsed = parseS3Uri(upload.combinedOutputS3Uri);
|
||||
if (!parsed)
|
||||
return NextResponse.json({ error: "Invalid combined output S3 URI" }, { status: 500 });
|
||||
|
||||
const { searchParams } = new URL(request.url);
|
||||
const offset = searchParams.get("offset") ?? "0";
|
||||
const limit = searchParams.get("limit") ?? "500";
|
||||
const offset = Math.max(0, parseInt(searchParams.get("offset") ?? "0", 10) || 0);
|
||||
const limit = Math.max(1, Math.min(5000, parseInt(searchParams.get("limit") ?? "500", 10) || 500));
|
||||
|
||||
const s3 = new S3({
|
||||
region: process.env.RETROFIT_DATA_DEV_REGION,
|
||||
accessKeyId: process.env.RETROFIT_DATA_DEV_ACCESS_KEY,
|
||||
secretAccessKey: process.env.RETROFIT_DATA_DEV_SECRET_KEY,
|
||||
});
|
||||
|
||||
let rawRows: Record<string, unknown>[];
|
||||
try {
|
||||
const res = await fetch(
|
||||
`${fastapiUrl}/v1/bulk-uploads/${upload.taskId}/combined-results?offset=${offset}&limit=${limit}`,
|
||||
{
|
||||
headers: {
|
||||
"x-api-key": fastapiKey,
|
||||
Authorization: `Bearer ${sessionToken}`,
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
if (!res.ok) {
|
||||
const errText = await res.text().catch(() => "");
|
||||
console.error("Backend combined-results failed:", res.status, errText);
|
||||
return NextResponse.json(
|
||||
{ error: res.status === 409 ? "Combiner not finished" : "Failed to fetch results" },
|
||||
{ status: res.status === 409 ? 409 : 502 }
|
||||
);
|
||||
}
|
||||
|
||||
const data = await res.json();
|
||||
return NextResponse.json(data, { status: 200 });
|
||||
const obj = await s3
|
||||
.getObject({ Bucket: parsed.bucket, Key: parsed.key })
|
||||
.promise();
|
||||
const buf = Buffer.from(obj.Body as Uint8Array);
|
||||
const wb = XLSX.read(buf, { type: "buffer" });
|
||||
const sheet = wb.Sheets[wb.SheetNames[0]];
|
||||
rawRows = XLSX.utils.sheet_to_json<Record<string, unknown>>(sheet, { defval: "" });
|
||||
} catch (err) {
|
||||
console.error("Failed to reach backend combined-results:", err);
|
||||
return NextResponse.json({ error: "Failed to fetch results" }, { status: 502 });
|
||||
console.error("Failed to read combined CSV from S3:", err);
|
||||
return NextResponse.json({ error: "Failed to read combined CSV" }, { status: 502 });
|
||||
}
|
||||
|
||||
const uprnValues = rawRows.map((r) => normalize(r[UPRN_COL]));
|
||||
const uprnCounts = new Map<string, number>();
|
||||
for (const u of uprnValues) {
|
||||
if (isMissingUprn(u)) continue;
|
||||
uprnCounts.set(u, (uprnCounts.get(u) ?? 0) + 1);
|
||||
}
|
||||
const duplicateUprns = new Set(
|
||||
Array.from(uprnCounts.entries())
|
||||
.filter(([, c]) => c >= 2)
|
||||
.map(([u]) => u)
|
||||
);
|
||||
|
||||
const missingCount = uprnValues.filter(isMissingUprn).length;
|
||||
const duplicateCount = uprnValues.filter((u) => duplicateUprns.has(u)).length;
|
||||
const matchedCount = rawRows.length - missingCount;
|
||||
|
||||
const page = rawRows.slice(offset, offset + limit);
|
||||
const rows = page.map((raw, i) => {
|
||||
const rowIndex = offset + i;
|
||||
const addressParts = ADDRESS_COLS.map((c) => normalize(raw[c])).filter(Boolean);
|
||||
const inputAddress = addressParts.join(", ");
|
||||
const internalRef = normalize(raw[INTERNAL_REF_COL]) || null;
|
||||
|
||||
const uprnRaw = normalize(raw[UPRN_COL]);
|
||||
const uprn = isMissingUprn(uprnRaw) ? null : uprnRaw;
|
||||
|
||||
const matchedAddressRaw = normalize(raw[MATCHED_ADDRESS_COL]);
|
||||
const matchedAddress =
|
||||
!matchedAddressRaw || matchedAddressRaw.toLowerCase() === MISSING_SENTINEL
|
||||
? null
|
||||
: matchedAddressRaw;
|
||||
|
||||
const lexiscore = parseLexiscore(raw[LEXISCORE_COL]);
|
||||
|
||||
const flags: ("duplicate" | "missing")[] = [];
|
||||
if (uprn === null) flags.push("missing");
|
||||
else if (duplicateUprns.has(uprn)) flags.push("duplicate");
|
||||
|
||||
return {
|
||||
row_index: rowIndex,
|
||||
input_address: inputAddress,
|
||||
internal_reference: internalRef,
|
||||
uprn,
|
||||
matched_address: matchedAddress,
|
||||
lexiscore,
|
||||
score_bucket: scoreBucket(lexiscore),
|
||||
flags,
|
||||
};
|
||||
});
|
||||
|
||||
return NextResponse.json(
|
||||
{
|
||||
task_id: uploadId,
|
||||
total: rawRows.length,
|
||||
offset,
|
||||
limit,
|
||||
flags_summary: {
|
||||
duplicates: duplicateCount,
|
||||
missing: missingCount,
|
||||
matched: matchedCount,
|
||||
},
|
||||
rows,
|
||||
},
|
||||
{ status: 200 }
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -149,15 +149,6 @@ export default function OnboardingProgress({
|
|||
)}
|
||||
</div>
|
||||
|
||||
{isAwaitingReview && (
|
||||
<Link
|
||||
href={`/portfolio/${portfolioSlug}/bulk-upload/${uploadId}/confirm-matches`}
|
||||
className="inline-flex items-center gap-2 px-5 py-2 rounded-xl bg-gradient-to-br from-[#14163d] to-[#15173e] text-white text-sm font-bold hover:opacity-90 transition-opacity"
|
||||
>
|
||||
Review matches
|
||||
</Link>
|
||||
)}
|
||||
|
||||
{isDomnaUser && (
|
||||
<Link
|
||||
href={`/portfolio/${portfolioSlug}/settings/logs`}
|
||||
|
|
|
|||
|
|
@ -52,13 +52,17 @@ export default async function ConfirmMatchesPage(props: {
|
|||
try {
|
||||
const res = await fetch(url, { headers: { Cookie: cookieHeader }, cache: "no-store" });
|
||||
if (!res.ok) {
|
||||
fetchError = `Failed to load results (${res.status})`;
|
||||
const body = await res.json().catch(() => ({}));
|
||||
const upstreamStatus = body?.upstreamStatus;
|
||||
const upstreamBody = body?.upstreamBody;
|
||||
fetchError = `Failed to load results (${res.status})${upstreamStatus ? ` · upstream ${upstreamStatus}` : ""}${upstreamBody ? ` · ${upstreamBody}` : ""}`;
|
||||
console.error("Confirm-matches fetch error:", { status: res.status, body });
|
||||
} else {
|
||||
data = (await res.json()) as CombinedResultsResponse;
|
||||
}
|
||||
} catch (err) {
|
||||
console.error("Failed to fetch combined-results:", err);
|
||||
fetchError = "Failed to load results";
|
||||
fetchError = `Failed to load results · ${err instanceof Error ? err.message : String(err)}`;
|
||||
}
|
||||
|
||||
return (
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue