mirror of
https://github.com/Hestia-Homes/assessment-model.git
synced 2026-06-30 12:55:02 +00:00
only get most recent version of each uploaded file per s3 key
This commit is contained in:
parent
861bf144ae
commit
ac6983ae68
1 changed file with 25 additions and 1 deletion
|
|
@@ -40,7 +40,31 @@ export async function GET(req: Request) {
|
|||
.from(uploadedFiles)
|
||||
.where(condition);
|
||||
|
||||
const documents = rows.map((row) => ({
|
||||
// Deduplicate rows that arise when the ingestion Lambda re-triggers for a deal
|
||||
// that was already processed: same S3 key gets a new version, but a new DB row
|
||||
// is written each time.
|
||||
//
|
||||
// Step 1: same s3FileKey → keep the row with the highest id (latest insert).
|
||||
const latestByS3Key = new Map<string, (typeof rows)[number]>();
|
||||
for (const row of rows) {
|
||||
const existing = latestByS3Key.get(row.s3FileKey);
|
||||
if (!existing || row.id > existing.id) latestByS3Key.set(row.s3FileKey, row);
|
||||
}
|
||||
|
||||
// Step 2: among distinct keys, same (fileType, measureName) → keep latest.
|
||||
// Rows with a missing or empty fileType (unclassified) are kept as-is.
|
||||
const latestByDocType = new Map<string, (typeof rows)[number]>();
|
||||
const unclassified: (typeof rows)[number][] = [];
|
||||
for (const row of latestByS3Key.values()) {
|
||||
if (!row.fileType) { unclassified.push(row); continue; }
|
||||
const key = `${row.fileType}:${row.measureName ?? ""}`;
|
||||
const existing = latestByDocType.get(key);
|
||||
if (!existing || row.id > existing.id) latestByDocType.set(key, row);
|
||||
}
|
||||
|
||||
const dedupedRows = [...latestByDocType.values(), ...unclassified];
|
||||
|
||||
const documents = dedupedRows.map((row) => ({
|
||||
id: String(row.id),
|
||||
s3FileKey: row.s3FileKey,
|
||||
s3FileBucket: row.s3FileBucket,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue