feat: whyrating - initial project from turbostarter boilerplate
This commit is contained in:
63
packages/api/tests/regenerate-embeddings.ts
Normal file
63
packages/api/tests/regenerate-embeddings.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
/**
|
||||
* Script to regenerate embeddings for documents with 0 embeddings
|
||||
* Run: pnpm with-env npx tsx packages/api/tests/regenerate-embeddings.ts
|
||||
*/
|
||||
import { generateDocumentEmbeddings } from "@turbostarter/ai/pdf/embeddings";
|
||||
import { sql } from "@turbostarter/db";
|
||||
import { pdfEmbedding } from "@turbostarter/db/schema/pdf";
|
||||
import { db } from "@turbostarter/db/server";
|
||||
|
||||
/**
 * Backfills embeddings for every PDF document that currently has none.
 *
 * Selects the rows of `pdf.document` that have no matching `pdf.embedding`
 * row, then, one document at a time, calls `generateDocumentEmbeddings` with
 * the document's path and inserts the returned chunks into `pdfEmbedding`.
 * A failure on one document is logged and does not stop the remaining ones.
 *
 * This function terminates the process itself: exit status 0 when every
 * document was processed without error, 1 when at least one document failed,
 * so that shell callers and CI can detect a partially failed run.
 */
async function regenerate(): Promise<void> {
  // Find documents with 0 embeddings
  const orphans = await db.execute<{
    id: string;
    path: string;
    chat_id: string;
  }>(sql`
    SELECT d.id, d.path, d.chat_id
    FROM pdf.document d
    WHERE NOT EXISTS (
      SELECT 1 FROM pdf.embedding e WHERE e.document_id = d.id
    )
  `);

  console.log(`Found ${orphans.length} documents without embeddings`);

  // Number of documents whose generation or insert threw; drives the exit code.
  let failedCount = 0;

  for (const doc of orphans) {
    console.log(`\nProcessing document: ${doc.id}`);
    console.log(`  Path: ${doc.path}`);

    try {
      const generated = await generateDocumentEmbeddings(doc.path);
      console.log(`  Generated ${generated.length} chunks`);

      // Nothing to insert when no chunks came back; such a document still has
      // no embeddings and will be selected again by the query on the next run.
      if (generated.length > 0) {
        await db
          .insert(pdfEmbedding)
          .values(
            generated.map((chunk) => ({
              content: chunk.content,
              documentId: doc.id,
              embedding: chunk.embedding,
              pageNumber: chunk.metadata.pageNumber,
              charStart: chunk.metadata.charStart,
              charEnd: chunk.metadata.charEnd,
              sectionTitle: chunk.metadata.sectionTitle,
            })),
          )
          .onConflictDoNothing();
        console.log(`  ✅ Inserted embeddings`);
      }
    } catch (error) {
      // Best effort per document: record the failure and keep going.
      failedCount += 1;
      console.error(`  ❌ Error:`, error instanceof Error ? error.message : error);
    }
  }

  console.log("\nDone!");
  if (failedCount > 0) {
    console.error(`${failedCount} of ${orphans.length} documents failed`);
  }
  // Report partial failure through the exit status instead of always succeeding.
  process.exit(failedCount > 0 ? 1 : 0);
}
|
||||
|
||||
// Script entry point: start the run. Any rejection that escapes regenerate()
// is printed and the process exits with status 1.
regenerate().catch((fatal: unknown) => {
  console.error("Fatal error:", fatal);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user