(
chunkId: string,
updateData: {
content?: string
enabled?: boolean
},
requestId: string,
workspaceId?: string | null
)
| 328 | * Update a single chunk |
| 329 | */ |
| 330 | export async function updateChunk( |
| 331 | chunkId: string, |
| 332 | updateData: { |
| 333 | content?: string |
| 334 | enabled?: boolean |
| 335 | }, |
| 336 | requestId: string, |
| 337 | workspaceId?: string | null |
| 338 | ): Promise<ChunkData> { |
| 339 | // Content updates run in a transaction to keep document statistics |
| 340 | // consistent. The embedding API call happens BEFORE the transaction opens so |
| 341 | // a held pooled connection never waits on external I/O; the transaction then |
| 342 | // re-reads the chunk under a row lock and retries the whole flow in the rare |
| 343 | // case a concurrent edit invalidated the regeneration decision. |
| 344 | if (updateData.content !== undefined && typeof updateData.content === 'string') { |
| 345 | const content = updateData.content |
| 346 | const MAX_UPDATE_ATTEMPTS = 3 |
| 347 | |
| 348 | for (let attempt = 1; attempt <= MAX_UPDATE_ATTEMPTS; attempt++) { |
| 349 | const [preRead] = await db |
| 350 | .select({ documentId: embedding.documentId, content: embedding.content }) |
| 351 | .from(embedding) |
| 352 | .where(eq(embedding.id, chunkId)) |
| 353 | .limit(1) |
| 354 | |
| 355 | if (!preRead) { |
| 356 | throw new Error(`Chunk ${chunkId} not found`) |
| 357 | } |
| 358 | |
| 359 | // The embedding is computed from the new content and the knowledge base's embedding model, |
| 360 | // not from the chunk's stored content, so generating it outside the transaction is valid. |
| 361 | let regenerated: { embedding: number[]; tokenCount: number } | null = null |
| 362 | if (content !== preRead.content) { |
| 363 | const kbRow = await db |
| 364 | .select({ embeddingModel: knowledgeBase.embeddingModel }) |
| 365 | .from(knowledgeBase) |
| 366 | .innerJoin(document, eq(document.knowledgeBaseId, knowledgeBase.id)) |
| 367 | .where(eq(document.id, preRead.documentId)) |
| 368 | .limit(1) |
| 369 | const chunkEmbeddingModel = kbRow[0]?.embeddingModel |
| 370 | if (!chunkEmbeddingModel) { |
| 371 | throw new Error('Knowledge base for chunk not found') |
| 372 | } |
| 373 | |
| 374 | logger.info(`[${requestId}] Content changed, regenerating embedding for chunk ${chunkId}`) |
| 375 | const { embeddings } = await generateEmbeddings([content], chunkEmbeddingModel, workspaceId) |
| 376 | regenerated = { |
| 377 | embedding: embeddings[0], |
| 378 | tokenCount: estimateTokenCount( |
| 379 | content, |
| 380 | getEmbeddingModelInfo(chunkEmbeddingModel).tokenizerProvider |
| 381 | ).count, |
| 382 | } |
| 383 | } |
| 384 | |
| 385 | const result = await db.transaction(async (tx) => { |
| 386 | const currentChunk = await tx |
| 387 | .select({ |
no test coverage detected