MCPcopy Index your code
hub / github.com/simstudioai/sim / updateChunk

Function updateChunk

apps/sim/lib/knowledge/chunks/service.ts:330–518  ·  view source on GitHub ↗
(
  chunkId: string,
  updateData: {
    content?: string
    enabled?: boolean
  },
  requestId: string,
  workspaceId?: string | null
)

Source from the content-addressed store, hash-verified

328 * Update a single chunk
329 */
330export async function updateChunk(
331 chunkId: string,
332 updateData: {
333 content?: string
334 enabled?: boolean
335 },
336 requestId: string,
337 workspaceId?: string | null
338): Promise<ChunkData> {
339 // Content updates run in a transaction to keep document statistics
340 // consistent. The embedding API call happens BEFORE the transaction opens so
341 // a held pooled connection never waits on external I/O; the transaction then
342 // re-reads the chunk under a row lock and retries the whole flow in the rare
343 // case a concurrent edit invalidated the regeneration decision.
344 if (updateData.content !== undefined && typeof updateData.content === 'string') {
345 const content = updateData.content
346 const MAX_UPDATE_ATTEMPTS = 3
347
348 for (let attempt = 1; attempt <= MAX_UPDATE_ATTEMPTS; attempt++) {
349 const [preRead] = await db
350 .select({ documentId: embedding.documentId, content: embedding.content })
351 .from(embedding)
352 .where(eq(embedding.id, chunkId))
353 .limit(1)
354
355 if (!preRead) {
356 throw new Error(`Chunk ${chunkId} not found`)
357 }
358
359 // The embedding is a function of the new content alone, so generating it
360 // outside the transaction is always valid.
361 let regenerated: { embedding: number[]; tokenCount: number } | null = null
362 if (content !== preRead.content) {
363 const kbRow = await db
364 .select({ embeddingModel: knowledgeBase.embeddingModel })
365 .from(knowledgeBase)
366 .innerJoin(document, eq(document.knowledgeBaseId, knowledgeBase.id))
367 .where(eq(document.id, preRead.documentId))
368 .limit(1)
369 const chunkEmbeddingModel = kbRow[0]?.embeddingModel
370 if (!chunkEmbeddingModel) {
371 throw new Error('Knowledge base for chunk not found')
372 }
373
374 logger.info(`[${requestId}] Content changed, regenerating embedding for chunk ${chunkId}`)
375 const { embeddings } = await generateEmbeddings([content], chunkEmbeddingModel, workspaceId)
376 regenerated = {
377 embedding: embeddings[0],
378 tokenCount: estimateTokenCount(
379 content,
380 getEmbeddingModelInfo(chunkEmbeddingModel).tokenizerProvider
381 ).count,
382 }
383 }
384
385 const result = await db.transaction(async (tx) => {
386 const currentChunk = await tx
387 .select({

Callers 1

route.ts · File · 0.90

Calls 7

generateEmbeddings · Function · 0.90
estimateTokenCount · Function · 0.90
getEmbeddingModelInfo · Function · 0.90
sha256Hex · Function · 0.90
info · Method · 0.80
set · Method · 0.65
eq · Function · 0.50

Tested by

no test coverage detected