MCPcopy Index your code
hub / github.com/simstudioai/sim / createChunk

Function createChunk

apps/sim/lib/knowledge/chunks/service.ts:106–244  ·  view source on GitHub ↗
(
  knowledgeBaseId: string,
  documentId: string,
  docTags: Record<string, string | number | boolean | Date | null>,
  chunkData: CreateChunkData,
  requestId: string,
  workspaceId?: string | null
)

Source from the content-addressed store, hash-verified

104 * Create a new chunk for a document
105 */
106export async function createChunk(
107 knowledgeBaseId: string,
108 documentId: string,
109 docTags: Record<string, string | number | boolean | Date | null>,
110 chunkData: CreateChunkData,
111 requestId: string,
112 workspaceId?: string | null
113): Promise<ChunkData> {
114 logger.info(`[${requestId}] Generating embedding for manual chunk`)
115 const kbRow = await db
116 .select({ embeddingModel: knowledgeBase.embeddingModel })
117 .from(knowledgeBase)
118 .where(and(eq(knowledgeBase.id, knowledgeBaseId), isNull(knowledgeBase.deletedAt)))
119 .limit(1)
120 if (kbRow.length === 0) {
121 throw new Error('Knowledge base not found')
122 }
123 const kbEmbeddingModel = kbRow[0].embeddingModel
124 const { embeddings } = await generateEmbeddings(
125 [chunkData.content],
126 kbEmbeddingModel,
127 workspaceId
128 )
129
130 const tokenCount = estimateTokenCount(
131 chunkData.content,
132 getEmbeddingModelInfo(kbEmbeddingModel).tokenizerProvider
133 )
134
135 const chunkId = generateId()
136 const now = new Date()
137
138 // Use transaction to atomically get next index and insert chunk
139 const newChunk = await db.transaction(async (tx) => {
140 const activeDocument = await tx
141 .select({ id: document.id })
142 .from(document)
143 .innerJoin(knowledgeBase, eq(document.knowledgeBaseId, knowledgeBase.id))
144 .where(
145 and(
146 eq(document.id, documentId),
147 eq(document.knowledgeBaseId, knowledgeBaseId),
148 isNull(document.archivedAt),
149 isNull(document.deletedAt),
150 isNull(knowledgeBase.deletedAt)
151 )
152 )
153 .limit(1)
154
155 if (activeDocument.length === 0) {
156 throw new Error('Document not found')
157 }
158
159 // Get the next chunk index atomically within the transaction
160 const lastChunk = await tx
161 .select({ chunkIndex: embedding.chunkIndex })
162 .from(embedding)
163 .where(eq(embedding.documentId, documentId))

Callers 1

route.ts · File · 0.90

Calls 8

generateEmbeddings · Function · 0.90
estimateTokenCount · Function · 0.90
getEmbeddingModelInfo · Function · 0.90
generateId · Function · 0.90
sha256Hex · Function · 0.90
info · Method · 0.80
set · Method · 0.65
eq · Function · 0.50

Tested by

no test coverage detected