( knowledgeBaseId: string, documentId: string, docTags: Record<string, string | number | boolean | Date | null>, chunkData: CreateChunkData, requestId: string, workspaceId?: string | null )
| 104 | * Create a new chunk for a document, embedding its content with the knowledge base's embedding model |
| 105 | */ |
| 106 | export async function createChunk( |
| 107 | knowledgeBaseId: string, |
| 108 | documentId: string, |
| 109 | docTags: Record<string, string | number | boolean | Date | null>, |
| 110 | chunkData: CreateChunkData, |
| 111 | requestId: string, |
| 112 | workspaceId?: string | null |
| 113 | ): Promise<ChunkData> { |
| 114 | logger.info(`[${requestId}] Generating embedding for manual chunk`) |
| 115 | const kbRow = await db |
| 116 | .select({ embeddingModel: knowledgeBase.embeddingModel }) |
| 117 | .from(knowledgeBase) |
| 118 | .where(and(eq(knowledgeBase.id, knowledgeBaseId), isNull(knowledgeBase.deletedAt))) |
| 119 | .limit(1) |
| 120 | if (kbRow.length === 0) { |
| 121 | throw new Error('Knowledge base not found') |
| 122 | } |
| 123 | const kbEmbeddingModel = kbRow[0].embeddingModel |
| 124 | const { embeddings } = await generateEmbeddings( |
| 125 | [chunkData.content], |
| 126 | kbEmbeddingModel, |
| 127 | workspaceId |
| 128 | ) |
| 129 | |
| 130 | const tokenCount = estimateTokenCount( |
| 131 | chunkData.content, |
| 132 | getEmbeddingModelInfo(kbEmbeddingModel).tokenizerProvider |
| 133 | ) |
| 134 | |
| 135 | const chunkId = generateId() |
| 136 | const now = new Date() |
| 137 | |
| 138 | // Use transaction to atomically get next index and insert chunk |
| 139 | const newChunk = await db.transaction(async (tx) => { |
| 140 | const activeDocument = await tx |
| 141 | .select({ id: document.id }) |
| 142 | .from(document) |
| 143 | .innerJoin(knowledgeBase, eq(document.knowledgeBaseId, knowledgeBase.id)) |
| 144 | .where( |
| 145 | and( |
| 146 | eq(document.id, documentId), |
| 147 | eq(document.knowledgeBaseId, knowledgeBaseId), |
| 148 | isNull(document.archivedAt), |
| 149 | isNull(document.deletedAt), |
| 150 | isNull(knowledgeBase.deletedAt) |
| 151 | ) |
| 152 | ) |
| 153 | .limit(1) |
| 154 | |
| 155 | if (activeDocument.length === 0) { |
| 156 | throw new Error('Document not found') |
| 157 | } |
| 158 | |
| 159 | // Get the next chunk index atomically within the transaction |
| 160 | const lastChunk = await tx |
| 161 | .select({ chunkIndex: embedding.chunkIndex }) |
| 162 | .from(embedding) |
| 163 | .where(eq(embedding.documentId, documentId)) |
no test coverage detected