MCPcopy Index your code
hub / github.com/simstudioai/sim / parseDocument

Function parseDocument

apps/sim/lib/knowledge/documents/document-processor.ts:277–310  ·  view source on GitHub ↗
(
  fileUrl: string,
  filename: string,
  mimeType: string,
  userId?: string,
  workspaceId?: string | null
)

Source from the content-addressed store, hash-verified

275}
276
/**
 * Picks a parsing backend for a document and returns that backend's result.
 *
 * Routing rules:
 * - PDFs (`mimeType === 'application/pdf'`) go to Azure Mistral OCR when all of
 *   `OCR_AZURE_API_KEY`, `OCR_AZURE_ENDPOINT` and `OCR_AZURE_MODEL_NAME` are set.
 * - Otherwise PDFs go to Mistral OCR when `getMistralApiKey` yields a key.
 * - Everything else (non-PDFs, or PDFs with no OCR backend available) goes to
 *   the file parser.
 *
 * The Mistral key is only looked up when it can influence the routing decision
 * (a PDF with no Azure OCR configured), so other documents do not pay for, or
 * fail on, a lookup whose result would be discarded.
 *
 * @param fileUrl - Location of the file, forwarded unchanged to the chosen parser.
 * @param filename - File name, used in log messages and forwarded to the parser.
 * @param mimeType - MIME type used to decide whether OCR is applicable.
 * @param userId - Optional user identifier forwarded to the chosen parser.
 * @param workspaceId - Optional workspace identifier used for the Mistral key
 *   lookup and forwarded to the Mistral OCR parser.
 * @returns The result produced by the selected parser.
 */
async function parseDocument(
  fileUrl: string,
  filename: string,
  mimeType: string,
  userId?: string,
  workspaceId?: string | null
): Promise<{
  content: string
  processingMethod: 'file-parser' | 'mistral-ocr'
  cloudUrl?: string
  metadata?: FileParseMetadata
}> {
  if (mimeType === 'application/pdf') {
    // Azure takes precedence over direct Mistral access when fully configured.
    const hasAzureMistralOCR = Boolean(
      env.OCR_AZURE_API_KEY && env.OCR_AZURE_ENDPOINT && env.OCR_AZURE_MODEL_NAME
    )
    if (hasAzureMistralOCR) {
      logger.info(`Using Azure Mistral OCR: ${filename}`)
      return parseWithAzureMistralOCR(fileUrl, filename, mimeType, userId)
    }

    // Deferred until here: the key only matters for PDFs without Azure OCR.
    const mistralApiKey = await getMistralApiKey(workspaceId)
    if (mistralApiKey) {
      logger.info(`Using Mistral OCR: ${filename}`)
      return parseWithMistralOCR(fileUrl, filename, mimeType, userId, workspaceId, mistralApiKey)
    }
  }

  // Fallback for non-PDFs and for PDFs with no OCR backend available.
  logger.info(`Using file parser: ${filename}`)
  return parseWithFileParser(fileUrl, filename, mimeType, userId)
}
311
312async function handleFileForOCR(
313 fileUrl: string,

Callers 1

processDocument (Function) · 0.85

Calls 5

getMistralApiKey (Function) · 0.85
parseWithAzureMistralOCR (Function) · 0.85
parseWithMistralOCR (Function) · 0.85
parseWithFileParser (Function) · 0.85
info (Method) · 0.80

Tested by

no test coverage detected