MCPcopy Index your code
hub / github.com/simstudioai/sim / handleFileForOCR

Function handleFileForOCR

apps/sim/lib/knowledge/documents/document-processor.ts:312–382  ·  view source on GitHub ↗
(
  fileUrl: string,
  filename: string,
  mimeType: string,
  userId?: string,
  workspaceId?: string | null
)

Source from the content-addressed store, hash-verified

310}
311
312async function handleFileForOCR(
313 fileUrl: string,
314 filename: string,
315 mimeType: string,
316 userId?: string,
317 workspaceId?: string | null
318) {
319 const isExternalHttps = /^https:\/\//i.test(fileUrl) && !isInternalFileUrl(fileUrl)
320
321 if (isExternalHttps) {
322 if (mimeType === 'application/pdf') {
323 logger.info(`handleFileForOCR: Downloading external PDF to check page count`)
324 try {
325 const buffer = await downloadFileWithTimeout(fileUrl, userId)
326 logger.info(`handleFileForOCR: Downloaded external PDF: ${buffer.length} bytes`)
327 return { httpsUrl: fileUrl, buffer }
328 } catch (error) {
329 logger.warn(
330 `handleFileForOCR: Failed to download external PDF for page count check, proceeding without batching`,
331 {
332 error: toError(error).message,
333 }
334 )
335 return { httpsUrl: fileUrl, buffer: undefined }
336 }
337 }
338 logger.info(`handleFileForOCR: Using external URL directly`)
339 return { httpsUrl: fileUrl, buffer: undefined }
340 }
341
342 logger.info(`Uploading "${filename}" to cloud storage for OCR`)
343
344 const buffer = await downloadFileWithTimeout(fileUrl, userId)
345
346 logger.info(`Downloaded ${filename}: ${buffer.length} bytes`)
347
348 try {
349 const metadata: Record<string, string> = {
350 originalName: filename,
351 uploadedAt: new Date().toISOString(),
352 purpose: 'knowledge-base',
353 ...(userId && { userId }),
354 ...(workspaceId && { workspaceId }),
355 }
356
357 const timestamp = Date.now()
358 const uniqueId = randomBytes(8).toString('hex')
359 const safeFileName = filename.replace(/[^a-zA-Z0-9.-]/g, '_')
360 const customKey = `kb/${timestamp}-${uniqueId}-${safeFileName}`
361
362 const cloudResult = await StorageService.uploadFile({
363 file: buffer,
364 fileName: filename,
365 contentType: mimeType,
366 context: 'knowledge-base',
367 customKey,
368 metadata,
369 })

Callers 1

parseWithMistralOCR (Function) · 0.85

Calls 9

isInternalFileUrl (Function) · 0.90
toError (Function) · 0.90
getErrorMessage (Function) · 0.90
downloadFileWithTimeout (Function) · 0.85
test (Method) · 0.80
info (Method) · 0.80
warn (Method) · 0.65
replace (Method) · 0.65
toString (Method) · 0.45

Tested by

no test coverage detected