( fileUrl: string, filename: string, mimeType: string, userId?: string, workspaceId?: string | null )
| 310 | } |
| 311 | |
| 312 | async function handleFileForOCR( |
| 313 | fileUrl: string, |
| 314 | filename: string, |
| 315 | mimeType: string, |
| 316 | userId?: string, |
| 317 | workspaceId?: string | null |
| 318 | ) { |
| 319 | const isExternalHttps = /^https:\/\//i.test(fileUrl) && !isInternalFileUrl(fileUrl) |
| 320 | |
| 321 | if (isExternalHttps) { |
| 322 | if (mimeType === 'application/pdf') { |
| 323 | logger.info(`handleFileForOCR: Downloading external PDF to check page count`) |
| 324 | try { |
| 325 | const buffer = await downloadFileWithTimeout(fileUrl, userId) |
| 326 | logger.info(`handleFileForOCR: Downloaded external PDF: ${buffer.length} bytes`) |
| 327 | return { httpsUrl: fileUrl, buffer } |
| 328 | } catch (error) { |
| 329 | logger.warn( |
| 330 | `handleFileForOCR: Failed to download external PDF for page count check, proceeding without batching`, |
| 331 | { |
| 332 | error: toError(error).message, |
| 333 | } |
| 334 | ) |
| 335 | return { httpsUrl: fileUrl, buffer: undefined } |
| 336 | } |
| 337 | } |
| 338 | logger.info(`handleFileForOCR: Using external URL directly`) |
| 339 | return { httpsUrl: fileUrl, buffer: undefined } |
| 340 | } |
| 341 | |
| 342 | logger.info(`Uploading "${filename}" to cloud storage for OCR`) |
| 343 | |
| 344 | const buffer = await downloadFileWithTimeout(fileUrl, userId) |
| 345 | |
| 346 | logger.info(`Downloaded ${filename}: ${buffer.length} bytes`) |
| 347 | |
| 348 | try { |
| 349 | const metadata: Record<string, string> = { |
| 350 | originalName: filename, |
| 351 | uploadedAt: new Date().toISOString(), |
| 352 | purpose: 'knowledge-base', |
| 353 | ...(userId && { userId }), |
| 354 | ...(workspaceId && { workspaceId }), |
| 355 | } |
| 356 | |
| 357 | const timestamp = Date.now() |
| 358 | const uniqueId = randomBytes(8).toString('hex') |
| 359 | const safeFileName = filename.replace(/[^a-zA-Z0-9.-]/g, '_') |
| 360 | const customKey = `kb/${timestamp}-${uniqueId}-${safeFileName}` |
| 361 | |
| 362 | const cloudResult = await StorageService.uploadFile({ |
| 363 | file: buffer, |
| 364 | fileName: filename, |
| 365 | contentType: mimeType, |
| 366 | context: 'knowledge-base', |
| 367 | customKey, |
| 368 | metadata, |
| 369 | }) |
no test coverage detected