(dataBuffer: Buffer)
| 25 | } |
| 26 | |
| 27 | /** Extracts text from an in-memory PDF via unpdf; NUL characters are stripped, and any failure is logged and rethrown. */ |
| 28 | async parseBuffer(dataBuffer: Buffer): Promise<FileParseResult> { |
| 29 | try { |
| 30 | logger.info('Starting to parse buffer, size:', dataBuffer.length) |
| 31 | // unpdf is pulled in with a dynamic import at call time. |
| 32 | const { extractText: readText, getDocumentProxy: openDocument } = await import('unpdf') |
| 33 | // A Uint8Array is built from the Buffer and handed to unpdf. |
| 34 | const pdfDocument = await openDocument(new Uint8Array(dataBuffer)) |
| 35 | const extraction = await readText(pdfDocument, { mergePages: true }) |
| 36 | const pageTotal = extraction.totalPages |
| 37 | const rawText = extraction.text |
| 38 | |
| 39 | logger.info('PDF parsed successfully, pages:', pageTotal, 'text length:', rawText.length) |
| 40 | |
| 41 | return { |
| 42 | // Remove every NUL (U+0000) character from the extracted text. |
| 43 | content: rawText.replace(/\u0000/g, ''), |
| 44 | metadata: { |
| 45 | pageCount: pageTotal, |
| 46 | source: 'unpdf', |
| 47 | }, |
| 48 | } |
| 49 | } catch (error) { |
| 50 | // Log the failure, then rethrow the same error to the caller. |
| 51 | logger.error('Error parsing buffer:', error) |
| 52 | throw error |
| 53 | } |
| 54 | } |
| 55 | } |
no test coverage detected