(buffer: Buffer)
| 29 | } |
| 30 | |
/**
 * Extracts the text content of a PowerPoint document held in memory.
 *
 * Steps:
 *  1. Reject a missing or zero-length buffer.
 *  2. Run `assertOoxmlArchiveWithinLimits` on the buffer (any error it throws
 *     is wrapped and rethrown like every other failure here).
 *  3. Lazily import `officeparser`; if the import fails, delegate to
 *     `fallbackExtraction`.
 *  4. Extract text with `parseOfficeAsync`; if it throws or yields an empty /
 *     non-string result, delegate to `fallbackExtraction`.
 *
 * @param buffer - Raw bytes of the PowerPoint file.
 * @returns On the officeparser path, the trimmed result passed through
 *   `sanitizeTextForUTF8`, with `characterCount` and `extractionMethod`
 *   metadata; otherwise whatever `fallbackExtraction` returns.
 * @throws Error with the message prefix `Failed to parse PowerPoint buffer: `
 *   when validation fails or the fallback itself throws.
 */
async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
  try {
    // Validate first: reading `buffer.length` for the log line on a
    // null/undefined buffer would throw a TypeError and hide the intended
    // "Empty buffer provided" error.
    if (!buffer || buffer.length === 0) {
      throw new Error('Empty buffer provided')
    }

    logger.info('Parsing PowerPoint buffer, size:', buffer.length)

    assertOoxmlArchiveWithinLimits(buffer)

    // officeparser is loaded on demand so that a missing module degrades to
    // the fallback extractor instead of failing the whole parse.
    let parseOfficeAsync
    try {
      const officeParser = await import('officeparser')
      parseOfficeAsync = officeParser.parseOfficeAsync
    } catch {
      logger.warn('officeparser not available, using fallback extraction')
      return this.fallbackExtraction(buffer)
    }

    try {
      const result = await parseOfficeAsync(buffer)

      // An empty string is treated as a failed extraction as well, which
      // routes the buffer to the fallback extractor via the catch below.
      if (!result || typeof result !== 'string') {
        throw new Error('officeparser returned invalid result')
      }

      const content = sanitizeTextForUTF8(result.trim())

      logger.info('PowerPoint parsing completed successfully with officeparser')

      return {
        content,
        metadata: {
          characterCount: content.length,
          extractionMethod: 'officeparser',
        },
      }
    } catch (extractError) {
      logger.warn('officeparser failed, using fallback:', extractError)
      return this.fallbackExtraction(buffer)
    }
  } catch (error) {
    logger.error('PowerPoint buffer parsing error:', error)
    // Caught values are not guaranteed to be Error instances; narrow before
    // reading `.message` so the wrapped message is never "undefined".
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(`Failed to parse PowerPoint buffer: ${reason}`)
  }
}
| 77 | |
| 78 | private fallbackExtraction(buffer: Buffer): FileParseResult { |
| 79 | logger.info('Using fallback text extraction for PowerPoint file') |
no test coverage detected