MCPcopy
hub / github.com/simstudioai/sim / PdfParser

Class PdfParser

apps/sim/lib/file-parsers/pdf-parser.ts:7–55  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

const logger = createLogger('PdfParser')

/**
 * Extracts plain text from PDF documents using the `unpdf` library.
 *
 * Both entry points resolve to a `FileParseResult` whose `content` is the
 * document text and whose `metadata` carries the page count and the name of
 * the extraction backend (`'unpdf'`).
 */
export class PdfParser implements FileParser {
  /**
   * Reads a PDF from disk and extracts its text.
   *
   * @param filePath - Path of the PDF file to read.
   * @returns The extracted text and metadata, as produced by {@link parseBuffer}.
   * @throws Error if `filePath` is empty, or whatever `readFile` rejects with
   *   when the file cannot be read. Both are logged before being rethrown.
   */
  async parseFile(filePath: string): Promise<FileParseResult> {
    try {
      logger.info('Starting to parse file:', filePath)

      if (!filePath) {
        throw new Error('No file path provided')
      }

      logger.info('Reading file...')
      const dataBuffer = await readFile(filePath)
      logger.info('File read successfully, size:', dataBuffer.length)

      // Returned without `await`: a rejection from parseBuffer (which logs its
      // own failures) goes straight to the caller and is not re-logged here
      // as a read error.
      return this.parseBuffer(dataBuffer)
    } catch (error) {
      logger.error('Error reading file:', error)
      throw error
    }
  }

  /**
   * Extracts text from an in-memory PDF.
   *
   * The text of all pages is merged into a single string and NUL characters
   * are removed from it. The underlying PDF document is released once
   * extraction finishes, whether it succeeded or failed.
   *
   * @param dataBuffer - Raw bytes of the PDF document.
   * @returns The cleaned text plus `pageCount` and `source` metadata.
   * @throws Whatever `unpdf` throws while loading or extracting the document;
   *   the error is logged before being rethrown.
   */
  async parseBuffer(dataBuffer: Buffer): Promise<FileParseResult> {
    try {
      logger.info('Starting to parse buffer, size:', dataBuffer.length)

      // Loaded lazily so the PDF library is only pulled in when a PDF is parsed.
      const { extractText, getDocumentProxy } = await import('unpdf')

      // unpdf is handed a plain Uint8Array copy of the bytes, not the Node Buffer.
      const uint8Array = new Uint8Array(dataBuffer)

      const pdf = await getDocumentProxy(uint8Array)

      try {
        const { totalPages, text } = await extractText(pdf, { mergePages: true })

        logger.info('PDF parsed successfully, pages:', totalPages, 'text length:', text.length)

        const cleanContent = text.replace(/\u0000/g, '')

        return {
          content: cleanContent,
          metadata: {
            pageCount: totalPages,
            source: 'unpdf',
          },
        }
      } finally {
        // extractText does not dispose a proxy it did not create, so release it
        // here. Best-effort: a cleanup failure must not mask the parse result
        // or the original parse error.
        await pdf.destroy().catch((destroyError: unknown) => {
          logger.error('Error releasing PDF document:', destroyError)
        })
      }
    } catch (error) {
      logger.error('Error parsing buffer:', error)
      throw error
    }
  }
}

Callers

nothing calls this directly

Calls

no outgoing calls

Tested by

no test coverage detected