MCPcopy Index your code
hub / github.com/simstudioai/sim / parseBuffer

Method parseBuffer

apps/sim/lib/file-parsers/doc-parser.ts:28–80  ·  view source on GitHub ↗
(buffer: Buffer)

Source from the content-addressed store, hash-verified

26 }
27
/**
 * Extracts plain text from an in-memory document buffer.
 *
 * Extraction strategies are attempted in order, each one only if the
 * previous produced no usable text or threw:
 *   1. `officeparser` (`parseOfficeAsync`)
 *   2. `mammoth` (`extractRawText`)
 *   3. `this.fallbackExtraction`
 *
 * Failures of the first two strategies are logged as warnings and do not
 * abort parsing.
 *
 * @param buffer - Raw bytes of the document to parse.
 * @returns The sanitized text together with metadata naming the extraction
 *   method that produced it.
 * @throws Error with the prefix `Failed to parse DOC buffer: ` when the
 *   buffer is missing/empty or when the fallback extraction throws.
 */
async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
  try {
    if (!buffer || buffer.length === 0) {
      // Caught by the outer handler below and re-thrown with the common prefix.
      throw new Error('Empty buffer provided')
    }

    try {
      const officeParser = await import('officeparser')
      const result = await officeParser.parseOfficeAsync(buffer)

      if (result) {
        const resultString = typeof result === 'string' ? result : String(result)
        const content = sanitizeTextForUTF8(resultString.trim())

        if (content.length > 0) {
          return {
            content,
            metadata: {
              characterCount: content.length,
              extractionMethod: 'officeparser',
            },
          }
        }
      }
    } catch (officeError) {
      logger.warn('officeparser failed, trying mammoth:', officeError)
    }

    try {
      const mammoth = await import('mammoth')
      const result = await mammoth.extractRawText({ buffer })

      const rawText = result.value ? result.value.trim() : ''
      // Check emptiness AFTER sanitizing, mirroring the officeparser branch:
      // if sanitization strips everything, fall through to the fallback
      // instead of returning empty content.
      const content = rawText.length > 0 ? sanitizeTextForUTF8(rawText) : ''

      if (content.length > 0) {
        return {
          content,
          metadata: {
            characterCount: content.length,
            extractionMethod: 'mammoth',
            messages: result.messages,
          },
        }
      }
    } catch (mammothError) {
      logger.warn('mammoth failed:', mammothError)
    }

    return this.fallbackExtraction(buffer)
  } catch (error) {
    logger.error('DOC parsing error:', error)
    // A thrown value is not guaranteed to be an Error; avoid "undefined" in the message.
    const message = error instanceof Error ? error.message : String(error)
    throw new Error(`Failed to parse DOC buffer: ${message}`)
  }
}
81
82 private fallbackExtraction(buffer: Buffer): FileParseResult {
83 const isBinaryDoc = buffer.length >= 2 && buffer[0] === 0xd0 && buffer[1] === 0xcf

Callers 1

parseFileMethod · 0.95

Calls 4

fallbackExtractionMethod · 0.95
sanitizeTextForUTF8Function · 0.90
errorMethod · 0.80
warnMethod · 0.65

Tested by

no test coverage detected