MCPcopy
hub / github.com/simstudioai/sim / DocParser

Class DocParser

apps/sim/lib/file-parsers/doc-parser.ts:9–130  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

7const logger = createLogger('DocParser')
8
9export class DocParser implements FileParser {
/**
 * Reads a DOC file from disk and hands its bytes to `parseBuffer`.
 *
 * @param filePath - Path of the file to read; must be non-empty and exist.
 * @returns Whatever `parseBuffer` resolves to for the file's contents.
 * @throws Error with the prefix "Failed to parse DOC file: " when the path is
 *   empty, the file is missing, or reading it fails.
 */
async parseFile(filePath: string): Promise<FileParseResult> {
  try {
    if (!filePath) {
      throw new Error('No file path provided')
    }

    if (!existsSync(filePath)) {
      throw new Error(`File not found: ${filePath}`)
    }

    const buffer = await readFile(filePath)
    // Returned without `await`: a rejection from parseBuffer is not caught by
    // the handler below and reaches the caller unchanged.
    return this.parseBuffer(buffer)
  } catch (error) {
    logger.error('DOC file parsing error:', error)
    // Narrow instead of asserting: a non-Error throw would otherwise yield
    // "Failed to parse DOC file: undefined".
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(`Failed to parse DOC file: ${reason}`)
  }
}
27
28 async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
29 try {
30 if (!buffer || buffer.length === 0) {
31 throw new Error('Empty buffer provided')
32 }
33
34 try {
35 const officeParser = await import('officeparser')
36 const result = await officeParser.parseOfficeAsync(buffer)
37
38 if (result) {
39 const resultString = typeof result === 'string' ? result : String(result)
40 const content = sanitizeTextForUTF8(resultString.trim())
41
42 if (content.length > 0) {
43 return {
44 content,
45 metadata: {
46 characterCount: content.length,
47 extractionMethod: 'officeparser',
48 },
49 }
50 }
51 }
52 } catch (officeError) {
53 logger.warn('officeparser failed, trying mammoth:', officeError)
54 }
55
56 try {
57 const mammoth = await import('mammoth')
58 const result = await mammoth.extractRawText({ buffer })
59
60 if (result.value && result.value.trim().length > 0) {
61 const content = sanitizeTextForUTF8(result.value.trim())
62 return {
63 content,
64 metadata: {
65 characterCount: content.length,
66 extractionMethod: 'mammoth',

Callers

nothing calls this directly

Calls

no outgoing calls

Tested by

no test coverage detected