hub / github.com/simstudioai/sim / DocxParser

Class DocxParser

apps/sim/lib/file-parsers/docx-parser.ts:20–110 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

18	}
19
20	export class DocxParser implements FileParser {
/**
 * Reads the file at `filePath` and hands its bytes to `parseBuffer`.
 *
 * @param filePath - Path of the DOCX file to read; must be non-empty.
 * @returns The promise returned by `parseBuffer` for the file's contents.
 * @throws Error whose message starts with "Failed to parse DOCX file:" when
 *   `filePath` is empty or `readFile` rejects.
 */
async parseFile(filePath: string): Promise<FileParseResult> {
  try {
    if (!filePath) {
      throw new Error('No file path provided')
    }

    const buffer = await readFile(filePath)
    // Returned without `await`: a rejection from parseBuffer propagates to the
    // caller unchanged and is neither logged nor re-wrapped by the catch below.
    return this.parseBuffer(buffer)
  } catch (error) {
    logger.error('DOCX file error:', error)
    // Narrow rather than assert, so a non-Error throwable does not end up as
    // "undefined" in the message.
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(`Failed to parse DOCX file: ${reason}`)
  }
}
34
35	async parseBuffer(buffer: Buffer): Promise<FileParseResult> {
36	try {
37	if (!buffer || buffer.length === 0) {
38	throw new Error('Empty buffer provided')
39	}
40
41	assertOoxmlArchiveWithinLimits(buffer)
42
43	try {
44	const result = await mammoth.extractRawText({ buffer })
45
46	if (result.value && result.value.trim().length > 0) {
47	let htmlResult: MammothResult = { value: '', messages: [] }
48	try {
49	htmlResult = await mammoth.convertToHtml({ buffer })
50	} catch {
51	// HTML conversion is optional
52	}
53
54	return {
55	content: sanitizeTextForUTF8(result.value),
56	metadata: {
57	extractionMethod: 'mammoth',
58	messages: [...result.messages, ...htmlResult.messages],
59	html: htmlResult.value,
60	},
61	}
62	}
63	} catch (mammothError) {
64	logger.warn('mammoth failed, trying officeparser:', mammothError)
65	}
66
67	try {
68	const officeParser = await import('officeparser')
69	const result = await officeParser.parseOfficeAsync(buffer)
70
71	if (result) {
72	const resultString = typeof result === 'string' ? result : String(result)
73	const content = sanitizeTextForUTF8(resultString.trim())
74
75	if (content.length > 0) {
76	return {
77	content,

Callers

nothing calls this directly

Calls

no outgoing calls

Tested by

no test coverage detected