Method fallbackExtraction

apps/sim/lib/file-parsers/pptx-parser.ts:78–108 · view source on GitHub ↗

(buffer: Buffer)

Source from the content-addressed store, hash-verified

76	}
77
/**
 * Best-effort text recovery for a PowerPoint buffer, scanning raw bytes
 * for readable runs rather than parsing the file structure.
 *
 * Decodes at most the first 200000 bytes as UTF-8, collects runs of four
 * or more printable-ASCII/whitespace characters, keeps only runs whose
 * trimmed length exceeds 10 and that contain an ASCII letter, then joins
 * them with single spaces and collapses repeated whitespace.
 *
 * @param buffer - Raw file bytes to scan.
 * @returns A result whose `content` is the recovered text passed through
 *   `sanitizeTextForUTF8`, or a fixed "unable to extract" message when
 *   nothing readable was found. `metadata.characterCount` is the length of
 *   whichever string ends up in `content`.
 */
private fallbackExtraction(buffer: Buffer): FileParseResult {
  logger.info('Using fallback text extraction for PowerPoint file')

  // Only the leading portion of the buffer is decoded.
  const byteLimit = Math.min(buffer.length, 200000)
  const decoded = buffer.toString('utf8', 0, byteLimit)

  // No match at all behaves the same as an empty candidate list.
  const candidates = decoded.match(/[\x20-\x7E\s]{4,}/g) ?? []

  const kept: string[] = []
  for (const run of candidates) {
    if (run.trim().length <= 10) continue
    if (!/[a-zA-Z]/.test(run)) continue
    // NOTE(review): once a run contains an ASCII letter, the two checks
    // below can never reject it; they appear to be redundant.
    if (/^[\x00-\x1F]*$/.test(run)) continue
    if (/^[^\w\s]*$/.test(run)) continue
    kept.push(run)
  }

  const extracted = kept.join(' ').replace(/\s+/g, ' ').trim()

  let content =
    'Unable to extract text from PowerPoint file. Please ensure the file contains readable text content.'
  if (extracted) {
    content = sanitizeTextForUTF8(extracted)
  }

  const metadata = {
    extractionMethod: 'fallback',
    characterCount: content.length,
    warning: 'Basic text extraction used',
  }

  return { content, metadata }
}
109	}

parseBufferMethod · 0.95

sanitizeTextForUTF8Function · 0.90

infoMethod · 0.80

joinMethod · 0.80

testMethod · 0.80

replaceMethod · 0.65

toStringMethod · 0.45

no test coverage detected