MCPcopy
hub / github.com/simstudioai/sim / fallbackExtraction

Method fallbackExtraction

apps/sim/lib/file-parsers/pptx-parser.ts:78–108  ·  view source on GitHub ↗
(buffer: Buffer)

Source from the content-addressed store, hash-verified

76 }
77
/**
 * Last-resort text extraction for a PowerPoint buffer.
 *
 * Decodes at most the first 200000 bytes of the buffer as UTF-8, collects runs
 * of printable-ASCII / whitespace characters, keeps only the runs that pass
 * the text-likeness checks below, and joins them into one
 * whitespace-normalized string.
 *
 * @param buffer - Raw bytes of the PowerPoint file.
 * @returns The extracted text passed through `sanitizeTextForUTF8`, or a fixed
 *   "unable to extract" message when no usable text was found. The metadata
 *   always marks the result as produced by the fallback path.
 */
private fallbackExtraction(buffer: Buffer): FileParseResult {
  logger.info('Using fallback text extraction for PowerPoint file')

  // Only the leading portion of the file is scanned.
  const scanEnd = Math.min(buffer.length, 200000)
  const decoded = buffer.toString('utf8', 0, scanEnd)

  // Runs of four or more printable-ASCII or whitespace characters.
  const candidates = decoded.match(/[\x20-\x7E\s]{4,}/g)

  // Stays undefined when the scan found no candidate runs at all.
  let readableText: string | undefined
  if (candidates) {
    const kept: string[] = []
    for (const chunk of candidates) {
      // Too short once surrounding whitespace is removed.
      if (chunk.trim().length <= 10) continue
      // Must contain at least one ASCII letter.
      if (!/[a-zA-Z]/.test(chunk)) continue
      // Reject runs consisting solely of control characters.
      if (/^[\x00-\x1F]*$/.test(chunk)) continue
      // Reject runs with no word or whitespace characters.
      if (/^[^\w\s]*$/.test(chunk)) continue
      kept.push(chunk)
    }
    // Collapse every whitespace run to a single space.
    readableText = kept.join(' ').replace(/\s+/g, ' ').trim()
  }

  // An empty string is treated the same as "nothing found".
  let content: string
  if (readableText) {
    content = sanitizeTextForUTF8(readableText)
  } else {
    content =
      'Unable to extract text from PowerPoint file. Please ensure the file contains readable text content.'
  }

  const characterCount = content.length
  return {
    content,
    metadata: {
      extractionMethod: 'fallback',
      characterCount,
      warning: 'Basic text extraction used',
    },
  }
}
109}

Callers 1

parseBufferMethod · 0.95

Calls 6

sanitizeTextForUTF8Function · 0.90
infoMethod · 0.80
joinMethod · 0.80
testMethod · 0.80
replaceMethod · 0.65
toStringMethod · 0.45

Tested by

no test coverage detected