MCPcopy
hub / github.com/simstudioai/sim / getCsvPreviewSlice

Function getCsvPreviewSlice

apps/sim/lib/file-parsers/csv-preview-slice.ts:49–140  ·  view source on GitHub ↗
({
  key,
  context,
  signal,
}: CsvPreviewSliceArgs)

Source from the content-addressed store, hash-verified

47 * read (one past the cap, to detect truncation), so a multi-GB file costs O(rows) of memory.
48 */
49export async function getCsvPreviewSlice({
50 key,
51 context,
52 signal,
53}: CsvPreviewSliceArgs): Promise<CsvPreviewSlice> {
54 const source = await downloadFileStream({ key, context })
55 const onAbort = () => source.destroy()
56 signal?.addEventListener('abort', onAbort, { once: true })
57
58 const reader = source[Symbol.asyncIterator]()
59
60 try {
61 // Pull chunks until the first newline so the delimiter can be sniffed before parsing.
62 // Accumulate the header line incrementally — appending each chunk's decoded text rather than
63 // re-concatenating the whole buffer each iteration (which would be O(n²) for a header split
64 // across many small chunks). The delimiter chars (`,` `\t` `;`) are ASCII, so a multi-byte
65 // character split at a chunk boundary can't introduce a false delimiter into the count.
66 const sniffed: Buffer[] = []
67 let firstLine = ''
68 let sniffedBytes = 0
69 while (true) {
70 const { value, done } = await reader.next()
71 if (done) break
72 const chunk = Buffer.isBuffer(value) ? value : Buffer.from(value)
73 sniffed.push(chunk)
74 sniffedBytes += chunk.length
75 const text = chunk.toString('utf-8')
76 const nl = text.indexOf('\n')
77 if (nl !== -1) {
78 firstLine += text.slice(0, nl)
79 break
80 }
81 firstLine += text
82 if (sniffedBytes >= DELIMITER_SNIFF_MAX_BYTES) break
83 }
84
85 if (sniffed.length === 0) {
86 return { headers: [], rows: [], truncated: false }
87 }
88
89 const delimiter = detectDelimiter(firstLine)
90 const parser = parseCsvStream({
91 columns: false,
92 skip_empty_lines: true,
93 trim: true,
94 relax_column_count: true,
95 relax_quotes: true,
96 skip_records_with_error: true,
97 cast: false,
98 bom: true,
99 delimiter,
100 })
101
102 // Re-feed the sniffed prefix, then drain the rest of the source into the parser.
103 async function* rejoin() {
104 for (const chunk of sniffed) yield chunk
105 while (true) {
106 const { value, done } = await reader.next()

Callers 2

route.ts · File · 0.90

Calls 7

downloadFileStream · Function · 0.90
rejoin · Function · 0.85
on · Method · 0.80
detectDelimiter · Function · 0.70
destroy · Method · 0.65
push · Method · 0.45
toString · Method · 0.45

Tested by

no test coverage detected