MCPcopy
hub / github.com/BuilderIO/gpt-crawler / addContentOrSplit

Function addContentOrSplit

src/core.ts:198–226  ·  view source on GitHub ↗
(
    data: Record<string, any>,
  )

Source from the content-addressed store, hash-verified

196 let estimatedTokens: number = 0;
197
198 const addContentOrSplit = async (
199 data: Record<string, any>,
200 ): Promise<void> => {
201 const contentString: string = JSON.stringify(data);
202 const tokenCount: number | false = isWithinTokenLimit(
203 contentString,
204 config.maxTokens || Infinity,
205 );
206
207 if (typeof tokenCount === "number") {
208 if (estimatedTokens + tokenCount > config.maxTokens!) {
209 // Only write the batch if it's not empty (something to write)
210 if (currentResults.length > 0) {
211 await writeBatchToFile();
212 }
213 // Since the addition of a single item exceeded the token limit, halve it.
214 estimatedTokens = Math.floor(tokenCount / 2);
215 currentResults.push(data);
216 } else {
217 currentResults.push(data);
218 estimatedTokens += tokenCount;
219 }
220 }
221
222 currentSize += getStringByteSize(contentString);
223 if (currentSize > maxBytes) {
224 await writeBatchToFile();
225 }
226 };
227
228 // Iterate over each JSON file and process its contents.
229 for (const file of jsonFiles) {

Callers 1

writeFunction · 0.85

Calls 2

writeBatchToFileFunction · 0.85
getStringByteSizeFunction · 0.85

Tested by

no test coverage detected