MCPcopy
hub / github.com/BuilderIO/gpt-crawler / write

Function write

src/core.ts:161–241  ·  view source on GitHub ↗
(config: Config)

Source from the content-addressed store, hash-verified

159}
160
161export async function write(config: Config) {
162 let nextFileNameString: PathLike = "";
163 const jsonFiles = await glob("storage/datasets/default/*.json", {
164 absolute: true,
165 });
166
167 console.log(`Found ${jsonFiles.length} files to combine...`);
168
169 let currentResults: Record<string, any>[] = [];
170 let currentSize: number = 0;
171 let fileCounter: number = 1;
172 const maxBytes: number = config.maxFileSize
173 ? config.maxFileSize * 1024 * 1024
174 : Infinity;
175
176 const getStringByteSize = (str: string): number =>
177 Buffer.byteLength(str, "utf-8");
178
179 const nextFileName = (): string =>
180 `${config.outputFileName.replace(/\.json$/, "")}-${fileCounter}.json`;
181
182 const writeBatchToFile = async (): Promise<void> => {
183 nextFileNameString = nextFileName();
184 await writeFile(
185 nextFileNameString,
186 JSON.stringify(currentResults, null, 2),
187 );
188 console.log(
189 `Wrote ${currentResults.length} items to ${nextFileNameString}`,
190 );
191 currentResults = [];
192 currentSize = 0;
193 fileCounter++;
194 };
195
196 let estimatedTokens: number = 0;
197
198 const addContentOrSplit = async (
199 data: Record<string, any>,
200 ): Promise<void> => {
201 const contentString: string = JSON.stringify(data);
202 const tokenCount: number | false = isWithinTokenLimit(
203 contentString,
204 config.maxTokens || Infinity,
205 );
206
207 if (typeof tokenCount === "number") {
208 if (estimatedTokens + tokenCount > config.maxTokens!) {
209 // Only write the batch if it's not empty (something to write)
210 if (currentResults.length > 0) {
211 await writeBatchToFile();
212 }
213 // Since the addition of a single item exceeded the token limit, halve it.
214 estimatedTokens = Math.floor(tokenCount / 2);
215 currentResults.push(data);
216 } else {
217 currentResults.push(data);
218 estimatedTokens += tokenCount;

Callers 3

main.ts — File · 0.85
write — Method · 0.85
handler — Function · 0.85

Calls 2

addContentOrSplit — Function · 0.85
writeBatchToFile — Function · 0.85

Tested by

no test coverage detected