(config: Config)
| 159 | } |
| 160 | |
| 161 | export async function write(config: Config) { |
| 162 | let nextFileNameString: PathLike = ""; |
| 163 | const jsonFiles = await glob("storage/datasets/default/*.json", { |
| 164 | absolute: true, |
| 165 | }); |
| 166 | |
| 167 | console.log(`Found ${jsonFiles.length} files to combine...`); |
| 168 | |
| 169 | let currentResults: Record<string, any>[] = []; |
| 170 | let currentSize: number = 0; |
| 171 | let fileCounter: number = 1; |
| 172 | const maxBytes: number = config.maxFileSize |
| 173 | ? config.maxFileSize * 1024 * 1024 |
| 174 | : Infinity; |
| 175 | |
/**
 * Measures how many bytes `text` occupies once encoded as UTF-8.
 * Multi-byte characters count for more than one byte, so the result can
 * exceed `text.length`.
 */
const getStringByteSize = (text: string): number => {
  return Buffer.byteLength(text, "utf-8");
};
| 178 | |
/**
 * Builds the path of the next output chunk: `config.outputFileName` with a
 * trailing ".json" removed, followed by "-<fileCounter>.json".
 * `fileCounter` is read at call time, so the result changes as the counter
 * advances.
 */
const nextFileName = (): string => {
  const baseName = config.outputFileName.replace(/\.json$/, "");
  return `${baseName}-${fileCounter}.json`;
};
| 181 | |
/**
 * Flushes the accumulated `currentResults` to the next numbered output file
 * as pretty-printed JSON (2-space indent), logs what was written, and then
 * resets the batch state. `estimatedTokens` is left untouched here.
 */
const writeBatchToFile = async (): Promise<void> => {
  // Record the target path in the enclosing-scope variable before writing.
  nextFileNameString = nextFileName();
  const serializedBatch = JSON.stringify(currentResults, null, 2);
  await writeFile(nextFileNameString, serializedBatch);
  console.log(
    `Wrote ${currentResults.length} items to ${nextFileNameString}`,
  );

  // Start a fresh batch: advance the file index, zero the size counter,
  // and empty the buffer.
  fileCounter++;
  currentSize = 0;
  currentResults = [];
};
| 195 | |
| 196 | let estimatedTokens: number = 0; |
| 197 | |
| 198 | const addContentOrSplit = async ( |
| 199 | data: Record<string, any>, |
| 200 | ): Promise<void> => { |
| 201 | const contentString: string = JSON.stringify(data); |
| 202 | const tokenCount: number | false = isWithinTokenLimit( |
| 203 | contentString, |
| 204 | config.maxTokens || Infinity, |
| 205 | ); |
| 206 | |
| 207 | if (typeof tokenCount === "number") { |
| 208 | if (estimatedTokens + tokenCount > config.maxTokens!) { |
| 209 | // Only write the batch if it's not empty (something to write) |
| 210 | if (currentResults.length > 0) { |
| 211 | await writeBatchToFile(); |
| 212 | } |
| 213 | // Since the addition of a single item exceeded the token limit, halve it. |
| 214 | estimatedTokens = Math.floor(tokenCount / 2); |
| 215 | currentResults.push(data); |
| 216 | } else { |
| 217 | currentResults.push(data); |
| 218 | estimatedTokens += tokenCount; |
no test coverage detected