| 57 | } |
| 58 | |
/**
 * Lazily yields one parsed record per non-blank line of a JSONL file.
 *
 * The file is read into memory in one go and then split on newlines; it is
 * not a true streaming read. JSONL files at this scale (<= a few MB) fit
 * comfortably in memory.
 *
 * Lines that fail to parse are reported with `console.warn` and skipped, so
 * one corrupt record does not abort the whole run.
 *
 * @param path - Location of the JSONL file to read.
 * @returns An async generator of the parsed rows, in file order. Parsed
 *   values are cast to `ExtractedRow` without runtime validation.
 * @throws Whatever `readFile` rejects with (e.g. the file does not exist).
 */
async function* readRows(path: string): AsyncGenerator<ExtractedRow> {
  const text = await readFile(path, "utf8");
  for (const line of text.split("\n")) {
    // Parse the trimmed text rather than the raw line. "utf8" decoding keeps
    // a leading byte-order mark, and JSON.parse rejects U+FEFF (and other
    // non-JSON whitespace), which used to drop the first record of
    // BOM-prefixed files. trim() also removes the "\r" left by CRLF endings.
    const trimmed = line.trim();
    if (!trimmed) continue;
    try {
      yield JSON.parse(trimmed) as ExtractedRow;
    } catch (err) {
      console.warn(
        `Skipping unparseable JSONL line: ${err instanceof Error ? err.message : err}`,
      );
    }
  }
}
| 74 | |
| 75 | async function main() { |
| 76 | const args = parseArgs(); |