(filePath: string)
| 32 | * @returns Result containing PDF data or a structured error |
| 33 | */ |
| 34 | export async function readPDF(filePath: string): Promise< |
| 35 | PDFResult<{ |
| 36 | type: 'pdf' |
| 37 | file: { |
| 38 | filePath: string |
| 39 | base64: string |
| 40 | originalSize: number |
| 41 | } |
| 42 | }> |
| 43 | > { |
| 44 | try { |
| 45 | const fs = getFsImplementation() |
| 46 | const stats = await fs.stat(filePath) |
| 47 | const originalSize = stats.size |
| 48 | |
| 49 | // Check if file is empty |
| 50 | if (originalSize === 0) { |
| 51 | return { |
| 52 | success: false, |
| 53 | error: { reason: 'empty', message: `PDF file is empty: ${filePath}` }, |
| 54 | } |
| 55 | } |
| 56 | |
| 57 | // Check if PDF exceeds maximum size |
| 58 | // The API has a 32MB total request limit. After base64 encoding (~33% larger), |
| 59 | // a PDF must be under ~20MB raw to leave room for conversation context. |
| 60 | if (originalSize > PDF_TARGET_RAW_SIZE) { |
| 61 | return { |
| 62 | success: false, |
| 63 | error: { |
| 64 | reason: 'too_large', |
| 65 | message: `PDF file exceeds maximum allowed size of ${formatFileSize(PDF_TARGET_RAW_SIZE)}.`, |
| 66 | }, |
| 67 | } |
| 68 | } |
| 69 | |
| 70 | const fileBuffer = await readFile(filePath) |
| 71 | |
| 72 | // Validate PDF magic bytes — reject files that aren't actually PDFs |
| 73 | // (e.g., HTML files renamed to .pdf) before they enter conversation context. |
| 74 | // Once an invalid PDF document block is in the message history, every subsequent |
| 75 | // API call fails with 400 "The PDF specified was not valid" and the session |
| 76 | // becomes unrecoverable without /clear. |
| 77 | const header = fileBuffer.subarray(0, 5).toString('ascii') |
| 78 | if (!header.startsWith('%PDF-')) { |
| 79 | return { |
| 80 | success: false, |
| 81 | error: { |
| 82 | reason: 'corrupted', |
| 83 | message: `File is not a valid PDF (missing %PDF- header): ${filePath}`, |
| 84 | }, |
| 85 | } |
| 86 | } |
| 87 | |
| 88 | const base64 = fileBuffer.toString('base64') |
| 89 | |
| 90 | // Note: we cannot check the page count here without parsing the PDF. |
| 91 | // The API enforces the 100-page limit and returns an error if it is exceeded. |
no test coverage detected