(
filePath: string,
options?: { firstPage?: number; lastPage?: number },
)
| 177 | * @param options Optional page range (1-indexed, inclusive); a bound that is omitted, or a lastPage of Infinity, is not passed to pdftoppm |
| 178 | */ |
| 179 | export async function extractPDFPages( |
| 180 | filePath: string, |
| 181 | options?: { firstPage?: number; lastPage?: number }, |
| 182 | ): Promise<PDFResult<PDFExtractPagesResult>> { |
| 183 | try { |
| 184 | const fs = getFsImplementation() |
| 185 | const stats = await fs.stat(filePath) |
| 186 | const originalSize = stats.size |
| 187 | |
| 188 | if (originalSize === 0) { |
| 189 | return { |
| 190 | success: false, |
| 191 | error: { reason: 'empty', message: `PDF file is empty: ${filePath}` }, |
| 192 | } |
| 193 | } |
| 194 | |
| 195 | if (originalSize > PDF_MAX_EXTRACT_SIZE) { |
| 196 | return { |
| 197 | success: false, |
| 198 | error: { |
| 199 | reason: 'too_large', |
| 200 | message: `PDF file exceeds maximum allowed size for text extraction (${formatFileSize(PDF_MAX_EXTRACT_SIZE)}).`, |
| 201 | }, |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | const available = await isPdftoppmAvailable() |
| 206 | if (!available) { |
| 207 | return { |
| 208 | success: false, |
| 209 | error: { |
| 210 | reason: 'unavailable', |
| 211 | message: |
| 212 | 'pdftoppm is not installed. Install poppler-utils (e.g. `brew install poppler` or `apt-get install poppler-utils`) to enable PDF page rendering.', |
| 213 | }, |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | const uuid = randomUUID() |
| 218 | const outputDir = join(getToolResultsDir(), `pdf-${uuid}`) |
| 219 | await mkdir(outputDir, { recursive: true }) |
| 220 | |
| 221 | // pdftoppm produces files like <prefix>-01.jpg, <prefix>-02.jpg, etc. |
| 222 | const prefix = join(outputDir, 'page') |
| 223 | const args = ['-jpeg', '-r', '100'] |
| 224 | if (options?.firstPage) { |
| 225 | args.push('-f', String(options.firstPage)) |
| 226 | } |
| 227 | if (options?.lastPage && options.lastPage !== Infinity) { |
| 228 | args.push('-l', String(options.lastPage)) |
| 229 | } |
| 230 | args.push(filePath, prefix) |
| 231 | const { code, stderr } = await execFileNoThrow('pdftoppm', args, { |
| 232 | timeout: 120_000, |
| 233 | useCwd: false, |
| 234 | }) |
| 235 | |
| 236 | if (code !== 0) { |
no test coverage detected