( params: ProcessRobotOutputParams )
| 25 | } |
| 26 | |
| 27 | export async function processRobotOutputFormats( |
| 28 | params: ProcessRobotOutputParams |
| 29 | ): Promise<ProcessRobotOutputResult> { |
| 30 | const { |
| 31 | robotType, |
| 32 | outputFormats, |
| 33 | categorizedOutput, |
| 34 | currentPage, |
| 35 | initialBinaryOutput, |
| 36 | llmConfig, |
| 37 | } = params; |
| 38 | |
| 39 | const binaryOutput: Record<string, any> = { |
| 40 | ...(initialBinaryOutput || {}), |
| 41 | }; |
| 42 | |
| 43 | const effectiveFormats = Array.isArray(outputFormats) |
| 44 | ? (outputFormats.length > 0 |
| 45 | ? outputFormats |
| 46 | : robotType === 'crawl' |
| 47 | ? DEFAULT_OUTPUT_FORMATS |
| 48 | : outputFormats) |
| 49 | : DEFAULT_OUTPUT_FORMATS; |
| 50 | |
| 51 | if (robotType !== 'crawl' && robotType !== 'search') { |
| 52 | return { categorizedOutput, binaryOutput }; |
| 53 | } |
| 54 | |
| 55 | if (robotType === 'crawl' && Array.isArray((categorizedOutput.crawl as any)?.['Crawl Results'])) { |
| 56 | const crawlResults: any[] = (categorizedOutput.crawl as any)['Crawl Results']; |
| 57 | const includeVisibleScreenshot = effectiveFormats.includes('screenshot-visible'); |
| 58 | const includeFullpageScreenshot = effectiveFormats.includes('screenshot-fullpage'); |
| 59 | |
| 60 | for (let pageIndex = 0; pageIndex < crawlResults.length; pageIndex++) { |
| 61 | const pageResult = crawlResults[pageIndex]; |
| 62 | if (!pageResult.error) { |
| 63 | let markdownConversionSucceeded = false; |
| 64 | if (effectiveFormats.includes('markdown') && pageResult.html) { |
| 65 | try { |
| 66 | pageResult.markdown = await parseMarkdown(pageResult.html, pageResult.metadata?.url); |
| 67 | markdownConversionSucceeded = true; |
| 68 | } catch (e: any) { |
| 69 | logger.log('warn', `Failed to convert crawl page to markdown: ${e.message}`); |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | if (!effectiveFormats.includes('html') && markdownConversionSucceeded) { |
| 74 | delete pageResult.html; |
| 75 | } |
| 76 | if (!effectiveFormats.includes('text')) delete pageResult.text; |
| 77 | if (!effectiveFormats.includes('links')) delete pageResult.links; |
| 78 | |
| 79 | if (effectiveFormats.includes('summary')) { |
| 80 | const pageText = (pageResult.markdown || pageResult.text || '').substring(0, 40000); |
| 81 | if (pageText.trim()) { |
| 82 | try { |
| 83 | const { summarizeMarkdown } = require('../utils/summarizer'); |
| 84 | pageResult.summary = await summarizeMarkdown(pageText, llmConfig); |
no test coverage detected