(params: {
monitor: MonitorRow;
check: MonitorCheckRow;
target: MonitorTarget;
targetRun: Extract<MonitorTargetRun, { type: "crawl" }>;
})
| 839 | } |
| 840 | |
| 841 | async function enqueueMonitorCrawlTarget(params: { |
| 842 | monitor: MonitorRow; |
| 843 | check: MonitorCheckRow; |
| 844 | target: MonitorTarget; |
| 845 | targetRun: Extract<MonitorTargetRun, { type: "crawl" }>; |
| 846 | }): Promise<Extract<MonitorTargetRun, { type: "crawl" }>> { |
| 847 | if (params.target.type !== "crawl") { |
| 848 | throw new Error("Expected crawl target"); |
| 849 | } |
| 850 | |
| 851 | const crawlId = params.targetRun.crawlId; |
| 852 | const body = crawlRequestSchema.parse({ |
| 853 | url: params.target.url, |
| 854 | ...(params.target.crawlOptions ?? {}), |
| 855 | scrapeOptions: withMonitorScrapeDefaults(params.target.scrapeOptions ?? {}), |
| 856 | origin: "monitor", |
| 857 | }) as CrawlRequest; |
| 858 | |
| 859 | await logRequest({ |
| 860 | id: crawlId, |
| 861 | kind: "crawl", |
| 862 | api_version: "v2", |
| 863 | team_id: params.monitor.team_id, |
| 864 | origin: "monitor", |
| 865 | integration: null, |
| 866 | target_hint: body.url, |
| 867 | zeroDataRetention: false, |
| 868 | api_key_id: null, |
| 869 | }); |
| 870 | |
| 871 | const crawlerOptions = { |
| 872 | ...body, |
| 873 | url: undefined, |
| 874 | scrapeOptions: undefined, |
| 875 | prompt: undefined, |
| 876 | }; |
| 877 | |
| 878 | const sc: StoredCrawl = { |
| 879 | originUrl: body.url, |
| 880 | crawlerOptions: toV0CrawlerOptions(crawlerOptions), |
| 881 | scrapeOptions: body.scrapeOptions, |
| 882 | internalOptions: { |
| 883 | disableSmartWaitCache: true, |
| 884 | teamId: params.monitor.team_id, |
| 885 | saveScrapeResultToGCS: !!config.GCS_FIRE_ENGINE_BUCKET_NAME, |
| 886 | zeroDataRetention: false, |
| 887 | bypassBilling: true, |
| 888 | }, |
| 889 | team_id: params.monitor.team_id, |
| 890 | createdAt: Date.now(), |
| 891 | maxConcurrency: body.maxConcurrency, |
| 892 | zeroDataRetention: false, |
| 893 | }; |
| 894 | |
| 895 | const crawler = crawlToCrawler(crawlId, sc, null); |
| 896 | try { |
| 897 | sc.robots = await crawler.getRobotsTxt( |
| 898 | body.scrapeOptions.skipTlsVerification, |
no test coverage detected
searching dependent graphs…