MCPcopy
hub / github.com/FlowiseAI/Flowise / SpiderLoader

Class SpiderLoader

packages/components/nodes/documentloaders/Spider/Spider.ts:18–75  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

16}
17
/**
 * Document loader that retrieves page content through `SpiderApp` and turns
 * every returned page into a `Document`.
 *
 * Two modes are supported:
 * - `'scrape'`: calls `scrapeUrl` for the configured URL and wraps the single
 *   result in a one-element list.
 * - `'crawl'` (default): calls `crawlUrl` for the configured URL, optionally
 *   capped by `limit`.
 */
class SpiderLoader extends BaseDocumentLoader {
    private apiKey: string
    private url: string
    private mode: 'crawl' | 'scrape'
    private limit?: number
    private additionalMetadata?: Record<string, unknown>
    private params?: Record<string, unknown>

    /**
     * @param loaderParams Loader configuration. `apiKey` is mandatory, `mode`
     * defaults to `'crawl'`, and `limit`, `additionalMetadata` and `params`
     * are optional.
     * @throws Error when no API key is supplied.
     */
    constructor(loaderParams: SpiderLoaderParameters) {
        super()
        const { apiKey, url, mode = 'crawl', limit, additionalMetadata, params } = loaderParams
        if (!apiKey) {
            throw new Error('Spider API key not set. You can set it as SPIDER_API_KEY in your .env file, or pass it to Spider.')
        }

        this.apiKey = apiKey
        this.url = url
        this.mode = mode
        this.limit = SpiderLoader.normalizeLimit(limit)
        this.additionalMetadata = additionalMetadata
        this.params = params
    }

    /**
     * Converts the raw `limit` option into a usable number.
     *
     * `Number(undefined)` is `NaN`, so a missing limit must not be coerced
     * blindly; missing or non-numeric values are reported as `undefined`
     * ("no explicit limit") instead.
     */
    private static normalizeLimit(limit: unknown): number | undefined {
        if (limit == null) {
            return undefined
        }
        const parsed = Number(limit)
        return Number.isFinite(parsed) ? parsed : undefined
    }

    /**
     * Fetches the configured URL in the configured mode and maps the results
     * to documents.
     *
     * @returns One `Document` per returned page. `pageContent` is the page's
     * `content` (empty string when absent) and `metadata` is the additional
     * metadata plus a `source` entry holding the page URL.
     * @throws Error when the Spider call reports failure or the mode is not
     * `'crawl'` or `'scrape'`.
     */
    public async load(): Promise<DocumentInterface[]> {
        const app = new SpiderApp({ apiKey: this.apiKey })
        let spiderDocs: any[]

        if (this.mode === 'scrape') {
            const response = await app.scrapeUrl(this.url, this.params)
            if (!response.success) {
                throw new Error(`Spider: Failed to scrape URL. Error: ${response.error}`)
            }
            spiderDocs = [response.data]
        } else if (this.mode === 'crawl') {
            // Build a fresh object rather than writing into `this.params`: that object
            // belongs to the caller. The limit is only added when one was actually
            // provided, so a `limit` already present in `params` is not clobbered.
            const crawlParams = this.limit === undefined ? this.params : { ...this.params, limit: this.limit }
            const response = await app.crawlUrl(this.url, crawlParams)
            if (!response.success) {
                throw new Error(`Spider: Failed to crawl URL. Error: ${response.error}`)
            }
            // A successful crawl without data yields no documents instead of a crash.
            spiderDocs = response.data || []
        } else {
            throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`)
        }

        return spiderDocs.map(
            (doc) =>
                new Document({
                    pageContent: doc.content || '',
                    metadata: {
                        ...(this.additionalMetadata || {}),
                        source: doc.url
                    }
                })
        )
    }
}

Callers

nothing calls this directly

Calls

no outgoing calls

Tested by

no test coverage detected