| 16 | } |
| 17 | |
/**
 * Document loader that retrieves page content through `SpiderApp` and turns
 * each returned page into a `Document`.
 *
 * Two modes are supported:
 * - `scrape`: calls `SpiderApp.scrapeUrl` and yields a single document.
 * - `crawl` (default): calls `SpiderApp.crawlUrl` and yields one document per
 *   returned page.
 */
class SpiderLoader extends BaseDocumentLoader {
    private apiKey: string
    private url: string
    private mode: 'crawl' | 'scrape'
    private limit?: number
    private additionalMetadata?: Record<string, unknown>
    private params?: Record<string, unknown>

    /**
     * @param loaderParams - Loader configuration: `apiKey` (required), `url`,
     *   `mode` (defaults to `'crawl'`), optional `limit`, optional
     *   `additionalMetadata` merged into every document, and optional `params`
     *   forwarded to the Spider client.
     * @throws Error when `apiKey` is missing or empty.
     */
    constructor(loaderParams: SpiderLoaderParameters) {
        super()
        const { apiKey, url, mode = 'crawl', limit, additionalMetadata, params } = loaderParams
        if (!apiKey) {
            throw new Error('Spider API key not set. You can set it as SPIDER_API_KEY in your .env file, or pass it to Spider.')
        }

        this.apiKey = apiKey
        this.url = url
        this.mode = mode
        // Number(undefined) is NaN, so an omitted limit must stay undefined
        // rather than being coerced blindly.
        this.limit = SpiderLoader.normalizeLimit(limit)
        this.additionalMetadata = additionalMetadata
        this.params = params
    }

    /**
     * Coerces a user-supplied limit to a finite number.
     *
     * @param value - Raw limit; accepted as `unknown` because callers may pass
     *   a numeric string at runtime.
     * @returns The numeric limit, or `undefined` when the value is absent,
     *   blank, or not a finite number.
     */
    private static normalizeLimit(value: unknown): number | undefined {
        if (value === undefined || value === null) {
            return undefined
        }
        if (typeof value === 'string' && value.trim() === '') {
            return undefined
        }
        const parsed = Number(value)
        return Number.isFinite(parsed) ? parsed : undefined
    }

    /**
     * Fetches the configured URL and converts the result into documents.
     *
     * Each document's `pageContent` is the page's `content` (empty string when
     * absent). Its metadata is `additionalMetadata` plus a `source` key set to
     * the page's `url`; `source` wins over a same-named key in
     * `additionalMetadata`.
     *
     * @returns One document per page returned by the Spider client.
     * @throws Error when the client reports `success` as falsy, or when `mode`
     *   is neither `'crawl'` nor `'scrape'`.
     */
    public async load(): Promise<DocumentInterface[]> {
        const app = new SpiderApp({ apiKey: this.apiKey })
        let spiderDocs: any[]

        if (this.mode === 'scrape') {
            const response = await app.scrapeUrl(this.url, this.params)
            if (!response.success) {
                throw new Error(`Spider: Failed to scrape URL. Error: ${response.error}`)
            }
            spiderDocs = [response.data]
        } else if (this.mode === 'crawl') {
            // Build a fresh object so the caller's `params` is never mutated.
            // The limit is applied even when no `params` were supplied, and a
            // caller-provided `params.limit` survives when no limit was given.
            const crawlParams = this.limit === undefined ? this.params : { ...this.params, limit: this.limit }
            const response = await app.crawlUrl(this.url, crawlParams)
            if (!response.success) {
                throw new Error(`Spider: Failed to crawl URL. Error: ${response.error}`)
            }
            // Tolerate a missing or non-array payload instead of crashing in `.map`.
            const data = response.data
            if (data === undefined || data === null) {
                spiderDocs = []
            } else {
                spiderDocs = Array.isArray(data) ? data : [data]
            }
        } else {
            throw new Error(`Unrecognized mode '${this.mode}'. Expected one of 'crawl', 'scrape'.`)
        }

        return spiderDocs
            .filter((doc) => doc !== undefined && doc !== null)
            .map(
                (doc) =>
                    new Document({
                        pageContent: doc.content || '',
                        metadata: {
                            ...(this.additionalMetadata || {}),
                            source: doc.url
                        }
                    })
            )
    }
}
nothing calls this directly
no outgoing calls
no test coverage detected