(
url: string,
proxyUrl?: string,
options?: { signal?: AbortSignal; timeoutMillis?: number },
)
| 63 | } |
| 64 | |
| 65 | /** Requests the robots.txt at the given URL with gotScraping and wraps the robotsParser result in a RobotsTxtFile; a 404 response yields an allow-everything file with no sitemaps, and any other error is rethrown. */ protected static async load( |
| 66 | url: string, /* URL that is requested and also handed to robotsParser */ |
| 67 | proxyUrl?: string, /* optional proxy URL; forwarded to the request and to the RobotsTxtFile constructor */ |
| 68 | options?: { signal?: AbortSignal; timeoutMillis?: number }, /* optional abort signal and request timeout in milliseconds */ |
| 69 | ): Promise<RobotsTxtFile> { |
| 70 | if (!HTTPError) { /* HTTPError is resolved on first use through a dynamic import; presumably a binding declared outside this excerpt - confirm */ |
| 71 | HTTPError = (await import('got-scraping')).HTTPError; |
| 72 | } |
| 73 | |
| 74 | try { |
| 75 | const response = await gotScraping({ |
| 76 | url, |
| 77 | proxyUrl, |
| 78 | method: 'GET', |
| 79 | responseType: 'text', |
| 80 | signal: options?.signal, |
| 81 | ...(options?.timeoutMillis ? { timeout: { request: options.timeoutMillis } } : {}), /* the timeout key is only added when timeoutMillis is truthy, so undefined or 0 leaves it out */ |
| 82 | }); |
| 83 | |
| 84 | return new RobotsTxtFile(robotsParser(url.toString(), response.body), proxyUrl); /* the text body of the response is passed to robotsParser together with the URL */ |
| 85 | } catch (e) { |
| 86 | if (e instanceof HTTPError && e.response.statusCode === 404) { /* a 404 is handled as a missing robots.txt: return a stub that permits everything */ |
| 87 | return new RobotsTxtFile( |
| 88 | { |
| 89 | isAllowed() { |
| 90 | return true; |
| 91 | }, |
| 92 | getSitemaps() { |
| 93 | return []; |
| 94 | }, |
| 95 | }, |
| 96 | proxyUrl, |
| 97 | ); |
| 98 | } |
| 99 | throw e; /* every other failure propagates to the caller unchanged */ |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | /** |
| 104 | * Check if a URL should be crawled by robots. |
no test coverage detected