Method load

packages/utils/src/internals/robots.ts:65–101 · view source on GitHub ↗

(
        url: string,
        proxyUrl?: string,
        options?: { signal?: AbortSignal; timeoutMillis?: number },
    )

Source from the content-addressed store, hash-verified

63	}
64
/**
 * Downloads the robots.txt file at `url` and wraps the parsed result in a `RobotsTxtFile`.
 *
 * The file is requested with `gotScraping` as a plain-text `GET`. When the server answers
 * with HTTP 404, a permissive `RobotsTxtFile` is returned instead: its `isAllowed()` always
 * yields `true` and its `getSitemaps()` yields an empty list. Any other failure is rethrown.
 *
 * @param url Address of the robots.txt file to download.
 * @param proxyUrl Optional proxy URL, passed to the request and to the resulting `RobotsTxtFile`.
 * @param options Optional request controls: `signal` is forwarded to the request, and a truthy
 *   `timeoutMillis` is applied as the request timeout.
 * @returns A `RobotsTxtFile` built from the response body, or the permissive one on 404.
 */
protected static async load(
    url: string,
    proxyUrl?: string,
    options?: { signal?: AbortSignal; timeoutMillis?: number },
): Promise<RobotsTxtFile> {
    // `HTTPError` is resolved lazily, on the first call that finds it unset.
    if (!HTTPError) {
        const gotScrapingModule = await import('got-scraping');
        HTTPError = gotScrapingModule.HTTPError;
    }

    // Contributes a `timeout` key only when a truthy timeout was supplied.
    const timeoutOverride = options?.timeoutMillis ? { timeout: { request: options.timeoutMillis } } : {};

    try {
        const { body } = await gotScraping({
            url,
            proxyUrl,
            method: 'GET',
            responseType: 'text',
            signal: options?.signal,
            ...timeoutOverride,
        });

        const parsedRobots = robotsParser(url.toString(), body);
        return new RobotsTxtFile(parsedRobots, proxyUrl);
    } catch (error) {
        // Only an HTTP 404 is handled here; everything else propagates to the caller.
        if (!(error instanceof HTTPError) || error.response.statusCode !== 404) {
            throw error;
        }

        // 404 response: fall back to a stub that allows every URL and lists no sitemaps.
        return new RobotsTxtFile(
            {
                isAllowed: () => true,
                getSitemaps: () => [],
            },
            proxyUrl,
        );
    }
}
102
103	/**
104	* Check if a URL should be crawled by robots.

findMethod · 0.45

parseSitemapsMethod · 0.45

toStringMethod · 0.80

no test coverage detected