MCPcopy
hub / github.com/yujiosaka/headless-chrome-crawler / _startRequest

Method _startRequest

lib/hccrawler.js:291–309  ·  view source on GitHub ↗

@param {!Object} options
@param {!number} depth
@param {string} previousUrl
@return {!Promise}
@private

(options, depth, previousUrl)

Source from the content-addressed store, hash-verified

289 * @private
290 */
291 async _startRequest(options, depth, previousUrl) {
292 const skip = await this._skipRequest(options);
293 if (skip) {
294 this.emit(HCCrawler.Events.RequestSkipped, options);
295 await this._markRequested(options);
296 return;
297 }
298 const allowed = await this._checkAllowedRobots(options, depth, previousUrl);
299 if (!allowed) {
300 this.emit(HCCrawler.Events.RequestDisallowed, options);
301 await this._markRequested(options);
302 return;
303 }
304 await this._followSitemap(options, depth, previousUrl);
305 const links = await this._request(options, depth, previousUrl);
306 this._checkRequestCount();
307 await this._followLinks(links, options, depth);
308 await delay(options.delay);
309 }
310
311 /**
312 * @param {!Object} options

Callers 1

constructor · Method · 0.95

Calls 7

_skipRequest · Method · 0.95
_markRequested · Method · 0.95
_checkAllowedRobots · Method · 0.95
_followSitemap · Method · 0.95
_request · Method · 0.95
_checkRequestCount · Method · 0.95
_followLinks · Method · 0.95

Tested by

no test coverage detected