MCPcopy
hub / github.com/apify/crawlee / run

Method run

packages/basic-crawler/src/internals/basic-crawler.ts:979–1098  ·  view source on GitHub ↗

Runs the crawler. Returns a promise that resolves once all the requests are processed and `autoscaledPool.isFinished` returns `true`. We can use the `requests` parameter to enqueue the initial requests — it is a shortcut for running {@apilink BasicCrawler.addRequests|`crawler.addRequests()`} …

(requests?: RequestsLike, options?: CrawlerRunOptions)

Source from the content-addressed store, hash-verified

977 * @param [options] Options for the request queue.
978 */
979 async run(requests?: RequestsLike, options?: CrawlerRunOptions): Promise<FinalStatistics> {
980 if (this.running) {
981 throw new Error(
982 'This crawler instance is already running, you can add more requests to it via `crawler.addRequests()`.',
983 );
984 }
985
986 const { purgeRequestQueue = true, ...addRequestsOptions } = options ?? {};
987
988 if (this.hasFinishedBefore) {
989 // When executing the run method for the second time explicitly,
990 // we need to purge the default RQ to allow processing the same requests again - this is important so users can
991 // pass in failed requests back to the `crawler.run()`, otherwise they would be considered as handled and
992 // ignored - as a failed request is still handled.
993 if (this.requestQueue?.name === 'default' && purgeRequestQueue) {
994 await this.requestQueue.drop();
995 this.requestQueue = await this._getRequestQueue();
996 this.requestManager = undefined;
997 await this.initializeRequestManager();
998 }
999
1000 this.stats.reset();
1001 await this.stats.resetStore();
1002 await this.sessionPool?.resetStore();
1003 }
1004
1005 this.unexpectedStop = false;
1006 this.running = true;
1007 this.loggedPerRun.clear();
1008
1009 await purgeDefaultStorages({
1010 onlyPurgeOnce: true,
1011 client: this.config.getStorageClient(),
1012 config: this.config,
1013 });
1014
1015 if (requests) {
1016 await this.addRequests(requests, addRequestsOptions);
1017 }
1018
1019 await this._init();
1020 await this.stats.startCapturing();
1021 const periodicLogger = this.getPeriodicLogger();
1022 // Don't await, we don't want to block the execution
1023 void this.setStatusMessage('Starting the crawler.', { level: 'INFO' });
1024
1025 const sigintHandler = async () => {
1026 this.log.warning(
1027 'Pausing... Press CTRL+C again to force exit. To resume, do: CRAWLEE_PURGE_ON_START=0 npm start',
1028 );
1029 await this._pauseOnMigration();
1030 await this.autoscaledPool!.abort();
1031 };
1032
1033 // Attach a listener to handle migration and aborting events gracefully.
1034 const boundPauseOnMigration = this._pauseOnMigration.bind(this);
1035 process.once('SIGINT', sigintHandler);
1036 this.events.on(EventType.MIGRATING, boundPauseOnMigration);

Callers

nothing calls this directly

Calls 15

_getRequestQueueMethod · 0.95
addRequestsMethod · 0.95
_initMethod · 0.95
getPeriodicLoggerMethod · 0.95
setStatusMessageMethod · 0.95
teardownMethod · 0.95
purgeDefaultStoragesFunction · 0.90
clearMethod · 0.80
getStorageClientMethod · 0.80
startCapturingMethod · 0.80
stopCapturingMethod · 0.80

Tested by

no test coverage detected