Runs the crawler. Returns a promise that resolves once all the requests are processed and `autoscaledPool.isFinished` returns `true`.

We can use the `requests` parameter to enqueue the initial requests — it is a shortcut for running {@apilink BasicCrawler.addRequests|`c
(requests?: RequestsLike, options?: CrawlerRunOptions)
| 977 | * @param [options] Options for the request queue. |
| 978 | */ |
| 979 | async run(requests?: RequestsLike, options?: CrawlerRunOptions): Promise<FinalStatistics> { |
| 980 | if (this.running) { |
| 981 | throw new Error( |
| 982 | 'This crawler instance is already running, you can add more requests to it via `crawler.addRequests()`.', |
| 983 | ); |
| 984 | } |
| 985 | |
| 986 | const { purgeRequestQueue = true, ...addRequestsOptions } = options ?? {}; |
| 987 | |
| 988 | if (this.hasFinishedBefore) { |
| 989 | // When executing the run method for the second time explicitly, |
| 990 | // we need to purge the default RQ to allow processing the same requests again - this is important so users can |
| 991 | // pass in failed requests back to the `crawler.run()`, otherwise they would be considered as handled and |
| 992 | // ignored - as a failed request is still handled. |
| 993 | if (this.requestQueue?.name === 'default' && purgeRequestQueue) { |
| 994 | await this.requestQueue.drop(); |
| 995 | this.requestQueue = await this._getRequestQueue(); |
| 996 | this.requestManager = undefined; |
| 997 | await this.initializeRequestManager(); |
| 998 | } |
| 999 | |
| 1000 | this.stats.reset(); |
| 1001 | await this.stats.resetStore(); |
| 1002 | await this.sessionPool?.resetStore(); |
| 1003 | } |
| 1004 | |
| 1005 | this.unexpectedStop = false; |
| 1006 | this.running = true; |
| 1007 | this.loggedPerRun.clear(); |
| 1008 | |
| 1009 | await purgeDefaultStorages({ |
| 1010 | onlyPurgeOnce: true, |
| 1011 | client: this.config.getStorageClient(), |
| 1012 | config: this.config, |
| 1013 | }); |
| 1014 | |
| 1015 | if (requests) { |
| 1016 | await this.addRequests(requests, addRequestsOptions); |
| 1017 | } |
| 1018 | |
| 1019 | await this._init(); |
| 1020 | await this.stats.startCapturing(); |
| 1021 | const periodicLogger = this.getPeriodicLogger(); |
| 1022 | // Don't await, we don't want to block the execution |
| 1023 | void this.setStatusMessage('Starting the crawler.', { level: 'INFO' }); |
| 1024 | |
| 1025 | const sigintHandler = async () => { |
| 1026 | this.log.warning( |
| 1027 | 'Pausing... Press CTRL+C again to force exit. To resume, do: CRAWLEE_PURGE_ON_START=0 npm start', |
| 1028 | ); |
| 1029 | await this._pauseOnMigration(); |
| 1030 | await this.autoscaledPool!.abort(); |
| 1031 | }; |
| 1032 | |
| 1033 | // Attach a listener to handle migration and aborting events gracefully. |
| 1034 | const boundPauseOnMigration = this._pauseOnMigration.bind(this); |
| 1035 | process.once('SIGINT', sigintHandler); |
| 1036 | this.events.on(EventType.MIGRATING, boundPauseOnMigration); |
nothing calls this directly
no test coverage detected