MCPcopy
hub / github.com/projectdiscovery/katana / Do

Method Do

pkg/engine/common/base.go:401–496  ·  view source on GitHub ↗

Do executes the main crawling loop for the given crawl session. It processes items from the queue concurrently (respecting the Concurrency limit), validates each request (URL format, path filters, scope), applies rate limiting and delays, executes the request using the provided doRequest function, …

(crawlSession *CrawlSession, doRequest DoRequestFunc)

Source from the content-addressed store, hash-verified

399// The method returns when the queue is empty or the session context is cancelled
400// (due to timeout or manual cancellation). Returns an error if the context is cancelled.
401func (s *Shared) Do(crawlSession *CrawlSession, doRequest DoRequestFunc) error {
402 wg := sizedwaitgroup.New(s.Options.Options.Concurrency)
403 for item := range crawlSession.Queue.Pop() {
404 if ctxErr := crawlSession.Ctx.Err(); ctxErr != nil {
405 return ctxErr
406 }
407
408 req, ok := item.(*navigation.Request)
409 if !ok {
410 continue
411 }
412
413 if !utils.IsURL(req.URL) {
414 if s.Options.Options.OnSkipURL != nil {
415 s.Options.Options.OnSkipURL(req.URL)
416 }
417 gologger.Debug().Msgf("`%v` not a url. skipping", req.URL)
418 continue
419 }
420
421 if !s.Options.ValidatePath(req.URL) {
422 gologger.Debug().Msgf("`%v` filtered path. skipping", req.URL)
423 continue
424 }
425
426 inScope, scopeErr := s.Options.ValidateScope(req.URL, crawlSession.Hostname)
427 if scopeErr != nil {
428 gologger.Debug().Msgf("Error validating scope for `%v`: %v. skipping", req.URL, scopeErr)
429 continue
430 }
431 if !req.SkipValidation && !inScope {
432 gologger.Debug().Msgf("`%v` not in scope. skipping", req.URL)
433 continue
434 }
435
436 wg.Add()
437 // gologger.Debug().Msgf("Visiting: %v", req.URL) // not sure if this is needed
438 go func() {
439 defer wg.Done()
440
441 if s.Options.HostRateLimit != nil {
442 _ = s.Options.HostRateLimit.Take(crawlSession.Hostname)
443 } else if s.Options.RateLimit != nil {
444 s.Options.RateLimit.Take()
445 }
446 s.ApplyBackoff(crawlSession.Hostname)
447
448 // Delay if the user has asked for it
449 if s.Options.Options.Delay > 0 {
450 time.Sleep(time.Duration(s.Options.Options.Delay) * time.Second)
451 }
452
453 if s.Options.Options.MaxDomainPages > 0 {
454 counter := s.DomainCounter(crawlSession.Hostname)
455 if counter.Add(1) > int64(s.Options.Options.MaxDomainPages) {
456 return
457 }
458 }

Callers 4

getFormDSLEngineFunction · 0.45
makeRequestMethod · 0.45
VisitMethod · 0.45
VisitMethod · 0.45

Calls 15

ApplyBackoffMethod · 0.95
DomainCounterMethod · 0.95
RecordThrottleMethod · 0.95
RecordSuccessMethod · 0.95
OutputMethod · 0.95
EnqueueMethod · 0.95
IsURLFunction · 0.92
IsThrottledFunction · 0.85
RequestURLMethod · 0.80
IsRedirectMethod · 0.80
ParseResponseMethod · 0.80
WriteErrMethod · 0.65

Tested by

no test coverage detected