MCP · copy
hub / github.com/projectdiscovery/katana / Crawl

Method Crawl

pkg/engine/headless/headless.go:96–215  ·  view source on GitHub ↗

Crawl executes headless crawling on a given URL.

(URL string)

Source from the content-addressed store, hash-verified

94
95// Crawl executes the headless crawling on a given URL
96func (h *Headless) Crawl(URL string) error {
97 if h.debugger != nil {
98 h.debugger.StartURL(URL, 0)
99 }
100 defer func() {
101 if h.debugger != nil {
102 h.debugger.EndURL(URL)
103 }
104 }()
105
106 scopeValidator := validateScopeFunc(h, URL)
107
108 crawlOpts := crawler.Options{
109 ChromiumPath: h.options.Options.SystemChromePath,
110 MaxDepth: h.options.Options.MaxDepth,
111 ShowBrowser: h.options.Options.ShowBrowser,
112 MaxCrawlDuration: h.options.Options.CrawlDuration,
113 MaxFailureCount: h.options.Options.MaxFailureCount,
114 NoSandbox: h.options.Options.HeadlessNoSandbox,
115 NoIncognito: h.options.Options.HeadlessNoIncognito,
116 UserDataDir: h.options.Options.ChromeDataDir,
117 Proxy: h.options.Options.Proxy,
118 MaxBrowsers: 1,
119 PageMaxTimeout: 30 * time.Second,
120 ScopeValidator: scopeValidator,
121 AutomaticFormFill: h.options.Options.AutomaticFormFill,
122 PageLoadStrategy: h.options.Options.PageLoadStrategy,
123 ChromeWSUrl: h.options.Options.ChromeWSUrl,
124 DOMWaitTime: h.options.Options.DOMWaitTime,
125 RequestCallback: func(rr *output.Result) {
126 if rr == nil || rr.Request == nil {
127 return
128 }
129 if scopeValidator != nil && !scopeValidator(rr.Request.URL) {
130 return
131 }
132
133 // Register the real (intercepted) request URL before parsing the
134 // response body for additional discoveries. This ensures that real
135 // results with full response data always take priority over
136 // synthetic Request-only entries produced by performAdditionalAnalysis.
137 isUnique := h.isUniqueURL(rr.Request.URL)
138
139 // Always run additional analysis regardless of uniqueness so we
140 // don't miss URL discoveries embedded in a response body that the
141 // browser happened to fetch more than once.
142 navigationRequests := h.performAdditionalAnalysis(rr)
143 for _, req := range navigationRequests {
144 if err := h.options.OutputWriter.Write(req); err != nil {
145 h.logger.Debug("failed to write navigation result",
146 slog.String("url", func() string {
147 if req != nil && req.Request != nil {
148 return req.Request.URL
149 }
150 return ""
151 }()),
152 slog.String("error", err.Error()),
153 )

Callers

nothing calls this directly

Calls 13

isUniqueURL · Method · 0.95
NewHandler · Function · 0.92
New · Function · 0.92
validateScopeFunc · Function · 0.85
StartURL · Method · 0.80
EndURL · Method · 0.80
ClassifyPage · Method · 0.80
Write · Method · 0.65
Close · Method · 0.65
Crawl · Method · 0.65

Tested by

no test coverage detected