Crawl performs a headless crawl of the given URL
(URL string)
| 94 | |
| 95 | // Crawl performs a headless crawl of the given URL. |
| 96 | func (h *Headless) Crawl(URL string) error { |
| 97 | if h.debugger != nil { |
| 98 | h.debugger.StartURL(URL, 0) |
| 99 | } |
| 100 | defer func() { |
| 101 | if h.debugger != nil { |
| 102 | h.debugger.EndURL(URL) |
| 103 | } |
| 104 | }() |
| 105 | |
| 106 | scopeValidator := validateScopeFunc(h, URL) |
| 107 | |
| 108 | crawlOpts := crawler.Options{ |
| 109 | ChromiumPath: h.options.Options.SystemChromePath, |
| 110 | MaxDepth: h.options.Options.MaxDepth, |
| 111 | ShowBrowser: h.options.Options.ShowBrowser, |
| 112 | MaxCrawlDuration: h.options.Options.CrawlDuration, |
| 113 | MaxFailureCount: h.options.Options.MaxFailureCount, |
| 114 | NoSandbox: h.options.Options.HeadlessNoSandbox, |
| 115 | NoIncognito: h.options.Options.HeadlessNoIncognito, |
| 116 | UserDataDir: h.options.Options.ChromeDataDir, |
| 117 | Proxy: h.options.Options.Proxy, |
| 118 | MaxBrowsers: 1, |
| 119 | PageMaxTimeout: 30 * time.Second, |
| 120 | ScopeValidator: scopeValidator, |
| 121 | AutomaticFormFill: h.options.Options.AutomaticFormFill, |
| 122 | PageLoadStrategy: h.options.Options.PageLoadStrategy, |
| 123 | ChromeWSUrl: h.options.Options.ChromeWSUrl, |
| 124 | DOMWaitTime: h.options.Options.DOMWaitTime, |
| 125 | RequestCallback: func(rr *output.Result) { |
| 126 | if rr == nil || rr.Request == nil { |
| 127 | return |
| 128 | } |
| 129 | if scopeValidator != nil && !scopeValidator(rr.Request.URL) { |
| 130 | return |
| 131 | } |
| 132 | |
| 133 | // Register the real (intercepted) request URL before parsing the |
| 134 | // response body for additional discoveries. This ensures that real |
| 135 | // results with full response data always take priority over |
| 136 | // synthetic Request-only entries produced by performAdditionalAnalysis. |
| 137 | isUnique := h.isUniqueURL(rr.Request.URL) |
| 138 | |
| 139 | // Always run additional analysis regardless of uniqueness so we |
| 140 | // don't miss URL discoveries embedded in a response body that the |
| 141 | // browser happened to fetch more than once. |
| 142 | navigationRequests := h.performAdditionalAnalysis(rr) |
| 143 | for _, req := range navigationRequests { |
| 144 | if err := h.options.OutputWriter.Write(req); err != nil { |
| 145 | h.logger.Debug("failed to write navigation result", |
| 146 | slog.String("url", func() string { |
| 147 | if req != nil && req.Request != nil { |
| 148 | return req.Request.URL |
| 149 | } |
| 150 | return "" |
| 151 | }()), |
| 152 | slog.String("error", err.Error()), |
| 153 | ) |
nothing calls this directly
no test coverage detected