Recursively crawl the target website using parallel workers.
(self)
| 205 | return [] |
| 206 | |
def crawl_recursive(self) -> Set[str]:
    """Crawl the target website breadth-first using parallel workers.

    Starting from ``self.target_url`` at depth 0, URLs are taken from a
    FIFO queue in batches, each URL is handed to ``self.crawl(url, depth)``
    on a thread pool, and every previously unseen link returned is
    recorded and reported via ``self.state_manager.update_urls``. Only
    URLs whose depth is below ``self.depth`` are crawled; links found at
    the depth limit are still recorded in the result, just not followed.

    A worker that raises, or does not finish within ``self.timeout * 3``
    seconds, is logged at debug level and its links are skipped; the
    crawl continues with the remaining URLs.

    Returns:
        The set of all discovered URLs, including ``self.target_url``.
    """
    # Function-scope import so this method does not depend on the
    # module's import block already providing ``deque``.
    from collections import deque

    self.state_manager.set_phase(PentestPhase.CRAWLING)
    console.print("[bold blue]🕷️ Starting web crawler...[/bold blue]")

    all_urls = {self.target_url}
    # deque gives O(1) pops from the left; list.pop(0) is O(n).
    queue = deque([(self.target_url, 0)])

    # Clamp to at least one so a zero/negative thread setting cannot make
    # ThreadPoolExecutor raise ValueError or produce empty batches.
    worker_count = max(1, min(self.threads, 10))
    batch_size = max(1, self.threads)

    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
        TimeElapsedColumn(),
        console=console,
    ) as progress:

        task = progress.add_task(
            f"[cyan]Crawling (depth: {self.depth})...",
            total=None,
        )

        with ThreadPoolExecutor(max_workers=worker_count) as executor:
            while queue:
                # Grab the next batch, dropping entries at or beyond the
                # depth limit (only the seed can fail this check, e.g.
                # when self.depth <= 0).
                batch = []
                while queue and len(batch) < batch_size:
                    url, depth = queue.popleft()
                    if depth < self.depth:
                        batch.append((url, depth))
                if not batch:
                    # The inner loop only stops short when the queue is
                    # exhausted, so there is nothing left to crawl.
                    break

                # Crawl the batch in parallel.
                futures = [
                    (executor.submit(self.crawl, url, depth), depth)
                    for url, depth in batch
                ]

                # Results are merged on the calling thread in submission
                # order, so all_urls and queue need no lock.
                for future, depth in futures:
                    try:
                        new_links = future.result(timeout=self.timeout * 3)
                        for link in new_links:
                            if link in all_urls:
                                continue
                            all_urls.add(link)
                            # Links at the depth limit would only be
                            # discarded later; don't enqueue them.
                            if depth + 1 < self.depth:
                                queue.append((link, depth + 1))
                            self.state_manager.update_urls(discovered=1)
                    except Exception as e:
                        # Best-effort crawl: one failing or slow page
                        # must not abort the whole run.
                        logger.debug("Crawl worker error: %s", e)

                progress.update(
                    task,
                    description=f"[cyan]Crawling... Found {len(all_urls)} URLs",
                )

    console.print(f"[green]✓[/green] Crawling complete. Found {len(all_urls)} URLs\n")
    return all_urls
no test coverage detected