hub / github.com/sqlmapproject/sqlmap / crawl

Function crawl

lib/utils/crawler.py:44–238 · view source on GitHub ↗

(target, post=None, cookie=None)

Source from the content-addressed store, hash-verified

42	from thirdparty.six.moves import urllib as _urllib
43
44	def crawl(target, post=None, cookie=None):
45	if not target:
46	return
47
48	try:
49	visited = set()
50	threadData = getCurrentThreadData()
51	threadData.shared.value = OrderedSet()
52	threadData.shared.formsFound = False
53
54	def crawlThread():
55	threadData = getCurrentThreadData()
56
57	while kb.threadContinue:
58	with kb.locks.limit:
59	if threadData.shared.unprocessed:
60	current = threadData.shared.unprocessed.pop()
61	if current in visited:
62	continue
63	elif conf.crawlExclude and re.search(conf.crawlExclude, current):
64	dbgMsg = "skipping '%s'" % current
65	logger.debug(dbgMsg)
66	continue
67	else:
68	visited.add(current)
69	else:
70	break
71
72	content = None
73	try:
74	if current:
75	content = Request.getPage(url=current, post=post, cookie=None, crawling=True, raise404=False)[0]
76	except SqlmapConnectionException as ex:
77	errMsg = "connection exception detected ('%s'). skipping " % getSafeExString(ex)
78	errMsg += "URL '%s'" % current
79	logger.critical(errMsg)
80	except SqlmapSyntaxException:
81	errMsg = "invalid URL detected. skipping '%s'" % current
82	logger.critical(errMsg)
83	except _http_client.InvalidURL as ex:
84	errMsg = "invalid URL detected ('%s'). skipping " % getSafeExString(ex)
85	errMsg += "URL '%s'" % current
86	logger.critical(errMsg)
87
88	if not kb.threadContinue:
89	break
90
91	if isinstance(content, six.text_type):
92	try:
93	match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
94	if match:
95	content = "<html>%s</html>" % match.group(1)
96
97	soup = BeautifulSoup(content)
98	tags = soup('a')
99
100	tags += re.finditer(r'(?i)\s(href\|src)=["\'](?P<href>[^>"\']+)', content)
101	tags += re.finditer(r'(?i)window\.open\(["\'](?P<href>[^)"\']+)["\']', content)

Callers 2

main (Function) · 0.90

_setCrawler (Function) · 0.90

Calls 15

add (Method) · 0.95

getCurrentThreadData (Function) · 0.90

OrderedSet (Class) · 0.90

readInput (Function) · 0.90

parseSitemap (Function) · 0.90

getSafeExString (Function) · 0.90

runThreads (Function) · 0.90

clearConsoleLine (Function) · 0.90

urldecode (Function) · 0.90

xrange (Class) · 0.85

storeResultsToFile (Function) · 0.85

info (Method) · 0.80

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…