MCPcopy Index your code
hub / github.com/sqlmapproject/sqlmap / crawlThread

Function crawlThread

lib/utils/crawler.py:54–138  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

52 threadData.shared.formsFound = False
53
54 def crawlThread():
55 threadData = getCurrentThreadData()
56
57 while kb.threadContinue:
58 with kb.locks.limit:
59 if threadData.shared.unprocessed:
60 current = threadData.shared.unprocessed.pop()
61 if current in visited:
62 continue
63 elif conf.crawlExclude and re.search(conf.crawlExclude, current):
64 dbgMsg = "skipping '%s'" % current
65 logger.debug(dbgMsg)
66 continue
67 else:
68 visited.add(current)
69 else:
70 break
71
72 content = None
73 try:
74 if current:
75 content = Request.getPage(url=current, post=post, cookie=None, crawling=True, raise404=False)[0]
76 except SqlmapConnectionException as ex:
77 errMsg = "connection exception detected ('%s'). skipping " % getSafeExString(ex)
78 errMsg += "URL '%s'" % current
79 logger.critical(errMsg)
80 except SqlmapSyntaxException:
81 errMsg = "invalid URL detected. skipping '%s'" % current
82 logger.critical(errMsg)
83 except _http_client.InvalidURL as ex:
84 errMsg = "invalid URL detected ('%s'). skipping " % getSafeExString(ex)
85 errMsg += "URL '%s'" % current
86 logger.critical(errMsg)
87
88 if not kb.threadContinue:
89 break
90
91 if isinstance(content, six.text_type):
92 try:
93 match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
94 if match:
95 content = "<html>%s</html>" % match.group(1)
96
97 soup = BeautifulSoup(content)
98 tags = soup('a')
99
100 tags += re.finditer(r'(?i)\s(href|src)=["\'](?P<href>[^>"\']+)', content)
101 tags += re.finditer(r'(?i)window\.open\(["\'](?P<href>[^)"\']+)["\']', content)
102
103 for tag in tags:
104 href = tag.get("href") if hasattr(tag, "get") else tag.group("href")
105
106 if href:
107 if threadData.lastRedirectURL and threadData.lastRedirectURL[0] == threadData.lastRequestUID:
108 current = threadData.lastRedirectURL[1]
109 url = _urllib.parse.urljoin(current, htmlUnescape(href))
110
111 # flag to know if we are dealing with the same target host

Callers

nothing calls this directly

Calls 15

getCurrentThreadData — Function · 0.90
getSafeExString — Function · 0.90
BeautifulSoup — Class · 0.90
htmlUnescape — Function · 0.90
checkSameHost — Function · 0.90
extractRegexResult — Function · 0.90
findPageForms — Function · 0.90
dataToStdout — Function · 0.90
round — Function · 0.85
debug — Method · 0.80
getPage — Method · 0.80
pop — Method · 0.45

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…