hub / github.com/s0md3v/XSStrike / htmlParser

Function htmlParser

core/htmlParser.py:7–91 · view source on GitHub ↗

(response, encoding)

Source from the content-addressed store, hash-verified

5
6
7	def htmlParser(response, encoding):
8	rawResponse = response # raw response returned by requests
9	response = response.text # response content
10	if encoding: # if the user has specified an encoding, encode the probe in that
11	response = response.replace(encoding(xsschecker), xsschecker)
12	reflections = response.count(xsschecker)
13	position_and_context = {}
14	environment_details = {}
15	clean_response = re.sub(r'<!--[.\s\S]*?-->', '', response)
16	script_checkable = clean_response
17	for script in extractScripts(script_checkable):
18	occurences = re.finditer(r'(%s.*?)$' % xsschecker, script)
19	if occurences:
20	for occurence in occurences:
21	thisPosition = occurence.start(1)
22	position_and_context[thisPosition] = 'script'
23	environment_details[thisPosition] = {}
24	environment_details[thisPosition]['details'] = {'quote' : ''}
25	for i in range(len(occurence.group())):
26	currentChar = occurence.group()[i]
27	if currentChar in ('/', '\'', '`', '"') and not escaped(i, occurence.group()):
28	environment_details[thisPosition]['details']['quote'] = currentChar
29	elif currentChar in (')', ']', '}', '}') and not escaped(i, occurence.group()):
30	break
31	script_checkable = script_checkable.replace(xsschecker, '', 1)
32	if len(position_and_context) < reflections:
33	attribute_context = re.finditer(r'<[^>]?(%s)[^>]?>' % xsschecker, clean_response)
34	for occurence in attribute_context:
35	match = occurence.group(0)
36	thisPosition = occurence.start(1)
37	parts = re.split(r'\s', match)
38	tag = parts[0][1:]
39	for part in parts:
40	if xsschecker in part:
41	Type, quote, name, value = '', '', '', ''
42	if '=' in part:
43	quote = re.search(r'=([\'`"])?', part).group(1)
44	name_and_value = part.split('=')[0], '='.join(part.split('=')[1:])
45	if xsschecker == name_and_value[0]:
46	Type = 'name'
47	else:
48	Type = 'value'
49	name = name_and_value[0]
50	value = name_and_value[1].rstrip('>').rstrip(quote).lstrip(quote)
51	else:
52	Type = 'flag'
53	position_and_context[thisPosition] = 'attribute'
54	environment_details[thisPosition] = {}
55	environment_details[thisPosition]['details'] = {'tag' : tag, 'type' : Type, 'quote' : quote, 'value' : value, 'name' : name}
56	if len(position_and_context) < reflections:
57	html_context = re.finditer(xsschecker, clean_response)
58	for occurence in html_context:
59	thisPosition = occurence.start()
60	if thisPosition not in position_and_context:
61	position_and_context[occurence.start()] = 'html'
62	environment_details[thisPosition] = {}
63	environment_details[thisPosition]['details'] = {}
64	if len(position_and_context) < reflections:

Callers 2

crawl (Function) · 0.90

scan (Function) · 0.90

Calls 3

extractScripts (Function) · 0.90

escaped (Function) · 0.90

isBadContext (Function) · 0.90

Tested by

no test coverage detected