MCPcopy
hub / github.com/s0md3v/XSStrike / htmlParser

Function htmlParser

core/htmlParser.py:7–91  ·  view source on GitHub ↗
(response, encoding)

Source from the content-addressed store, hash-verified

5
6
7def htmlParser(response, encoding):
8 rawResponse = response # raw response returned by requests
9 response = response.text # response content
10 if encoding: # if the user has specified an encoding, encode the probe in that
11 response = response.replace(encoding(xsschecker), xsschecker)
12 reflections = response.count(xsschecker)
13 position_and_context = {}
14 environment_details = {}
15 clean_response = re.sub(r'<!--[.\s\S]*?-->', '', response)
16 script_checkable = clean_response
17 for script in extractScripts(script_checkable):
18 occurences = re.finditer(r'(%s.*?)$' % xsschecker, script)
19 if occurences:
20 for occurence in occurences:
21 thisPosition = occurence.start(1)
22 position_and_context[thisPosition] = 'script'
23 environment_details[thisPosition] = {}
24 environment_details[thisPosition]['details'] = {'quote' : ''}
25 for i in range(len(occurence.group())):
26 currentChar = occurence.group()[i]
27 if currentChar in ('/', '\'', '`', '"') and not escaped(i, occurence.group()):
28 environment_details[thisPosition]['details']['quote'] = currentChar
29 elif currentChar in (')', ']', '}', '}') and not escaped(i, occurence.group()):
30 break
31 script_checkable = script_checkable.replace(xsschecker, '', 1)
32 if len(position_and_context) < reflections:
33 attribute_context = re.finditer(r'<[^>]*?(%s)[^>]*?>' % xsschecker, clean_response)
34 for occurence in attribute_context:
35 match = occurence.group(0)
36 thisPosition = occurence.start(1)
37 parts = re.split(r'\s', match)
38 tag = parts[0][1:]
39 for part in parts:
40 if xsschecker in part:
41 Type, quote, name, value = '', '', '', ''
42 if '=' in part:
43 quote = re.search(r'=([\'`"])?', part).group(1)
44 name_and_value = part.split('=')[0], '='.join(part.split('=')[1:])
45 if xsschecker == name_and_value[0]:
46 Type = 'name'
47 else:
48 Type = 'value'
49 name = name_and_value[0]
50 value = name_and_value[1].rstrip('>').rstrip(quote).lstrip(quote)
51 else:
52 Type = 'flag'
53 position_and_context[thisPosition] = 'attribute'
54 environment_details[thisPosition] = {}
55 environment_details[thisPosition]['details'] = {'tag' : tag, 'type' : Type, 'quote' : quote, 'value' : value, 'name' : name}
56 if len(position_and_context) < reflections:
57 html_context = re.finditer(xsschecker, clean_response)
58 for occurence in html_context:
59 thisPosition = occurence.start()
60 if thisPosition not in position_and_context:
61 position_and_context[occurence.start()] = 'html'
62 environment_details[thisPosition] = {}
63 environment_details[thisPosition]['details'] = {}
64 if len(position_and_context) < reflections:

Callers 2

crawl (Function) · 0.90
scan (Function) · 0.90

Calls 3

extractScripts (Function) · 0.90
escaped (Function) · 0.90
isBadContext (Function) · 0.90

Tested by

no test coverage detected