hub / github.com/nsonaniya2010/SubDomainizer / ExtJsExtract

Method ExtJsExtract

SubDomainizer.py:232–276 · view source on GitHub ↗

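Parameters: url (str) — URL of the page from which data needs to be extracted (note: this is the URL of the page given as user input); heads (dict) — headers needed to make the request to the given URL. Raises: UnicodeDecodeError — if the encoding found in the page is unknown (the method catches it, prints an error, and exits with status 1).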

(self, url, heads)

Source from the content-addressed store, hash-verified

230	color='red', attrs=['bold']))
231
def ExtJsExtract(self, url, heads):
    """
    Collect the URLs of every external JavaScript file referenced by a page.

    The page is fetched, parsed as HTML, and the ``src`` of each
    ``<script>`` tag is resolved against the page URL and appended to the
    module-level ``jsLinkList``.

    Parameters
    ----------
    url : str
        URL of the page from which data needs to be extracted.
        Note: This is the url of the page given as user input. If it does
        not start with ``http://`` or ``https://``, ``http://`` is
        prepended before the request is made.
    heads : dict
        Headers needed to make request, given URL.

    Raises
    ------
    SystemExit
        With status 1 if the response body cannot be decoded
        (a ``UnicodeDecodeError`` is caught, a message is printed, and
        ``sys.exit(1)`` is called).
    """
    print(termcolor.colored(
        "Searching for External Javascript links in page...",
        color='yellow', attrs=['bold']))

    # Normalise the URL once so that the request *and* the urljoin base
    # below use the same absolute URL. Joining against a scheme-less base
    # (e.g. "example.com") would turn "/a.js" into "/a.js", losing the host.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    # When the module-level isSSL flag is truthy, certificate verification
    # is switched off; otherwise requests' default is left untouched.
    request_kwargs = {'headers': heads}
    if isSSL:
        request_kwargs['verify'] = False
    req = requests.get(url, **request_kwargs)

    try:
        # 'unicode-escape' expands \uXXXX / \xXX sequences embedded in the
        # page before percent-encoded characters are unquoted.
        html = unquote(req.content.decode('unicode-escape'))
        soup = BeautifulSoup(html, features='html.parser')

        for script in soup.find_all('script'):
            src = script.get('src')
            if src:
                # Resolve relative / protocol-relative sources to absolute.
                jsLinkList.append(urljoin(url, src))
        print(termcolor.colored(
            "Successfully got all the external js links.",
            color='blue', attrs=['bold']))
    except UnicodeDecodeError:
        print("Decoding error, Exiting...")
        sys.exit(1)
277
278	def SaveExtJsContent(self, js):
279	"""

Callers 1

subextractorFunction · 0.95

Calls

no outgoing calls

Tested by

no test coverage detected