MCPcopy
hub / github.com/nsonaniya2010/SubDomainizer / ExtJsExtract

Method ExtJsExtract

SubDomainizer.py:232–276  ·  view source on GitHub ↗

Parameters ---------- url : str URL of the page from which data needs to be extracted. Note: This is the url of the page given as user input. heads : dict Headers needed to make request, given URL. Raises --------

(self, url, heads)

Source from the content-addressed store, hash-verified

230 color='red', attrs=['bold']))
231
def ExtJsExtract(self, url, heads):
    """
    Collect the URLs of all external JavaScript files referenced by a page.

    Fetches the page, parses it as HTML, and appends the absolute URL of
    every ``<script>`` tag that has a ``src`` attribute to the
    module-level ``jsLinkList``.

    Parameters
    ----------
    url : str
        URL of the page from which data needs to be extracted.
        Note: This is the url of the page given as user input. When it
        does not start with ``http://`` or ``https://``, ``http://`` is
        prepended before it is requested and used to resolve links.
    heads : dict
        Headers needed to make request, given URL.

    Raises
    ------
    SystemExit
        If the page content cannot be decoded (exit status 1).

    Notes
    -----
    Exceptions raised by ``requests.get`` are not handled here and
    propagate to the caller.
    """
    print(termcolor.colored(
        "Searching for External Javascript links in page...", color='yellow', attrs=['bold']))

    # Normalise the URL once so that the request and the resolution of
    # relative script paths both use the same, scheme-qualified base.
    # Resolving against a scheme-less base would yield links without a
    # scheme (or, for root-relative paths, without a host at all).
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    # When the module-level isSSL flag is set, certificate verification
    # is disabled for the request; otherwise the library default is used.
    request_kwargs = {'headers': heads}
    if isSSL:
        request_kwargs['verify'] = False
    req = requests.get(url, **request_kwargs)

    # Only the decode step can raise UnicodeDecodeError, so keep the try
    # body limited to it.
    try:
        html = unquote(req.content.decode('unicode-escape'))
    except UnicodeDecodeError:
        print("Decoding error, Exiting...")
        sys.exit(1)

    soup = BeautifulSoup(html, features='html.parser')
    for script in soup.find_all('script'):
        src = script.get('src')
        if src:
            # urljoin turns relative and protocol-relative paths into
            # absolute URLs based on the page URL.
            jsLinkList.append(urljoin(url, src))

    print(termcolor.colored(
        "Successfully got all the external js links.", color='blue', attrs=['bold']))
277
278 def SaveExtJsContent(self, js):
279 """

Callers 1

subextractorFunction · 0.95

Calls

no outgoing calls

Tested by

no test coverage detected