Parameters ---------- url : str URL of the page from which data needs to be extracted. Note: This is the url of the page given as user input. heads : dict Headers needed to make request, given URL. Raises --------
(self, url, heads)
| 230 | color='red', attrs=['bold'])) |
| 231 | |
def ExtJsExtract(self, url, heads):
    """
    Collect the absolute URLs of the external JavaScript files of a page.

    The page is fetched and parsed, and the resolved ``src`` of every
    ``<script>`` tag that has one is appended to ``jsLinkList`` (a name
    defined outside this method).

    Parameters
    ----------
    url : str
        URL of the page from which data needs to be extracted.
        Note: This is the url of the page given as user input. If it does
        not start with ``http://`` or ``https://``, ``http://`` is
        prepended before it is used.
    heads : dict
        Headers needed to make request, given URL.

    Notes
    ----------
    If the response body cannot be decoded, "Decoding error, Exiting..."
    is printed and the process terminates via ``sys.exit(1)``; the
    ``UnicodeDecodeError`` is not propagated to the caller.
    """
    print(termcolor.colored(
        "Searching for External Javascript links in page...", color='yellow', attrs=['bold']))

    # Normalise the URL once so that the request target and the base used to
    # resolve relative script paths are the same. Resolving against the raw,
    # scheme-less user input would yield non-absolute links.
    if url.startswith(('http://', 'https://')):
        page_url = url
    else:
        page_url = 'http://' + url

    # isSSL is defined outside this method; as before, a truthy value turns
    # certificate verification off, otherwise requests' default (on) applies.
    req = requests.get(page_url, headers=heads, verify=not isSSL)

    try:
        html = unquote(req.content.decode('unicode-escape'))
    except UnicodeDecodeError:
        print("Decoding error, Exiting...")
        sys.exit(1)

    soup = BeautifulSoup(html, features='html.parser')
    for script in soup.find_all('script'):
        src = script.get('src')
        if src:
            # Inline scripts have no src and are skipped.
            jsLinkList.append(urljoin(page_url, src))

    print(termcolor.colored(
        "Successfully got all the external js links.", color='blue', attrs=['bold']))
| 277 | |
| 278 | def SaveExtJsContent(self, js): |
| 279 | """ |