Parameters ---------- url : str URL of the page from which data needs to be extracted. Note: This is the url of the page given as user input. heads : dict Headers needed to make request, given URL. Raises --------
(self, url, heads)
| 186 | """ |
| 187 | |
def IntJsExtract(self, url, heads):
    """
    Fetch a page and record its minified HTML as the "Inline" script source.

    Parameters
    ----------
    url : str
        URL of the page from which data needs to be extracted.
        Note: This is the url of the page given as user input. If it does
        not start with ``http://`` or ``https://``, ``http://`` is
        prepended before the request is made.
    heads : dict
        Headers needed to make request, given URL.

    Notes
    -----
    A ``UnicodeDecodeError`` raised while processing the response body is
    caught here: a "Decoding error..." message is printed and nothing is
    stored. Errors raised by the HTTP request itself are not handled in
    this method.
    """

    # A URL given without a scheme is requested over plain HTTP.
    if url.startswith(('http://', 'https://')):
        target = url
    else:
        target = 'http://' + url

    # Only pass ``verify`` when ``isSSL`` (defined outside this method) is
    # set, so the library default applies in every other case.
    request_options = {'headers': heads}
    if isSSL:
        request_options['verify'] = False
    req = requests.get(target, **request_options)

    print(termcolor.colored("Searching for Inline Javascripts...",
                            color='yellow', attrs=['bold']))

    try:
        # NOTE(review): 'unicode-escape' resolves backslash escapes but
        # treats the raw bytes as Latin-1, so non-ASCII UTF-8 text in the
        # page may be mis-decoded -- confirm this is intended.
        html = unquote(req.content.decode('unicode-escape'))
        minhtml = htmlmin.minify(html, remove_empty_space=True)
        minhtml = minhtml.replace('\n', '')
        # ``finallist`` and ``new_final_dict`` are defined outside this
        # method; both receive the same minified page text.
        finallist.append(minhtml)
        new_final_dict["Inline"] = minhtml
        print(termcolor.colored(
            "Successfully got all the Inline Scripts.",
            color='blue', attrs=['bold']))
    except UnicodeDecodeError:
        print(termcolor.colored("Decoding error...",
                                color='red', attrs=['bold']))
| 231 | |
| 232 | def ExtJsExtract(self, url, heads): |
| 233 | """ |