Parse the response text (one JSON object per line) to get proxies. :return: a generator yielding one Proxy per non-empty line
(self, html)
| 12 | urls = [BASE_URL] |
| 13 | |
def parse(self, html):
    """
    Parse the response body into proxies.

    The body is read as newline-delimited JSON: every non-blank line is
    decoded with ``json.loads`` and must be an object carrying ``host``
    and ``port`` keys.

    :param html: raw response text, one JSON object per line
    :return: generator yielding one ``Proxy`` per non-blank line
    :raises ValueError: if a non-blank line is not valid JSON
        (``json.JSONDecodeError`` is a ``ValueError`` subclass)
    :raises KeyError: if a decoded object lacks ``host`` or ``port``
    """
    for line in html.split('\n'):
        # Strip before the emptiness test so whitespace-only lines (e.g. the
        # lone '\r' left behind by CRLF line endings) are skipped instead of
        # being handed to json.loads, which rejects them.
        record = line.strip()
        if not record:
            continue
        ip_address = json.loads(record)
        host = ip_address['host']
        port = ip_address['port']
        yield Proxy(host=host, port=port)
| 27 | |
| 28 | if __name__ == '__main__': |
| 29 | crawler = FatezeroCrawler() |