(self, url)
| 22 | self.conn = get_redis_conn(db=1) |
| 23 | |
def get(self, url, *, proxy_wait=1):
    """Fetch ``url`` and return the response text, retrying on failure.

    Makes up to ``self.retries`` attempts. When ``self.proxy_mode`` is
    truthy, every attempt asks ``self.fetcher`` for a proxy and blocks until
    a non-empty one is handed out. An attempt counts as failed when the
    request raises, or when the body contains the marker '安全验证'
    (Chinese for "security verification" -- presumably an anti-crawler
    challenge page; confirm against the target site). Every failure is
    reported with ``self.fetcher.proxy_feedback('failure')``.

    On success the elapsed time in milliseconds is reported to the fetcher,
    ``self.success_req`` is incremented in Redis and the completion time in
    whole seconds is pushed onto the ``self.cur_time`` list.

    Args:
        url: Address to request.
        proxy_wait: Seconds to sleep between polls while the fetcher has no
            proxy available. Defaults to 1, the previously hard-coded value.

    Returns:
        The response body as text, or ``None`` once all attempts failed.
    """
    proxy = None
    tries = 0
    while tries < self.retries:
        if self.proxy_mode:
            # Poll on the address, not on the dict: {'https': None} is a
            # non-empty dict and therefore always truthy, so testing the
            # dict would never wait and the request would be sent with no
            # usable proxy.
            address = self.fetcher.get_proxy()
            while not address:
                time.sleep(proxy_wait)
                address = self.fetcher.get_proxy()
            proxy = {'https': address}

        try:
            start = time.time() * 1000
            resp = requests.get(url, headers=self.headers, proxies=proxy, timeout=self.timeout)
            end = time.time() * 1000
            if '安全验证' in resp.text:
                self.fetcher.proxy_feedback('failure')
                tries += 1
                continue

            print('Request succeeded! The proxy is {}'.format(proxy))
            # With a greedy strategy the fetcher needs the response time
            # (milliseconds) fed back to it.
            self.fetcher.proxy_feedback('success', int(end - start))
            # The two Redis writes are not wrapped in a transaction.
            self.conn.incr(self.success_req, 1)
            self.conn.rpush(self.cur_time, int(end / 1000))
            return resp.text
        except Exception as e:
            # Deliberately broad: any error raised inside the try (including
            # one from the Redis writes above) counts as a failed attempt.
            print(e)
            # Feedback matters here, otherwise the same bad proxy may be
            # handed out again on the next attempt.
            self.fetcher.proxy_feedback('failure')
            tries += 1
    return None
no test coverage detected