Method get

examples/zhihu/crawler.py:24–55 · view source on GitHub ↗

(self, url)

Source from the content-addressed store, hash-verified

22	self.conn = get_redis_conn(db=1)
23
def get(self, url):
    """Fetch ``url`` and return the response body, retrying on failure.

    At most ``self.retries`` failed attempts are made. When
    ``self.proxy_mode`` is truthy, every attempt first asks
    ``self.fetcher`` for a proxy and polls once per second until a
    non-empty proxy address is returned; otherwise the request is sent
    without a proxy.

    An attempt counts as a failure when the request (or any later step)
    raises, or when the body contains the marker '安全验证' ("security
    verification" -- presumably the site's anti-crawler challenge page;
    confirm against the target site). Every outcome is reported through
    ``self.fetcher.proxy_feedback``.

    On success the elapsed time in milliseconds is fed back, the Redis
    counter ``self.success_req`` is incremented and the completion time
    (epoch seconds) is appended to the Redis list ``self.cur_time``.

    Args:
        url: Address to request.

    Returns:
        The response text, or ``None`` once every attempt has failed.
    """
    proxy = None
    tries = 0
    while tries < self.retries:
        if self.proxy_mode:
            # Test the address itself: the ``{'https': ...}`` wrapper is a
            # non-empty dict and therefore always truthy, so checking the
            # dict would never wait for a proxy to become available.
            proxy_url = self.fetcher.get_proxy()
            while not proxy_url:
                time.sleep(1)
                proxy_url = self.fetcher.get_proxy()
            proxy = {'https': proxy_url}

        try:
            # Wall-clock milliseconds; ``end`` doubles as the timestamp
            # that is pushed to Redis below.
            start = time.time() * 1000
            resp = requests.get(url, headers=self.headers, proxies=proxy,
                                timeout=self.timeout)
            end = time.time() * 1000

            if '安全验证' in resp.text:
                self.fetcher.proxy_feedback('failure')
                tries += 1
                continue

            print('Request succeeded! The proxy is {}'.format(proxy))
            # if you use greedy strategy, you must feedback
            self.fetcher.proxy_feedback('success', int(end - start))
            # not considering transaction
            # NOTE(review): a Redis error here lands in the except branch
            # below, so the proxy also gets a 'failure' feedback and the
            # URL is fetched again -- confirm this is intended.
            self.conn.incr(self.success_req, 1)
            self.conn.rpush(self.cur_time, int(end / 1000))
            return resp.text
        except Exception as e:
            print(e)
            # it's important to feedback, otherwise you may use the bad
            # proxy next time
            self.fetcher.proxy_feedback('failure')
            tries += 1
    return None

__init__Method · 0.80

checkMethod · 0.80

schedule_with_delayMethod · 0.80

get_lockMethod · 0.80

schedule_task_with_lockMethod · 0.80

scheduler_startFunction · 0.80

get_redis_connFunction · 0.80

release_lockFunction · 0.80

get_infoFunction · 0.80

get_per_followersFunction · 0.80

get_proxyMethod · 0.80

proxy_feedbackMethod · 0.80

no test coverage detected