MCPcopy
hub / github.com/DropsDevopsOrg/ECommerceCrawlers / get_page

Method get_page

TaobaoCrawler/crawler.py:70–91  ·  view source on GitHub ↗
(self, url)

Source from the content-addressed store, hash-verified

68 return d
69
def get_page(self, url):
    """Fetch ``url`` through the crawler session and pass the body to ``parse``.

    The response text is checked for two marker strings before parsing:

    * the slider-verification notice -- reported as 'cookie需要验证'
      ("cookie needs verification") and flagged via
      ``self.cdb.update_cookie_flag2``;
    * the '请输入' ("please enter") prompt -- reported as 'cookie无效登录'
      ("cookie invalid login") and flagged via
      ``self.cdb.update_cookie_flag0``.

    In either case the URL is put back on ``self.search_url_Queue`` so it
    can be retried, and the page is not parsed.

    Args:
        url: Address of the page to download.

    Returns:
        ``False`` when one of the marker strings is present in the
        response, ``True`` after the page was stored in ``self.page`` and
        ``self.parse()`` was called.

    Raises:
        Nothing is caught here, so any exception raised by
        ``self.session.get`` propagates to the caller.
    """
    self.set_session()
    # NOTE(review): (14, 15) looks like a requests-style (connect, read)
    # timeout pair in seconds -- confirm against the session type.
    r = self.session.get(url, timeout=(14, 15))
    text = r.text

    # Use substring membership instead of ``find(...) > 0``: ``find``
    # returns 0 for a match at the very start of the text, which the old
    # comparison silently treated as "not found".
    if '亲,小二正忙,滑动一下马上回来' in text:
        print("cookie需要验证!!!")
        self.errMessage.put('cookie需要验证')
        self.cdb.update_cookie_flag2(self.user)
        self.search_url_Queue.put(url)  # re-queue the URL for a later retry
        return False

    if '请输入' in text:
        self.errMessage.put('cookie无效登录')
        print("Need Login!!!")
        self.cdb.update_cookie_flag0(self.user)
        self.search_url_Queue.put(url)  # re-queue the URL for a later retry
        return False

    self.page = text
    self.parse()

    # Fixed pause after every successfully parsed page.
    time.sleep(4)
    return True
92
93 def parse(self):
94 pattern = re.compile(r'g_page_config = ({.*});')

Callers 1

run_cry (Method) · 0.95

Calls 5

set_session (Method) · 0.95
parse (Method) · 0.95
get (Method) · 0.80
update_cookie_flag2 (Method) · 0.80
update_cookie_flag0 (Method) · 0.80

Tested by

no test coverage detected