MCPcopy
hub / github.com/Jack-Cherish/python-spider / Downloader

Method Downloader

biqukan.py:82–89  ·  view source on GitHub ↗
(self, url)

Source from the content-addressed store, hash-verified

80 2017-05-06
81 """
def Downloader(self, url):
    """Download one page and return the text of its content element.

    Parameters:
        url - address of the page to fetch (str)
    Returns:
        text of the <div> with id 'content' and class 'showtxt',
        with every '\\xa0' (non-breaking space) removed (str)
    Raises:
        AttributeError - the page has no such <div>; the type matches
            what the previous implementation raised in that case
        urllib.error.URLError - the request itself failed
    """
    download_req = request.Request(url=url, headers=self.__head)
    # Use the response as a context manager so the connection is closed
    # deterministically, even if reading or decoding raises.
    with request.urlopen(download_req) as download_response:
        # Bytes are decoded as GBK; undecodable bytes are dropped ('ignore').
        download_html = download_response.read().decode('gbk', 'ignore')
    soup = BeautifulSoup(download_html, 'lxml')
    # Take the first matching element directly instead of stringifying the
    # whole result list and parsing it a second time.
    content = soup.find('div', id='content', class_='showtxt')
    if content is None:
        raise AttributeError(
            "no <div id='content' class='showtxt'> found at %s" % url)
    return content.text.replace('\xa0', '')
90
91 """
92 函数说明:将爬取的文章内容写入文件

Callers 1

biqukan.py (File) · 0.80

Calls 1

replace (Method) · 0.80

Tested by

no test coverage detected