| 5 | |
| 6 | |
| 7 | def hot_github(keyword): |
| 8 | url = 'https://github.com/trending/{0}'.format(keyword) |
| 9 | main_url = 'https://github.com{0}' |
| 10 | html = requests.get(url).content.decode('utf-8') |
| 11 | reg_hot_url = re.compile('<h3 class="repo-list-name">\s*<a href="(.*?)">') |
| 12 | hot_url = [main_url.format(i) for i in re.findall(reg_hot_url, html)] |
| 13 | url_abstract_reg = re.compile('<p class="repo-list-description">\s*(.*?)\s*</p>') |
| 14 | summary_text = re.findall(url_abstract_reg, html) |
| 15 | hotDF = pd.DataFrame() |
| 16 | hotDF['项目简介'] = summary_text |
| 17 | hotDF['项目地址'] = hot_url |
| 18 | hotDF.to_csv('./github_hot.csv', index=False) |
| 19 | |
| 20 | if __name__ == '__main__': |
| 21 | keyword = input('请输入查找的热门语言:') |