| 23 | |
| 24 | |
def get_li(doc):
    """Parse one listing page and collect the fields of every movie on it.

    Args:
        doc: HTML markup of the page; it is handed to BeautifulSoup with
            the ``html.parser`` backend.

    Returns:
        A 5-tuple ``(name, star_con, score, info_list, next_url)``. The
        first four are parallel lists (title, rating-count text, rating,
        short review), one entry per ``<li>`` of the ``ol.grid_view``
        element. ``next_url`` is ``DOWNLOAD_URL + href`` of the next-page
        link, or ``None`` when the page has no such link.
    """
    soup = BeautifulSoup(doc, 'html.parser')
    ol = soup.find('ol', class_='grid_view')
    name = []  # movie titles
    star_con = []  # rating-count text
    score = []  # ratings
    info_list = []  # short reviews

    # find() returns None when the listing is absent; treat that as a page
    # with no items rather than failing with AttributeError.
    items = ol.find_all('li') if ol is not None else []
    for item in items:
        detail = item.find('div', attrs={'class': 'hd'})
        movie_name = detail.find(
            'span', attrs={'class': 'title'}).get_text()  # movie title
        level_star = item.find(
            'span', attrs={'class': 'rating_num'}).get_text()  # rating
        star = item.find('div', attrs={'class': 'star'})
        star_num = star.find(text=re.compile('评价'))  # rating-count text

        info = item.find('span', attrs={'class': 'inq'})  # short review
        if info:  # not every entry has a short review
            info_list.append(info.get_text())
        else:
            info_list.append('无')
        score.append(level_star)

        name.append(movie_name)
        star_con.append(star_num)

    # Look up the pager span and its link separately: either may be missing
    # (e.g. on the last page), and chaining the calls would raise on None.
    next_span = soup.find('span', attrs={'class': 'next'})
    page = next_span.find('a') if next_span is not None else None
    if page:
        return name, star_con, score, info_list, DOWNLOAD_URL + page['href']
    return name, star_con, score, info_list, None
| 54 | |
| 55 | |
| 56 | def main(): |