(son_url)
| 15 | for i in x_url] all_url_list.extend(main_url) |
| 16 | return all_url_list |
def get_title(son_url):
    """Decide whether the page at *son_url* is a campus recruitment notice.

    The page is downloaded, decoded as GBK, and the text of its
    ``<h1 class="newstitle">`` heading is inspected. The page counts as a
    campus recruitment notice only when that heading contains both
    '时间' (time) and '地点' (place).

    Args:
        son_url: URL of the page to inspect.

    Returns:
        True when the heading contains both keywords; False otherwise,
        including when the page has no matching heading.
    """
    html = requests.get(son_url).content.decode('gbk')
    # re.search returns None when the heading is absent, instead of the
    # IndexError that indexing an empty findall() result would raise.
    heading = re.search(r'<h1 class="newstitle">(.*?)</h1>', html)
    if heading is None:
        return False
    explain_text = heading.group(1)
    # Each keyword needs its own membership test: ('时间' and '地点')
    # evaluates to just '地点', so the first keyword was never checked.
    return '时间' in explain_text and '地点' in explain_text
| 24 | def save_html(): |
| 25 | all_url_list = crawl_all_main_url() |
| 26 | for son_url in all_url_list: |