| 57 | |
| 58 | |
def parse_other(doc_id):
    """Download every page image of a document into a folder named ``doc_id``.

    The page-image listing is requested through ``fetch_url``; every
    ``"zoom"`` URL found in the response is downloaded with the module-level
    ``session`` and written to ``<doc_id>/<index>.jpg``, where ``index`` is the
    zero-based position of the URL in the listing.

    Args:
        doc_id: Document identifier as a string. It is concatenated into the
            listing URL and also used verbatim as the output directory path.

    Returns:
        None. The images are written to disk and a status line is printed.

    Raises:
        OSError: If the output directory or an image file cannot be created.
    """
    content_url = "https://wenku.baidu.com/browse/getbcsurl?doc_id=" + doc_id + "&pn=1&rn=99999&type=ppt"
    # NOTE(review): assumes fetch_url returns text (str), since a str regex is
    # applied to its result -- confirm against the helper.
    listing = fetch_url(content_url)

    # Capture the value of each "zoom" field that is directly followed by a
    # "page" field.
    escaped_urls = re.findall(r'{"zoom":"(.*?)","page"', listing)
    # Drop every backslash (presumably JSON-escaped "\/" sequences -- confirm).
    url_list = [item.replace("\\", '') for item in escaped_urls]

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(doc_id, exist_ok=True)

    for index, url in enumerate(url_list):
        # NOTE(review): session looks like a requests-style session; the HTTP
        # status is not checked, so an error page would be saved as-is.
        image_bytes = session.get(url).content
        path = os.path.join(doc_id, str(index) + '.jpg')
        with open(path, 'wb') as f:
            f.write(image_bytes)

    # Reported once, after all pages have been written.
    print("图片保存在" + doc_id + "文件夹")
| 72 | |
| 73 | |
| 74 | def save_file(filename, content): |