Download the PDF file from the specified link and use the paper title as the file name. :param title: The paper title. :param url: The link to the PDF file. :return: A boolean indicating whether the download was successful.
(title, url)
| 25 | return info_list |
| 26 | |
def download_file(title, url):
    """
    Download the file at the specified link and use the paper title as the file name.

    The file is stored in the ``downloaded_pdfs`` directory (created on demand)
    with a ``.zip`` extension when the URL ends in ``.zip`` and ``.pdf`` otherwise.
    Characters that are not allowed in file names are replaced with underscores.
    If the target file already exists, no network request is made.

    :param title: The paper title.
    :param url: The link to the PDF (or ZIP) file.
    :return: True if the file was downloaded or already exists, False if the
        HTTP request failed.
    """
    # Create the download directory; exist_ok avoids a check-then-create race.
    download_dir = 'downloaded_pdfs'
    os.makedirs(download_dir, exist_ok=True)

    # Use the paper title as the file name.
    # Replace any invalid characters in the title with underscores so that a
    # title such as "A/B: a study?" cannot escape the download directory or
    # produce a name the file system rejects.
    safe_title = "".join(
        "_" if ch in '<>:"/\\|?*' or ord(ch) < 32 else ch
        for ch in title
    )
    extension = ".zip" if url.endswith('.zip') else ".pdf"
    file_name = os.path.join(download_dir, f"{safe_title}{extension}")

    # Check if the file already exists *before* opening a connection.
    if os.path.exists(file_name):
        print(f"File {file_name} already exists. Skipping download.")
        return True

    # Stream into a temporary sibling file and rename on success, so that an
    # interrupted download is never mistaken for a finished one on the next run.
    partial_name = file_name + ".part"
    try:
        # The context manager releases the streamed connection; the timeout
        # prevents an unresponsive server from blocking forever.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            # Get the total size of the file from the response headers.
            total_size = int(response.headers.get('content-length', 0))

            print(f"Downloading paper {title}, file name is {file_name}...")
            with open(partial_name, 'wb') as file, tqdm(
                desc=file_name,
                total=total_size,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
                colour="yellow",
                ascii=True,
            ) as bar:
                for data in response.iter_content(chunk_size=1024):
                    size = file.write(data)
                    bar.update(size)

        os.replace(partial_name, file_name)
    except requests.RequestException as e:
        print(f"Error downloading paper {title} (link {url}): {e}")
        return False
    finally:
        # After a successful rename this is a no-op; on any failure it removes
        # the incomplete temporary file.
        if os.path.exists(partial_name):
            os.remove(partial_name)

    print(f"Paper {title} (file name {file_name}) downloaded successfully!")
    return True
| 74 | |
| 75 | def main(): |
| 76 | try: |