Download PDFs for all papers
(papers: List[Dict], output_dir: Path)
| 302 | return False |
| 303 | |
async def download_papers(
    papers: List[Dict],
    output_dir: Path,
    delay: float = 3,
) -> None:
    """Download PDFs for all papers.

    Each PDF is written to ``output_dir / 'pdfs'`` under a file name built
    from ``clean_title(paper['title'])`` plus a ``.pdf`` suffix. A paper whose
    target file already exists is skipped without a download attempt and
    without a pause.

    Args:
        papers: Paper records; each one is indexed with the keys ``'title'``
            and ``'url'``.
        output_dir: Base directory; the ``pdfs`` sub-directory (and any
            missing parent directories) is created if necessary.
        delay: Seconds to sleep after every download attempt, successful or
            not. Defaults to 3.

    Raises:
        KeyError: If a paper record has no ``'title'`` or ``'url'`` key.
    """
    pdf_dir = output_dir / 'pdfs'
    # parents=True so a not-yet-created output_dir does not raise
    # FileNotFoundError before any download has started.
    pdf_dir.mkdir(parents=True, exist_ok=True)

    total = len(papers)
    logging.info(f"Starting download of {total} papers")

    for i, paper in enumerate(papers, 1):
        title = paper['title']
        url = paper['url']

        clean_name = clean_title(title)
        pdf_path = pdf_dir / f"{clean_name}.pdf"

        # An existing file is treated as already downloaded.
        if pdf_path.exists():
            logging.info(f"[{i}/{total}] Already exists: {clean_name}")
            continue

        logging.info(f"[{i}/{total}] Downloading: {clean_name}")

        # NOTE(review): download_pdf is called without ``await``, so it is
        # presumably a synchronous function returning a truthy value on
        # success; if so it blocks the event loop while it runs - confirm
        # against its definition.
        if download_pdf(url, str(pdf_path)):
            logging.info(f"Successfully downloaded: {clean_name}")
        else:
            logging.error(f"Failed to download: {clean_name}")

        # Pause after each attempt before moving on to the next paper.
        await asyncio.sleep(delay)
| 330 | |
| 331 | async def main(): |
| 332 | output_dir = Path('paper_titles') |
no test coverage detected