Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp
(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIMEOUT)
| 33 | |
| 34 | @enforce_types |
| 35 | def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIMEOUT) -> ArchiveResult: |
| 36 | """Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp""" |
| 37 | |
| 38 | out_dir = out_dir or Path(link.link_dir) |
| 39 | output: ArchiveOutput = 'media' |
| 40 | output_path = out_dir / output |
| 41 | output_path.mkdir(exist_ok=True) |
| 42 | cmd = [ |
| 43 | YOUTUBEDL_BINARY, |
| 44 | *YOUTUBEDL_ARGS, |
| 45 | *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']), |
| 46 | # TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR} |
| 47 | link.url, |
| 48 | ] |
| 49 | status = 'succeeded' |
| 50 | timer = TimedProgress(timeout, prefix=' ') |
| 51 | try: |
| 52 | result = run(cmd, cwd=str(output_path), timeout=timeout + 1) |
| 53 | chmod_file(output, cwd=str(out_dir)) |
| 54 | if result.returncode: |
| 55 | if (b'ERROR: Unsupported URL' in result.stderr |
| 56 | or b'HTTP Error 404' in result.stderr |
| 57 | or b'HTTP Error 403' in result.stderr |
| 58 | or b'URL could be a direct video link' in result.stderr |
| 59 | or b'Unable to extract container ID' in result.stderr): |
| 60 | # These happen too frequently on non-media pages to warrant printing to console |
| 61 | pass |
| 62 | else: |
| 63 | hints = ( |
| 64 | 'Got youtube-dl (or yt-dlp) response code: {}.'.format(result.returncode), |
| 65 | *result.stderr.decode().split('\n'), |
| 66 | ) |
| 67 | raise ArchiveError('Failed to save media', hints) |
| 68 | except Exception as err: |
| 69 | status = 'failed' |
| 70 | output = err |
| 71 | finally: |
| 72 | timer.end() |
| 73 | |
| 74 | # add video description and subtitles to full-text index |
| 75 | # The possible values for the `errors` argument used when reading these text files are listed below |
| 76 | index_texts = [ |
| 77 | # errors: |
| 78 | # * 'strict' to raise a ValueError exception if there is an |
| 79 | # encoding error. The default value of None has the same effect. |
| 80 | # * 'ignore' ignores errors. Note that ignoring encoding errors |
| 81 | # can lead to data loss. |
| 82 | # * 'xmlcharrefreplace' is only supported when writing to a |
| 83 | # file. Characters not supported by the encoding are replaced with |
| 84 | # the appropriate XML character reference &#nnn;. |
| 85 | # There are a few more options described in https://docs.python.org/3/library/functions.html#open |
| 86 | text_file.read_text(encoding='utf-8', errors='xmlcharrefreplace').strip() |
| 87 | for text_file in ( |
| 88 | *output_path.glob('*.description'), |
| 89 | *output_path.glob('*.srt'), |
| 90 | *output_path.glob('*.vtt'), |
| 91 | *output_path.glob('*.lrc'), |
| 92 | *output_path.glob('*.lrc'), |
nothing calls this directly
no test coverage detected