MCPcopy
hub / github.com/ArchiveBox/ArchiveBox / save_media

Function save_media

archivebox/extractors/media.py:35–104  ·  view source on GitHub ↗

Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp

(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIMEOUT)

Source from the content-addressed store, hash-verified

33
34@enforce_types
35def save_media(link: Link, out_dir: Optional[Path]=None, timeout: int=MEDIA_TIMEOUT) -> ArchiveResult:
36 """Download playlists or individual video, audio, and subtitles using youtube-dl or yt-dlp"""
37
38 out_dir = out_dir or Path(link.link_dir)
39 output: ArchiveOutput = 'media'
40 output_path = out_dir / output
41 output_path.mkdir(exist_ok=True)
42 cmd = [
43 YOUTUBEDL_BINARY,
44 *YOUTUBEDL_ARGS,
45 *([] if CHECK_SSL_VALIDITY else ['--no-check-certificate']),
46 # TODO: add --cookies-from-browser={CHROME_USER_DATA_DIR}
47 link.url,
48 ]
49 status = 'succeeded'
50 timer = TimedProgress(timeout, prefix=' ')
51 try:
52 result = run(cmd, cwd=str(output_path), timeout=timeout + 1)
53 chmod_file(output, cwd=str(out_dir))
54 if result.returncode:
55 if (b'ERROR: Unsupported URL' in result.stderr
56 or b'HTTP Error 404' in result.stderr
57 or b'HTTP Error 403' in result.stderr
58 or b'URL could be a direct video link' in result.stderr
59 or b'Unable to extract container ID' in result.stderr):
60 # These happen too frequently on non-media pages to warrant printing to console
61 pass
62 else:
63 hints = (
64 'Got youtube-dl (or yt-dlp) response code: {}.'.format(result.returncode),
65 *result.stderr.decode().split('\n'),
66 )
67 raise ArchiveError('Failed to save media', hints)
68 except Exception as err:
69 status = 'failed'
70 output = err
71 finally:
72 timer.end()
73
74 # add video description and subtitles to full-text index
75 # Let's try a few different
76 index_texts = [
77 # errors:
78 # * 'strict' to raise a ValueError exception if there is an
79 # encoding error. The default value of None has the same effect.
80 # * 'ignore' ignores errors. Note that ignoring encoding errors
81 # can lead to data loss.
82 # * 'xmlcharrefreplace' is only supported when writing to a
83 # file. Characters not supported by the encoding are replaced with
84 # the appropriate XML character reference &#nnn;.
85 # There are a few more options described in https://docs.python.org/3/library/functions.html#open
86 text_file.read_text(encoding='utf-8', errors='xmlcharrefreplace').strip()
87 for text_file in (
88 *output_path.glob('*.description'),
89 *output_path.glob('*.srt'),
90 *output_path.glob('*.vtt'),
91 *output_path.glob('*.lrc'),
92 *output_path.glob('*.lrc'),

Callers

nothing calls this directly

Calls 6

end · Method · 0.95
TimedProgress · Class · 0.85
chmod_file · Function · 0.85
ArchiveError · Class · 0.85
run · Function · 0.50
ArchiveResult · Class · 0.50

Tested by

no test coverage detected