| 10 | |
| 11 | |
| 12 | class YouTubeTranscriptApi: |
def __init__(
    self,
    proxy_config: Optional[ProxyConfig] = None,
    http_client: Optional[Session] = None,
):
    """
    Prepare the HTTP session used for all requests and create the transcript
    list fetcher that works on top of it.

    Note on thread-safety: this class initializes a `requests.Session` object
    and is therefore not thread-safe. In a multi-threading scenario, create
    one `YouTubeTranscriptApi` instance per thread!

    Note that a session passed in via `http_client` is modified in place: its
    "Accept-Language" header is set and, if `proxy_config` is given, its
    proxies, "Connection" header and mounted adapters may be changed as well.

    :param proxy_config: an optional ProxyConfig object, defining the proxies
        used for all network requests. This can be used to work around your IP
        being blocked by YouTube, as described in the "Working around IP bans"
        section of the README
        (https://github.com/jdepoix/youtube-transcript-api?tab=readme-ov-file#working-around-ip-bans-requestblocked-or-ipblocked-exception)
    :param http_client: an optional requests.Session object. Pass one in if you
        want to share cookies between different instances of
        `YouTubeTranscriptApi`, overwrite defaults, specify SSL certificates,
        etc.
    """
    session = http_client if http_client is not None else Session()
    session.headers.update({"Accept-Language": "en-US"})
    # NOTE: cookie auth is temporarily disabled, because it does not work
    # properly with YouTube's most recent changes. Previously a cookie jar
    # loaded from `cookie_path` was assigned to the session's cookies here.

    if proxy_config is not None:
        session.proxies = proxy_config.to_requests_dict()

        if proxy_config.prevent_keeping_connections_alive:
            session.headers.update({"Connection": "close"})

        if proxy_config.retries_when_blocked > 0:
            # Only HTTP 429 responses are listed as retry-worthy statuses.
            retry_policy = Retry(
                total=proxy_config.retries_when_blocked,
                status_forcelist=[429],
            )
            # Each scheme gets its own adapter instance.
            for scheme in ("http://", "https://"):
                session.mount(scheme, HTTPAdapter(max_retries=retry_policy))

    self._fetcher = TranscriptListFetcher(session, proxy_config=proxy_config)
| 50 | |
| 51 | def fetch( |
| 52 | self, |
| 53 | video_id: str, |
| 54 | languages: Iterable[str] = ("en",), |
| 55 | preserve_formatting: bool = False, |
| 56 | ) -> FetchedTranscript: |
| 57 | """ |
| 58 | Retrieves the transcript for a single video. This is just a shortcut for |
| 59 | calling: |
| 60 | `YouTubeTranscriptApi().list(video_id).find_transcript(languages).fetch(preserve_formatting=preserve_formatting)` |
| 61 | |
| 62 | :param video_id: the ID of the video you want to retrieve the transcript for. |
| 63 | Make sure that this is the actual ID, NOT the full URL to the video! |
| 64 | :param languages: A list of language codes in a descending priority. For |
| 65 | example, if this is set to ["de", "en"] it will first try to fetch the |
| 66 | german transcript (de) and then fetch the english transcript (en) if |
| 67 | it fails to do so. This defaults to ["en"]. |
| 68 | :param preserve_formatting: whether to keep select HTML text formatting |
| 69 | """ |
no outgoing calls
searching dependent graphs…