Args:

- source: a local path (str or Path), a URI string (http:, https:, file:, or data:), a requests.Response object, or a binary file-like stream.
- stream_info: optional stream info to use for the conversion. If None, it is inferred from the source.
- kwargs: additional keyword arguments to pass to the converter.
(
self,
source: Union[str, requests.Response, Path, BinaryIO],
*,
stream_info: Optional[StreamInfo] = None,
**kwargs: Any,
)
| 273 | warn("Plugins converters are already enabled.", RuntimeWarning) |
| 274 | |
| 275 | def convert( |
| 276 | self, |
| 277 | source: Union[str, requests.Response, Path, BinaryIO], |
| 278 | *, |
| 279 | stream_info: Optional[StreamInfo] = None, |
| 280 | **kwargs: Any, |
| 281 | ) -> DocumentConverterResult: # TODO: deal with kwargs |
| 282 | """ |
| 283 | Args: |
| 284 | - source: a local path (str or Path), a URI string (http:, https:, file:, or data:), a requests.Response object, or a binary file-like stream |
| 285 | - stream_info: optional stream info to use for the conversion. If None, infer from source |
| 286 | - kwargs: additional arguments to pass to the converter |
| 287 | """ |
| 288 | |
| 289 | # Local path or url |
| 290 | if isinstance(source, str): |
| 291 | if ( |
| 292 | source.startswith("http:") |
| 293 | or source.startswith("https:") |
| 294 | or source.startswith("file:") |
| 295 | or source.startswith("data:") |
| 296 | ): |
| 297 | # Rename the url argument to mock_url |
| 298 | # (Deprecated -- use stream_info) |
| 299 | _kwargs = {k: v for k, v in kwargs.items()} |
| 300 | if "url" in _kwargs: |
| 301 | _kwargs["mock_url"] = _kwargs["url"] |
| 302 | del _kwargs["url"] |
| 303 | |
| 304 | return self.convert_uri(source, stream_info=stream_info, **_kwargs) |
| 305 | else: |
| 306 | return self.convert_local(source, stream_info=stream_info, **kwargs) |
| 307 | # Path object |
| 308 | elif isinstance(source, Path): |
| 309 | return self.convert_local(source, stream_info=stream_info, **kwargs) |
| 310 | # Request response |
| 311 | elif isinstance(source, requests.Response): |
| 312 | return self.convert_response(source, stream_info=stream_info, **kwargs) |
| 313 | # Binary stream |
| 314 | elif ( |
| 315 | hasattr(source, "read") |
| 316 | and callable(source.read) |
| 317 | and not isinstance(source, io.TextIOBase) |
| 318 | ): |
| 319 | return self.convert_stream(source, stream_info=stream_info, **kwargs) |
| 320 | else: |
| 321 | raise TypeError( |
| 322 | f"Invalid source type: {type(source)}. Expected str, requests.Response, BinaryIO." |
| 323 | ) |
| 324 | |
| 325 | def convert_local( |
| 326 | self, |