MCPcopy
hub / github.com/microsoft/markitdown / convert

Method convert

packages/markitdown/src/markitdown/_markitdown.py:275–323  ·  view source on GitHub ↗

Args: - source: can be a path (str or Path), a URL, a requests.Response object, or a binary stream - stream_info: optional stream info to use for the conversion. If None, it is inferred from the source - kwargs: additional arguments to pass to the converter

(
        self,
        source: Union[str, requests.Response, Path, BinaryIO],
        *,
        stream_info: Optional[StreamInfo] = None,
        **kwargs: Any,
    )

Source from the content-addressed store, hash-verified

273 warn("Plugins converters are already enabled.", RuntimeWarning)
274
def convert(
    self,
    source: Union[str, requests.Response, Path, BinaryIO],
    *,
    stream_info: Optional[StreamInfo] = None,
    **kwargs: Any,
) -> DocumentConverterResult:  # TODO: deal with kwargs
    """
    Convert `source` by dispatching on its type to the matching converter method.

    Args:
        - source: can be a path (str or Path), a URI string (starting with
          "http:", "https:", "file:" or "data:"), a requests.Response object,
          or a binary stream (any object with a callable `read` that is not
          an io.TextIOBase)
        - stream_info: optional stream info to use for the conversion. If None, infer from source
        - kwargs: additional arguments to pass to the converter

    Returns:
        Whatever the selected method (convert_uri, convert_local,
        convert_response or convert_stream) returns.

    Raises:
        TypeError: if `source` is none of the supported types; text-mode
            streams (io.TextIOBase) fall into this case.
    """

    # Local path or url
    if isinstance(source, str):
        # Strings carrying one of these scheme prefixes go to convert_uri;
        # every other string is treated as a local path.
        if source.startswith(("http:", "https:", "file:", "data:")):
            # Rename the url argument to mock_url
            # (Deprecated -- use stream_info)
            uri_kwargs = dict(kwargs)
            if "url" in uri_kwargs:
                uri_kwargs["mock_url"] = uri_kwargs.pop("url")

            return self.convert_uri(source, stream_info=stream_info, **uri_kwargs)

        return self.convert_local(source, stream_info=stream_info, **kwargs)

    # Path object
    if isinstance(source, Path):
        return self.convert_local(source, stream_info=stream_info, **kwargs)

    # Request response
    if isinstance(source, requests.Response):
        return self.convert_response(source, stream_info=stream_info, **kwargs)

    # Binary stream: anything readable that is not a text-mode stream
    if (
        hasattr(source, "read")
        and callable(source.read)
        and not isinstance(source, io.TextIOBase)
    ):
        return self.convert_stream(source, stream_info=stream_info, **kwargs)

    raise TypeError(
        f"Invalid source type: {type(source)}. Expected str, Path, requests.Response, BinaryIO."
    )
324
325 def convert_local(
326 self,

Callers 15

mainFunction · 0.95
test_docx_commentsFunction · 0.95
test_docx_equationsFunction · 0.95
test_doc_rlinkFunction · 0.95
test_markitdown_remoteFunction · 0.95
test_exceptionsFunction · 0.95
test_markitdown_exiftoolFunction · 0.95
test_markitdown_llmFunction · 0.95

Calls 4

convert_uriMethod · 0.95
convert_localMethod · 0.95
convert_responseMethod · 0.95
convert_streamMethod · 0.95