MCPcopy Index your code
hub / github.com/huggingface/datasets / _stream_subprocess

Function _stream_subprocess

tests/utils.py:579–617  ·  view source on GitHub ↗
(cmd, env=None, stdin=None, timeout=None, quiet=False, echo=False)

Source from the content-addressed store, hash-verified

577
578
async def _stream_subprocess(cmd, env=None, stdin=None, timeout=None, quiet=False, echo=False) -> _RunOutput:
    """Run ``cmd`` as a subprocess, capturing (and optionally echoing) its output line by line.

    Args:
        cmd: Sequence of strings; ``cmd[0]`` is the program and ``cmd[1:]`` are its arguments.
        env: Forwarded as ``env`` to ``asyncio.create_subprocess_exec``.
        stdin: Forwarded as ``stdin`` to ``asyncio.create_subprocess_exec``.
        timeout: Forwarded to ``asyncio.wait`` while the two stream readers run (see the
            note next to that call for why it does not bound the total run time).
        quiet: If true, captured lines are not echoed to this process's stdout/stderr.
        echo: If true, print the command line before launching it.

    Returns:
        ``_RunOutput(returncode, out, err)`` where ``out`` and ``err`` are lists of the
        UTF-8 decoded, right-stripped lines collected from the child's stdout and stderr.
    """
    if echo:
        print("\nRunning: ", " ".join(cmd))

    p = await asyncio.create_subprocess_exec(
        cmd[0],
        *cmd[1:],
        stdin=stdin,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
        env=env,
    )

    # note: there is a warning for a possible deadlock when using `wait` with huge amounts of data in the pipe
    # https://docs.python.org/3/library/asyncio-subprocess.html#asyncio.asyncio.subprocess.Process.wait
    #
    # If it starts hanging, will need to switch to the following code. The problem is that no data
    # will be seen until it's done and if it hangs for example there will be no debug info.
    # out, err = await p.communicate()
    # return _RunOutput(p.returncode, out, err)

    out = []
    err = []

    def tee(line, sink, pipe, label=""):
        # `line` arrives as bytes from the subprocess pipe; store the decoded text and
        # mirror it to `pipe` unless the caller asked for quiet operation.
        line = line.decode("utf-8").rstrip()
        sink.append(line)
        if not quiet:
            print(label, line, file=pipe)

    # `asyncio.wait` must be given Tasks/Futures: passing bare coroutine objects was
    # deprecated in Python 3.8 and raises TypeError since Python 3.11, so each reader is
    # wrapped in a Task explicitly.
    #
    # NOTE: `asyncio.wait` neither cancels the readers nor raises when `timeout` expires;
    # it simply returns, and the `p.wait()` below still blocks until the child exits.
    # That is why the timeout does not seem to make any difference here.
    await asyncio.wait(
        [
            asyncio.create_task(_read_stream(p.stdout, lambda line: tee(line, out, sys.stdout, label="stdout:"))),
            asyncio.create_task(_read_stream(p.stderr, lambda line: tee(line, err, sys.stderr, label="stderr:"))),
        ],
        timeout=timeout,
    )
    return _RunOutput(await p.wait(), out, err)
618
619
620def execute_subprocess_async(cmd, env=None, stdin=None, timeout=180, quiet=False, echo=True) -> _RunOutput:

Callers 1

execute_subprocess_asyncFunction · 0.85

Calls 3

_read_streamFunction · 0.85
teeFunction · 0.85
_RunOutputClass · 0.85

Tested by

no test coverage detected