MCPcopy
hub / github.com/deepspeedai/DeepSpeed / AsyncIOBuilder

Class AsyncIOBuilder

op_builder/async_io.py:13–113  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

11
12
13class AsyncIOBuilder(TorchCPUOpBuilder):
14 BUILD_VAR = "DS_BUILD_AIO"
15 NAME = "async_io"
16
def __init__(self):
    """Forward the class-level ``NAME`` to the parent constructor as ``name``."""
    op_name = self.NAME
    super().__init__(name=op_name)
19
def absolute_name(self):
    """Return the dotted name ``deepspeed.ops.aio.<NAME>_op`` for this op."""
    dotted_name = f'deepspeed.ops.aio.{self.NAME}_op'
    return dotted_name
22
def lib_sources(self):
    """Return the list of C++ source paths under ``csrc/aio`` used by this op.

    A new list is built on every call, so callers may modify the result.
    ``sources`` appends one more file to this list.
    """
    return [
        'csrc/aio/py_lib/deepspeed_py_io_handle.cpp',
        'csrc/aio/py_lib/deepspeed_py_aio.cpp',
        'csrc/aio/py_lib/deepspeed_py_aio_handle.cpp',
        'csrc/aio/py_lib/deepspeed_aio_thread.cpp',
        'csrc/aio/common/deepspeed_aio_utils.cpp',
        'csrc/aio/common/deepspeed_aio_common.cpp',
        'csrc/aio/common/deepspeed_aio_types.cpp',
        'csrc/aio/py_lib/deepspeed_cpu_op.cpp',
        'csrc/aio/py_lib/deepspeed_aio_op_desc.cpp',
        'csrc/aio/py_lib/deepspeed_py_copy.cpp',
        'csrc/aio/py_lib/deepspeed_pin_tensor.cpp',
    ]
33
def sources(self):
    """Return ``lib_sources()`` followed by ``csrc/aio/py_lib/py_ds_aio.cpp``."""
    library_files = self.lib_sources()
    return library_files + ['csrc/aio/py_lib/py_ds_aio.cpp']
36
def include_paths(self):
    """Return the include directories needed to compile this op.

    The two in-tree directories ``csrc/aio/py_lib`` and ``csrc/aio/common``
    are always returned first. Unless ``self.build_for_cpu`` is set, the
    toolkit include directories are appended: ``<CUDA_HOME>/include`` when
    ``self.is_rocm_pytorch()`` is false, otherwise ``<ROCM_HOME>/include``
    plus its ``rocrand`` and ``hiprand`` subdirectories.

    Returns:
        list: include directory paths.
    """
    base_includes = ['csrc/aio/py_lib', 'csrc/aio/common']
    if self.build_for_cpu:
        # CPU-only builds need no toolkit headers, so torch is not consulted.
        return base_includes

    # Import the submodule explicitly (as extra_ldflags does) instead of
    # relying on a bare `import torch` having loaded it as a side effect.
    import torch.utils.cpp_extension as cpp_extension

    if not self.is_rocm_pytorch():
        toolkit_includes = [os.path.join(cpp_extension.CUDA_HOME, "include")]
    else:
        toolkit_includes = [
            os.path.join(cpp_extension.ROCM_HOME, "include"),
            os.path.join(cpp_extension.ROCM_HOME, "include", "rocrand"),
            os.path.join(cpp_extension.ROCM_HOME, "include", "hiprand"),
        ]
    return base_includes + toolkit_includes
50
def cxx_args(self):
    """Return the C++ compiler flags for this op.

    Starts from the parent builder's flags, replaces ``-std=c++17`` with
    ``-std=c++14`` when the installed torch is older than 2.1, and then
    appends the flags specific to this op.

    Returns:
        list: compiler flag strings.
    """
    args = super().cxx_args()
    import torch

    # Compare (major, minor) as one tuple. Testing the components separately
    # (major >= 2 and minor >= 1) would classify a release such as 3.0 as
    # older than 2.1 and wrongly downgrade it to C++14.
    torch_version = tuple(int(part) for part in torch.__version__.split('.')[:2])
    if torch_version < (2, 1):
        # Only remove the flag when the parent actually emitted it;
        # list.remove raises ValueError on a missing element.
        if '-std=c++17' in args:
            args.remove('-std=c++17')
        args.append('-std=c++14')

    # -O0 for improved debugging, since performance is bound by I/O
    args += ['-Wall', '-O0', '-shared', '-fPIC', '-Wno-reorder']
    return args
61
62 def extra_ldflags(self):
63 if self.build_for_cpu:
64 return ['-fopenmp']
65
66 import torch.utils.cpp_extension
67 CUDA_HOME = torch.utils.cpp_extension.CUDA_HOME
68 if CUDA_HOME is None:
69 ldflags = ['-laio'] # the ROCM case
70 else:

Callers 15

_build_writerFunction · 0.90
test_parallel_readMethod · 0.90
test_async_readMethod · 0.90
test_parallel_writeMethod · 0.90
test_async_writeMethod · 0.90
test_readMethod · 0.90
test_writeMethod · 0.90
test_offset_writeMethod · 0.90
test_offset_readMethod · 0.90
async_io_setupFunction · 0.90
__init__Method · 0.90

Calls

no outgoing calls

Tested by 9

_build_writerFunction · 0.72
test_parallel_readMethod · 0.72
test_async_readMethod · 0.72
test_parallel_writeMethod · 0.72
test_async_writeMethod · 0.72
test_readMethod · 0.72
test_writeMethod · 0.72
test_offset_writeMethod · 0.72
test_offset_readMethod · 0.72

Used in the wild — real call sites across dependent graphs

searching dependent graphs…