MCPcopy
hub / github.com/deepspeedai/DeepSpeed / CUDA_Accelerator

Class CUDA_Accelerator

accelerator/cuda_accelerator.py:29–404  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

27
28
29class CUDA_Accelerator(DeepSpeedAccelerator):
30 supports_nvtx_domain = True
31
def __init__(self):
    """Populate the accelerator's identifying attributes.

    The communication backend name is 'gloo' when ``sys.platform`` is
    'win32' and 'nccl' otherwise. If the module-level ``pynvml`` is still
    ``None``, ``_init_pynvml`` is called to try to load it.
    """
    if sys.platform == 'win32':
        backend_name = 'gloo'
    else:
        backend_name = 'nccl'

    self._name = 'cuda'
    self._communication_backend_name = backend_name
    self._compile_backend = "inductor"
    self._nvtx_domains = {}

    # Only attempt the pynvml import/initialisation while it is unset.
    if pynvml is None:
        self._init_pynvml()
39
def _init_pynvml(self):
    """Try to import pynvml and call ``nvmlInit``; never raises on failure.

    On ``ImportError`` the module-level ``pynvml`` is left untouched. If
    ``nvmlInit`` raises ``pynvml.NVMLError`` the module-level ``pynvml`` is
    reset to ``None``.
    """
    global pynvml
    try:
        import pynvml
    except ImportError:
        # Package not installed: leave the global as it was.
        pass
    else:
        try:
            pynvml.nvmlInit()
        except pynvml.NVMLError:
            # Imported but NVML could not be initialised: mark as unusable.
            pynvml = None
51
def is_synchronized_device(self):
    """Return ``False``: this accelerator reports itself as not synchronized."""
    return False
54
def use_host_timers(self):
    """Return whatever ``is_synchronized_device()`` reports."""
    synchronized = self.is_synchronized_device()
    return synchronized
57
def resolves_data_dependency(self):
    """Return whatever ``is_synchronized_device()`` reports."""
    synchronized = self.is_synchronized_device()
    return synchronized
60
def handles_memory_backpressure(self):
    """Return whatever ``is_synchronized_device()`` reports."""
    synchronized = self.is_synchronized_device()
    return synchronized
63
64 # Device APIs
def device_name(self, device_index=None):
    """Return the device string for ``device_index``.

    Args:
        device_index: Optional index; when ``None`` the bare name is used.

    Returns:
        ``'cuda'`` when no index is given, otherwise ``'cuda:<index>'``.
    """
    return 'cuda' if device_index is None else f'cuda:{device_index}'
69
def communication_backend_version(self):
    """Return the value of ``torch.cuda.nccl.version()``.

    NOTE(review): this always queries NCCL, while ``__init__`` selects
    'gloo' as the backend name on win32 -- confirm that is intended.
    """
    nccl = torch.cuda.nccl
    return nccl.version()
72
def device(self, device_index=None):
    """Build a ``torch.device`` of type 'cuda' for ``device_index``.

    Args:
        device_index: Index passed straight through to ``torch.device``;
            defaults to ``None``.

    Returns:
        The constructed ``torch.device``.
    """
    cuda_device = torch.device('cuda', device_index)
    return cuda_device
75
def set_device(self, device_index):
    """Forward ``device_index`` to ``torch.cuda.set_device``.

    Args:
        device_index: Value handed unchanged to ``torch.cuda.set_device``.
    """
    select = torch.cuda.set_device
    select(device_index)
78
def current_device(self):
    """Return the result of ``torch.cuda.current_device()``."""
    index = torch.cuda.current_device()
    return index
81
def current_device_name(self):
    """Return ``'cuda:<index>'`` using ``torch.cuda.current_device()`` as the index."""
    index = torch.cuda.current_device()
    return f'cuda:{index}'
84
def device_count(self):
    """Return the result of ``torch.cuda.device_count()``."""
    count = torch.cuda.device_count()
    return count

Callers 1

get_accelerator · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…