Class CUDA_Accelerator

accelerator/cuda_accelerator.py:29–404 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

27
28
29	class CUDA_Accelerator(DeepSpeedAccelerator):
30	supports_nvtx_domain = True
31
def __init__(self):
    """Set up the CUDA accelerator's identifying attributes.

    Records the accelerator name, picks the communication backend name
    from the host platform, records the compile backend, creates an empty
    NVTX-domain mapping and, if the module-level ``pynvml`` is still
    ``None``, attempts to initialise it.
    """
    running_on_windows = sys.platform == 'win32'

    self._name = 'cuda'
    # 'gloo' is selected on win32; every other platform gets 'nccl'.
    self._communication_backend_name = 'gloo' if running_on_windows else 'nccl'
    self._compile_backend = "inductor"
    self._nvtx_domains = {}

    # Only attempt the pynvml import/initialisation while the global is unset.
    if pynvml is None:
        self._init_pynvml()
39
40	def _init_pynvml(self):
41	global pynvml
42	try:
43	import pynvml
44	except ImportError:
45	return
46	try:
47	pynvml.nvmlInit()
48	except pynvml.NVMLError:
49	pynvml = None
50	return
51
def is_synchronized_device(self):
    """Report whether this accelerator is a synchronized device.

    Returns:
        Always ``False`` for this accelerator.
    """
    return False
54
def use_host_timers(self):
    """Return whatever ``is_synchronized_device()`` reports.

    This answer is a pure delegation; it has no logic of its own.
    """
    synchronized = self.is_synchronized_device()
    return synchronized
57
def resolves_data_dependency(self):
    """Return whatever ``is_synchronized_device()`` reports.

    This answer is a pure delegation; it has no logic of its own.
    """
    synchronized = self.is_synchronized_device()
    return synchronized
60
def handles_memory_backpressure(self):
    """Return whatever ``is_synchronized_device()`` reports.

    This answer is a pure delegation; it has no logic of its own.
    """
    synchronized = self.is_synchronized_device()
    return synchronized
63
64	# Device APIs
def device_name(self, device_index=None):
    """Build the device string for this accelerator.

    Args:
        device_index: Optional index to append. When ``None`` (the
            default) no index suffix is added.

    Returns:
        ``'cuda'`` when no index is given, otherwise ``'cuda:<index>'``.
    """
    # Note the explicit None test: an index of 0 must still yield 'cuda:0'.
    return 'cuda' if device_index is None else 'cuda:{}'.format(device_index)
69
def communication_backend_version(self):
    """Return the value of ``torch.cuda.nccl.version()``.

    NOTE(review): the constructor selects 'gloo' as the backend name on
    win32, yet this method always queries NCCL — confirm that is intended.
    """
    nccl_version = torch.cuda.nccl.version()
    return nccl_version
72
def device(self, device_index=None):
    """Create a ``torch.device`` of type ``'cuda'``.

    Args:
        device_index: Index passed straight through to ``torch.device``;
            defaults to ``None``.

    Returns:
        The ``torch.device`` built from ``'cuda'`` and ``device_index``.
    """
    cuda_device = torch.device('cuda', device_index)
    return cuda_device
75
def set_device(self, device_index):
    """Forward ``device_index`` to ``torch.cuda.set_device``.

    Args:
        device_index: Passed through unchanged.

    Returns:
        ``None``; the result of the torch call is not returned.
    """
    select_device = torch.cuda.set_device
    select_device(device_index)
78
def current_device(self):
    """Return the value of ``torch.cuda.current_device()``."""
    active_index = torch.cuda.current_device()
    return active_index
81
def current_device_name(self):
    """Return ``'cuda:<index>'`` for the current device.

    The index is whatever ``torch.cuda.current_device()`` returns.
    """
    active_index = torch.cuda.current_device()
    return 'cuda:{}'.format(active_index)
84
def device_count(self):
    """Return the value of ``torch.cuda.device_count()``."""
    visible_devices = torch.cuda.device_count()
    return visible_devices

get_accelerator · Function · 0.85

no outgoing calls

no test coverage detected

searching dependent graphs…