hub / github.com/deepspeedai/DeepSpeed / set_accelerator_visible

Function set_accelerator_visible

tests/unit/common.py:81–136 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

79
80
def set_accelerator_visible():
    """Set ``CUDA_VISIBLE_DEVICES`` to a device list rotated by the xdist worker id.

    If ``CUDA_VISIBLE_DEVICES`` is already set, its comma-separated value is
    used as the device list. Otherwise the devices are discovered by running
    the command-line tool that matches ``get_accelerator().device_name()``
    (``rocm-smi``/``nvidia-smi`` for 'cuda', ``clinfo`` for 'xpu', ``hl-smi``
    or a listing of ``/dev/accel`` for 'hpu', ``npu-smi`` for 'npu', ``brsmi``
    for 'supa'), or via ``_get_cpu_socket_count()`` for 'cpu'.

    The list is then rotated left by the worker id so that each worker starts
    on a different device, and the result is written back to
    ``os.environ["CUDA_VISIBLE_DEVICES"]``. For 'hpu' the discovered ids are
    additionally written to ``os.environ["HABANA_VISIBLE_MODULES"]``.

    Raises:
        AssertionError: if the accelerator is none of the handled names.
        subprocess.CalledProcessError / FileNotFoundError: propagated from the
            discovery commands (only the missing ``hl-smi`` case is handled).
    """
    cuda_visible = os.environ.get("CUDA_VISIBLE_DEVICES", None)
    # NOTE(review): presumably None when not running under pytest-xdist — confirm
    # against get_xdist_worker_id(). Treated as worker 0 (no rotation).
    xdist_worker_id = get_xdist_worker_id()
    if xdist_worker_id is None:
        xdist_worker_id = 0
    if cuda_visible is None:
        # CUDA_VISIBLE_DEVICES is not set, discover it using accelerator specific command instead
        device_name = get_accelerator().device_name()
        if device_name == 'cuda':
            if is_rocm_pytorch():
                rocm_smi = subprocess.check_output(['rocm-smi', '--showid'])
                gpu_ids = filter(lambda s: 'GPU' in s, rocm_smi.decode('utf-8').strip().split('\n'))
                num_accelerators = len(list(gpu_ids))
            else:
                # One output line per GPU.
                nvidia_smi = subprocess.check_output(['nvidia-smi', '--list-gpus'])
                num_accelerators = len(nvidia_smi.decode('utf-8').strip().split('\n'))
        elif device_name == 'xpu':
            clinfo = subprocess.check_output(['clinfo'])
            lines = clinfo.decode('utf-8').strip().split('\n')
            num_accelerators = 0
            for line in lines:
                match = re.search('Device Type.*GPU', line)
                if match:
                    num_accelerators += 1
        elif device_name == 'hpu':
            # In this branch num_accelerators is a list of id strings, not a count.
            try:
                hl_smi = subprocess.check_output(['hl-smi', "-L"])
                num_accelerators = re.findall(r"Module ID\s+:\s+(\d+)", hl_smi.decode())
            except FileNotFoundError:
                # hl-smi is not installed: fall back to the device nodes in /dev/accel.
                sim_list = subprocess.check_output(['ls', '-1', '/dev/accel'])
                num_accelerators = re.findall(r"accel(\d+)", sim_list.decode())
                num_accelerators = sorted(num_accelerators, key=int)
            os.environ["HABANA_VISIBLE_MODULES"] = ",".join(num_accelerators)
        elif device_name == 'npu':
            # The count is parsed from the text after ':' on the first output line.
            npu_smi = subprocess.check_output(['npu-smi', 'info', '-l'])
            num_accelerators = int(npu_smi.decode('utf-8').strip().split('\n')[0].split(':')[1].strip())
        elif device_name == 'supa':
            br_smi = subprocess.check_output(['brsmi', 'gpu', 'list'])
            gpu_ids = filter(lambda s: 'GPU' in s, br_smi.decode('utf-8').strip().split('\n'))
            num_accelerators = len(list(gpu_ids))
        else:
            assert device_name == 'cpu'
            num_accelerators = _get_cpu_socket_count()

        if isinstance(num_accelerators, list):
            # Explicit id list (hpu): use the ids as discovered.
            cuda_visible = ",".join(num_accelerators)
        else:
            # Plain count: ids are 0..count-1.
            cuda_visible = ",".join(map(str, range(num_accelerators)))

    # rotate list based on xdist worker id, example below
    # wid=0 -> ['0', '1', '2', '3']
    # wid=1 -> ['1', '2', '3', '0']
    # wid=2 -> ['2', '3', '0', '1']
    # wid=3 -> ['3', '0', '1', '2']
    dev_id_list = cuda_visible.split(",")
    # Wrap the offset so worker ids >= the number of devices keep rotating
    # (wid=5 with 4 devices behaves like wid=1) instead of silently getting
    # the unrotated list. str.split always yields at least one element, so
    # the modulo can never divide by zero.
    offset = xdist_worker_id % len(dev_id_list)
    dev_id_list = dev_id_list[offset:] + dev_id_list[:offset]
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(dev_id_list)
137
138

Callers 1

_dist_run (Method) · 0.85

Calls 7

get_accelerator (Function) · 0.90

get_xdist_worker_id (Function) · 0.85

is_rocm_pytorch (Function) · 0.85

_get_cpu_socket_count (Function) · 0.85

decode (Method) · 0.80

get (Method) · 0.45

device_name (Method) · 0.45

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…