MCPcopy
hub / github.com/deepspeedai/DeepSpeed / set_accelerator_visible

Function set_accelerator_visible

tests/unit/common.py:81–136  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

79
80
def set_accelerator_visible():
    """Set ``CUDA_VISIBLE_DEVICES`` to a device list rotated by the xdist worker id.

    When ``CUDA_VISIBLE_DEVICES`` is already set, its comma-separated value is
    used as the device list. Otherwise the devices are discovered with the
    accelerator-specific command-line tool (``rocm-smi``, ``nvidia-smi``,
    ``clinfo``, ``hl-smi``, ``npu-smi`` or ``brsmi``), or from
    ``_get_cpu_socket_count()`` for the ``cpu`` accelerator.

    The list is rotated left by the value of ``get_xdist_worker_id()`` (taken
    as 0 when that returns ``None``) and written to
    ``os.environ["CUDA_VISIBLE_DEVICES"]``. The rotation wraps around, so a
    worker id larger than the number of devices still yields a distinct
    starting device instead of the unrotated list.

    Side effects:
        Always overwrites ``os.environ["CUDA_VISIBLE_DEVICES"]``. For the
        ``hpu`` accelerator, discovery also sets
        ``os.environ["HABANA_VISIBLE_MODULES"]`` to the sorted, unrotated
        module id list.

    Raises:
        subprocess.CalledProcessError: a discovery command exited non-zero.
        FileNotFoundError: a discovery command is not installed (for ``hpu``
            a missing ``hl-smi`` falls back to listing ``/dev/accel``).
        AssertionError: the accelerator name is not one of the handled ones.
    """
    cuda_visible = os.environ.get("CUDA_VISIBLE_DEVICES", None)
    xdist_worker_id = get_xdist_worker_id()
    if xdist_worker_id is None:
        xdist_worker_id = 0

    if cuda_visible is None:
        # CUDA_VISIBLE_DEVICES is not set, discover it using accelerator specific command instead
        device_name = get_accelerator().device_name()
        if device_name == 'cuda':
            if is_rocm_pytorch():
                rocm_smi = subprocess.check_output(['rocm-smi', '--showid'])
                rocm_lines = rocm_smi.decode('utf-8').strip().split('\n')
                num_accelerators = sum('GPU' in line for line in rocm_lines)
            else:
                nvidia_smi = subprocess.check_output(['nvidia-smi', '--list-gpus'])
                # one output line per GPU
                num_accelerators = len(nvidia_smi.decode('utf-8').strip().split('\n'))
        elif device_name == 'xpu':
            clinfo = subprocess.check_output(['clinfo'])
            clinfo_lines = clinfo.decode('utf-8').strip().split('\n')
            num_accelerators = sum(1 for line in clinfo_lines if re.search('Device Type.*GPU', line))
        elif device_name == 'hpu':
            # HPU keeps the discovered ids themselves (a list of strings) rather than a count
            try:
                hl_smi = subprocess.check_output(['hl-smi', "-L"])
                num_accelerators = re.findall(r"Module ID\s+:\s+(\d+)", hl_smi.decode())
            except FileNotFoundError:
                # hl-smi is not available: fall back to the device nodes under /dev/accel
                sim_list = subprocess.check_output(['ls', '-1', '/dev/accel'])
                num_accelerators = re.findall(r"accel(\d+)", sim_list.decode())
            num_accelerators = sorted(num_accelerators, key=int)
            os.environ["HABANA_VISIBLE_MODULES"] = ",".join(num_accelerators)
        elif device_name == 'npu':
            npu_smi = subprocess.check_output(['npu-smi', 'info', '-l'])
            # the count is read from the text after ':' on the first output line
            num_accelerators = int(npu_smi.decode('utf-8').strip().split('\n')[0].split(':')[1].strip())
        elif device_name == 'supa':
            br_smi = subprocess.check_output(['brsmi', 'gpu', 'list'])
            br_lines = br_smi.decode('utf-8').strip().split('\n')
            num_accelerators = sum('GPU' in line for line in br_lines)
        else:
            assert device_name == 'cpu'
            num_accelerators = _get_cpu_socket_count()

        if isinstance(num_accelerators, list):
            # already a list of id strings (hpu)
            cuda_visible = ",".join(num_accelerators)
        else:
            cuda_visible = ",".join(map(str, range(num_accelerators)))

    # rotate list based on xdist worker id, example below
    # wid=0 -> ['0', '1', '2', '3']
    # wid=1 -> ['1', '2', '3', '0']
    # wid=2 -> ['2', '3', '0', '1']
    # wid=3 -> ['3', '0', '1', '2']
    # wid=5 -> ['1', '2', '3', '0']  (wraps around when there are more workers than devices)
    dev_id_list = cuda_visible.split(",")
    # str.split always returns at least one element, so the modulo cannot divide by zero
    shift = xdist_worker_id % len(dev_id_list)
    dev_id_list = dev_id_list[shift:] + dev_id_list[:shift]
    os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(dev_id_list)
137
138

Callers 1

_dist_run Method · 0.85

Calls 7

get_accelerator Function · 0.90
get_xdist_worker_id Function · 0.85
is_rocm_pytorch Function · 0.85
_get_cpu_socket_count Function · 0.85
decode Method · 0.80
get Method · 0.45
device_name Method · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…