Get all remote processes.
(hosts, ssh_port, udf_command)
| 176 | |
| 177 | |
def get_all_remote_pids(hosts, ssh_port, udf_command):
    """Get all remote processes.

    Parameters
    ----------
    hosts
        Iterable of 2-item entries. Only the first item of each entry (the
        IP/host address) is used; the second item is ignored.
    ssh_port
        Port forwarded unchanged to ``get_remote_pids`` and used as the
        second component of every key in the result.
    udf_command : str
        The user command as originally given. It is split on whitespace and
        re-joined into a regular expression (see below).

    Returns
    -------
    dict
        Maps ``(ip, ssh_port)`` to whatever ``get_remote_pids`` returns for
        that host. If the same IP occurs more than once in ``hosts``, the
        result of the last lookup wins.
    """
    # When creating training processes in remote machines, we may insert some
    # arguments in the commands. We need to use regular expressions to match
    # the modified command. The pattern depends only on ``udf_command``, so it
    # is built once instead of once per host.
    # NOTE(review): the tokens are not regex-escaped, so metacharacters in the
    # user command are interpreted as pattern syntax -- confirm this is
    # intended by the matcher in ``get_remote_pids``.
    command_pattern = " .*".join(udf_command.split())
    return {
        (ip, ssh_port): get_remote_pids(ip, ssh_port, command_pattern)
        for ip, _ in hosts
    }
| 190 | |
| 191 | |
| 192 | def construct_torch_dist_launcher_cmd( |
no test coverage detected