MCPcopy Index your code
hub / github.com/zai-org/CogView / mpi_define_env

Function mpi_define_env

arguments.py:378–414  ·  view source on GitHub ↗

Sets up the connection for training CogView via MPI. Omit this function if you use the basic DeepSpeed pdsh runner.

(args)

Source from the content-addressed store, hash-verified

376
377
def mpi_define_env(args):
    ''' Set up the connection settings for training CogView via MPI.
    Omit this function if you use the basic deepspeed pdsh runner.

    Reads the global rank and world size from ``MPI.COMM_WORLD``, lets rank 0
    discover its own address with ``hostname -I`` and broadcasts it to every
    rank, then exports ``RANK``, ``WORLD_SIZE``, ``MASTER_ADDR`` and
    ``MASTER_PORT`` as environment variables.

    Args:
        args: Object that receives the ``local_rank``, ``world_size`` and
            ``rank`` attributes (modified in place).

    Raises:
        RuntimeError: On every rank, if rank 0 could not obtain an address
            from ``hostname -I``.
    '''
    from mpi4py import MPI
    import subprocess

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    world_size = comm.Get_size()

    master_addr = None
    lookup_error = None
    if rank == 0:
        try:
            # Argument vector without a shell: a one-element list combined
            # with shell=True only works by accident on POSIX.
            result = subprocess.check_output(["hostname", "-I"])
        except (OSError, subprocess.CalledProcessError) as err:
            # Do not raise yet: rank 0 must still take part in the broadcast
            # below, otherwise the other ranks would block in it.
            lookup_error = err
        else:
            addresses = result.decode('utf-8').split()
            if addresses:
                # `hostname -I` may print several addresses; use the first.
                master_addr = addresses[0]
    master_addr = comm.bcast(master_addr, root=0)
    if master_addr is None:
        # All ranks received None, so all of them fail together.
        raise RuntimeError(
            "Rank 0 could not determine the master address from `hostname -I`"
        ) from lookup_error

    # Determine local rank by assuming hostnames are unique:
    # count the lower-ranked processes that run on the same host.
    proc_name = MPI.Get_processor_name()
    all_procs = comm.allgather(proc_name)
    local_rank = sum(name == proc_name for name in all_procs[:rank])

    os.environ['RANK'] = str(rank)
    os.environ['WORLD_SIZE'] = str(world_size)
    args.local_rank = local_rank
    args.world_size = world_size
    args.rank = rank
    os.environ['MASTER_ADDR'] = master_addr
    os.environ['MASTER_PORT'] = "29500"  # TORCH_DISTRIBUTED_DEFAULT_PORT = 29500

    print(
        "Discovered MPI settings of world_rank={}, local_rank={}, world_size={}, master_addr={}, master_port={}"
        .format(os.environ['RANK'],
                args.local_rank,
                os.environ['WORLD_SIZE'],
                os.environ['MASTER_ADDR'],
                os.environ['MASTER_PORT']))

Callers 1

get_argsFunction · 0.85

Calls 1

decodeMethod · 0.45

Tested by

no test coverage detected