MCPcopy
hub / github.com/PaddlePaddle/PaddleFormers / start_local_trainers_cpu

Function start_local_trainers_cpu

tests/parallel_launch.py:60–98  ·  view source on GitHub ↗
(trainer_endpoints, training_script, training_script_args, log_dir=None)

Source from the content-addressed store, hash-verified

58
59
def start_local_trainers_cpu(trainer_endpoints, training_script, training_script_args, log_dir=None):
    """Spawn one local trainer subprocess per endpoint, configured for gloo.

    Each child runs ``python -u <training_script>`` with a copy of the current
    environment in which ``http_proxy``/``https_proxy`` are removed and the
    ``PADDLE_*`` variables describing the rank and endpoints are set.

    Args:
        trainer_endpoints: Sized sequence of endpoint strings; its length is
            the number of trainers and each entry is one trainer's endpoint.
        training_script: Path of the script each trainer executes. It is
            passed to the child as a single argument, so it may contain spaces.
        training_script_args: Accepted for interface compatibility; not
            forwarded to the child.
            NOTE(review): confirm whether callers expect these to be passed on.
        log_dir: Accepted for interface compatibility; not used here.

    Returns:
        A list of ``TrainerProc`` objects, one per endpoint in rank order, with
        ``proc``, ``rank``, ``log_fn`` (always ``None``) and ``cmd`` populated.

    Raises:
        AssertionError: If the ``WITH_COVERAGE`` environment variable is set to
            anything other than ``"OFF"``.
    """
    # The check does not depend on the rank, so do it once before spawning anything.
    assert os.getenv("WITH_COVERAGE", "OFF") == "OFF", "Gloo don't support WITH_COVERAGE."

    # os.environ.copy() already returns an independent dict; no second copy needed.
    base_env = os.environ.copy()
    base_env.pop("http_proxy", None)
    base_env.pop("https_proxy", None)

    procs = []
    n_rank = len(trainer_endpoints)
    print(trainer_endpoints)
    for rank_id, endpoint in enumerate(trainer_endpoints):
        proc_env = {
            "PADDLE_DISTRI_BACKEND": "gloo",
            "PADDLE_TRAINER_ID": "%d" % rank_id,
            "PADDLE_CURRENT_ENDPOINT": "%s" % endpoint,
            "PADDLE_TRAINERS_NUM": "%d" % n_rank,
            "PADDLE_TRAINER_ENDPOINTS": ",".join(trainer_endpoints),
        }

        # Give every child its own environment dict instead of mutating one
        # shared dict across iterations.
        current_env = dict(base_env)
        current_env.update(proc_env)

        # NOTE(review): this prints the whole inherited environment, which may
        # include sensitive values in CI logs.
        print("trainer proc env:{}".format(current_env))

        # Human-readable form kept for logging and for TrainerProc.cmd.
        cmd = "python -u " + training_script
        # Explicit argv list: splitting `cmd` on spaces would break a script
        # path that itself contains a space.
        argv = ["python", "-u", training_script]

        print("start trainer proc:{} env:{}".format(cmd, proc_env))

        proc = subprocess.Popen(argv, env=current_env)

        tp = TrainerProc()
        tp.proc = proc
        tp.rank = rank_id
        tp.log_fn = None  # no per-trainer log file is opened by this launcher
        tp.cmd = cmd

        procs.append(tp)

    return procs
99
100
101def start_local_trainers(

Callers 1

run_2cpuMethod · 0.70

Calls 4

popMethod · 0.45
updateMethod · 0.45
splitMethod · 0.45
appendMethod · 0.45

Tested by

no test coverage detected