Function start_local_trainers_cpu

tests/parallel_launch.py:60–98 · view source on GitHub ↗

(trainer_endpoints, training_script, training_script_args, log_dir=None)

Source from the content-addressed store, hash-verified

58
59
def start_local_trainers_cpu(trainer_endpoints, training_script, training_script_args, log_dir=None):
    """Start one local trainer subprocess per endpoint with the gloo backend.

    Each subprocess runs ``python -u <training_script> [args...]`` with a copy
    of the current environment in which ``http_proxy`` / ``https_proxy`` are
    removed and the ``PADDLE_*`` variables describing its rank are set.

    Args:
        trainer_endpoints: Sequence of endpoint strings; the position of an
            endpoint in the sequence is used as that trainer's rank.
        training_script: Path of the script every trainer executes.
        training_script_args: Sequence of extra command-line arguments that
            is appended after the script path (``None`` or empty for none).
        log_dir: Accepted for interface compatibility; not used here, so
            the ``log_fn`` of every returned entry is ``None``.

    Returns:
        A list of ``TrainerProc`` objects, one per endpoint, with ``proc``,
        ``rank``, ``log_fn`` and ``cmd`` populated.

    Raises:
        AssertionError: If the ``WITH_COVERAGE`` environment variable is set
            to anything other than ``"OFF"`` (checked once per endpoint,
            before the corresponding process is spawned).
    """
    base_env = os.environ.copy()
    base_env.pop("http_proxy", None)
    base_env.pop("https_proxy", None)

    # Build argv as a list so a script path or argument containing spaces is
    # passed through as a single element instead of being split apart.
    extra_args = [str(arg) for arg in (training_script_args or [])]
    argv = ["python", "-u", training_script] + extra_args
    # Human-readable form kept on the TrainerProc and used in log output.
    cmd = " ".join(argv)

    procs = []
    n_rank = len(trainer_endpoints)
    all_endpoints = ",".join(trainer_endpoints)
    print(trainer_endpoints)
    for rank_id, endpoint in enumerate(trainer_endpoints):
        proc_env = {
            "PADDLE_DISTRI_BACKEND": "gloo",
            "PADDLE_TRAINER_ID": "%d" % rank_id,
            "PADDLE_CURRENT_ENDPOINT": "%s" % endpoint,
            "PADDLE_TRAINERS_NUM": "%d" % n_rank,
            "PADDLE_TRAINER_ENDPOINTS": all_endpoints,
        }

        # Give every trainer its own environment mapping rather than
        # mutating one shared dict across iterations.
        trainer_env = dict(base_env)
        trainer_env.update(proc_env)

        print("trainer proc env:{}".format(trainer_env))

        assert os.getenv("WITH_COVERAGE", "OFF") == "OFF", "Gloo don't support WITH_COVERAGE."

        print("start trainer proc:{} env:{}".format(cmd, proc_env))

        proc = subprocess.Popen(argv, env=trainer_env)

        tp = TrainerProc()
        tp.proc = proc
        tp.rank = rank_id
        tp.log_fn = None  # no log file is opened by this launcher
        tp.cmd = cmd

        procs.append(tp)

    return procs
99
100
101	def start_local_trainers(

run_2cpuMethod · 0.70

popMethod · 0.45

updateMethod · 0.45

splitMethod · 0.45

appendMethod · 0.45

no test coverage detected