()
| 443 | |
| 444 | |
def main():
    """Parse the launcher's command line and submit the distributed job.

    Options known to the parser are read into ``args``; whatever is left over
    must be exactly one argument (the user command), and that command must
    contain the substring ``python``. The parsed options and the command
    string are then passed to ``submit_jobs``.

    Raises:
        SystemExit: raised through ``parser.error`` (exit status 2, usage
            message on stderr) when the user command is missing or split
            into several arguments, when ``--num_proc_per_machine`` is
            missing or not positive, or when ``--ip_config`` is missing.
        RuntimeError: when the user command does not contain ``python``.
    """
    parser = argparse.ArgumentParser(description="Launch a distributed job")
    parser.add_argument("--ssh_port", type=int, default=22, help="SSH Port.")
    parser.add_argument(
        "--ssh_username",
        default="",
        help="Optional. When issuing commands (via ssh) to cluster, use the provided username in the ssh cmd. "
        "Example: If you provide --ssh_username=bob, then the ssh command will be like: 'ssh bob@1.2.3.4 CMD' "
        "instead of 'ssh 1.2.3.4 CMD'",
    )
    parser.add_argument(
        "--num_proc_per_machine",
        type=int,
        help="The number of server processes per machine",
    )
    parser.add_argument(
        "--master_port",
        type=int,
        help="This port is used to form gloo group (randevouz server)",
    )
    parser.add_argument(
        "--ip_config",
        type=str,
        help="The file (in workspace) of IP configuration for server processes",
    )

    # parse_known_args keeps unrecognised arguments separately; they are
    # expected to hold the single (quoted) user command.
    args, udf_command = parser.parse_known_args()

    # Validate explicitly instead of with `assert`: assert statements are
    # removed when Python runs with -O, which would let bad input through.
    if len(udf_command) != 1:
        parser.error("Please provide user command line.")
    if args.num_proc_per_machine is None or args.num_proc_per_machine <= 0:
        parser.error("--num_proc_per_machine must be a positive number.")
    if args.ip_config is None:
        parser.error(
            "A user has to specify an IP configuration file with --ip_config."
        )

    udf_command = str(udf_command[0])
    # Plain substring check: the command only has to mention "python".
    if "python" not in udf_command:
        raise RuntimeError(
            "DGL launching script can only support Python executable file."
        )

    submit_jobs(args, udf_command)
| 487 | |
| 488 | |
| 489 | if __name__ == "__main__": |
no test coverage detected