(epoch, batch)
| 78 | |
| 79 | |
def check_exit(epoch, batch):
    """Stop this worker if the exit schedule lists it for ``(epoch, batch)``.

    The schedule is looked up with the string form of the ``(epoch, batch)``
    tuple. If that key is present and ``start_rank`` is among the ranks
    stored under it, the worker either raises or kills its own process,
    depending on ``args.exit_mode``. Otherwise the call does nothing.

    Relies on ``exit_schedule``, ``start_rank``, ``args`` and ``hvd`` being
    defined outside this function (not visible in this chunk).

    Args:
        epoch: Epoch component of the schedule key.
        batch: Batch component of the schedule key.

    Raises:
        RuntimeError: If this worker is scheduled to exit and
            ``args.exit_mode`` is ``'exception'``.
    """
    schedule_key = str((epoch, batch))
    if schedule_key not in exit_schedule:
        return

    if start_rank not in exit_schedule[schedule_key]:
        return

    if args.exit_mode != 'exception':
        # Any mode other than 'exception' kills the current process outright.
        psutil.Process(os.getpid()).kill()
        return

    message = 'check_rank and exit epoch={} batch={} start_rank={} rank={}'.format(
        epoch, batch, start_rank, hvd.rank())
    raise RuntimeError(message)
| 90 | |
| 91 | |
| 92 | def log_state(state): |