(epoch, batch)
| 71 | |
| 72 | |
def check_exit(epoch, batch):
    """Abort this worker if the exit schedule names it for (epoch, batch).

    The lookup key is ``str((epoch, batch))``.  When that key is present in
    the module-level ``exit_schedule`` and the module-level ``start_rank``
    is among the ranks stored under it, the worker stops in the way selected
    by ``args.exit_mode``; otherwise the call returns without doing anything.

    Args:
        epoch: Epoch component of the schedule key.
        batch: Batch component of the schedule key.

    Raises:
        RuntimeError: If this worker is scheduled to exit and
            ``args.exit_mode`` equals ``'exception'``.
    """
    schedule_key = str((epoch, batch))
    if schedule_key not in exit_schedule:
        return

    scheduled_ranks = exit_schedule[schedule_key]
    if start_rank not in scheduled_ranks:
        return

    if args.exit_mode == 'exception':
        message = 'check_rank and exit epoch={} batch={} start_rank={} rank={}'.format(
            epoch, batch, start_rank, hvd.rank())
        raise RuntimeError(message)

    # Any other exit mode: kill the current process outright through psutil.
    this_process = psutil.Process(os.getpid())
    this_process.kill()
| 83 | |
| 84 | |
| 85 | def log_state(state): |