Single training step.
(data_iterator, model, optimizer, lr_scheduler,
args, timers)
| 355 | #input("Press Any Key To Continue ..") |
| 356 | |
def train_step(data_iterator, model, optimizer, lr_scheduler,
               args, timers):
    """Run one training iteration: forward pass, backward pass, update.

    Each of the three phases is bracketed by the matching entry of
    ``timers`` ('forward', 'backward', 'optimizer').

    Returns:
        A ``(lm_loss_reduced, skipped_iter)`` tuple.  ``lm_loss_reduced``
        is the value returned by ``backward_step``.  ``skipped_iter`` is 1
        when the learning-rate update was skipped because ``args.fp16`` is
        set and ``optimizer.overflow`` is truthy, and 0 otherwise.
    """

    # Phase 1: compute the loss for this step.
    timers('forward').start()
    lm_loss = forward_step(data_iterator, model, args, timers)
    timers('forward').stop()

    # Phase 2: backward pass; the helper hands back the reduced loss.
    timers('backward').start()
    lm_loss_reduced = backward_step(optimizer, model, lm_loss, args, timers)
    timers('backward').stop()

    # Phase 3: apply the parameter update.
    timers('optimizer').start()
    skipped_iter = 0
    if args.deepspeed:
        # With DeepSpeed enabled the model object itself is stepped.
        model.step()
    else:
        optimizer.step()

        # NOTE(review): the listing this was recovered from had lost its
        # indentation; the learning-rate update is assumed to belong to the
        # non-DeepSpeed branch -- confirm against the original file.
        overflowed = args.fp16 and optimizer.overflow
        if overflowed:
            # fp16 overflow: leave the learning rate alone and report the skip.
            skipped_iter = 1
        else:
            lr_scheduler.step()
    timers('optimizer').stop()

    return lm_loss_reduced, skipped_iter
| 389 | |
| 390 | |
| 391 | def train(model, optimizer, lr_scheduler, |
no test coverage detected