MCPcopy
hub / github.com/deepspeedai/DeepSpeedExamples / evaluate

Function evaluate

Megatron-LM/pretrain_bert.py:378–412  ·  view source on GitHub ↗

Evaluation.

(data_iterator, model, args, timers, verbose = False)

Source from the content-addressed store, hash-verified

376
377
def evaluate(data_iterator, model, args, timers, verbose=False):
    """Evaluation.

    Runs ``args.eval_iters`` forward steps with gradients disabled and
    returns the average losses over those steps.

    Args:
        data_iterator: Passed through unchanged to ``forward_step``.
        model: Model to evaluate. It is switched to eval mode for the
            duration of the call and always switched back to train mode
            before returning, even if a step raises.
        args: Namespace providing ``eval_iters``, ``log_interval`` and
            ``world_size``.
        timers: Passed through unchanged to ``forward_step``.
        verbose: If true, print a progress line every
            ``args.log_interval`` iterations via ``print_rank_0``.

    Returns:
        Tuple ``(lm_loss, nsp_loss)`` of Python floats averaged over the
        evaluation iterations. ``(0.0, 0.0)`` is returned when
        ``args.eval_iters`` is not positive, since no step was run.
    """

    # Turn on evaluation mode which disables dropout.
    model.eval()

    total_lm_loss = 0.0
    total_nsp_loss = 0.0

    try:
        with torch.no_grad():
            iteration = 0
            while iteration < args.eval_iters:
                iteration += 1
                if verbose and iteration % args.log_interval == 0:
                    print_rank_0('Evaluating iter {}/{}'.format(iteration, args.eval_iters))
                # Forward evaluation.
                lm_loss, nsp_loss = forward_step(data_iterator, model,
                                                 args, timers)
                # Reduce across processes: all_reduce sums by default, so
                # dividing by the world size yields the mean.
                # NOTE(review): assumes args.world_size equals the number of
                # participating processes -- confirm against the launcher.
                if isinstance(model, DDP):
                    reduced_losses = torch.cat((lm_loss.view(1), nsp_loss.view(1)))
                    torch.distributed.all_reduce(reduced_losses.data)
                    reduced_losses.data = reduced_losses.data / args.world_size
                    lm_loss = reduced_losses[0]
                    nsp_loss = reduced_losses[1]

                # Gradients are disabled here, so no detach is required
                # before pulling the scalar values out.
                total_lm_loss += lm_loss.float().item()
                total_nsp_loss += nsp_loss.float().item()
    finally:
        # Move model back to the train mode, even when a step failed, so a
        # caller that recovers does not keep training with dropout disabled.
        model.train()

    # With no evaluation iterations there is nothing to average; avoid
    # dividing by zero.
    if args.eval_iters <= 0:
        return 0.0, 0.0

    total_lm_loss /= args.eval_iters
    total_nsp_loss /= args.eval_iters
    return total_lm_loss, total_nsp_loss
413
414
415def evaluate_and_print_results(prefix, data_iterator, model,

Callers 1

Calls 4

print_rank_0Function · 0.90
evalMethod · 0.80
trainMethod · 0.80
forward_stepFunction · 0.70

Tested by

no test coverage detected