MCPcopy
hub / github.com/deepspeedai/DeepSpeedExamples / pretrain_validation

Function pretrain_validation

bing_bert/deepspeed_train.py:86–108  ·  view source on GitHub ↗
(args, index, model)

Source from the content-addressed store, hash-verified

84
85
def pretrain_validation(args, index, model):
    """Run one validation pass and report the mean loss.

    Switches ``model`` to eval mode, builds the validation dataset for
    ``index``, averages the per-batch loss over all batches, logs the result
    and, on the master process, records it in ``args.summary_writer`` under
    the ``Validation/Loss`` tag at step ``index + 1``.

    Args:
        args: Run settings. Reads ``config``, ``logger``, ``tokenizer``,
            ``data_path_prefix``, ``max_seq_length``,
            ``max_predictions_per_seq``, ``device`` and ``summary_writer``,
            plus whatever ``master_process`` inspects.
        index: Zero-based epoch index; reported as ``index + 1``.
        model: Object exposing ``eval()`` and a callable ``network`` that
            returns the loss tensor for a batch.

    Returns:
        None. If the validation set yields no batches, a message is logged
        and nothing is written to the summary writer.
    """
    config = args.config
    logger = args.logger

    model.eval()
    validation_path = os.path.join(args.data_path_prefix,
                                   config['validation']['path'])
    dataset = PreTrainingDataset(args.tokenizer, validation_path, args.logger,
                                 args.max_seq_length, index,
                                 PretrainDataType.VALIDATION,
                                 args.max_predictions_per_seq)
    data_batches = get_dataloader(args, dataset, eval_set=True)

    eval_loss = 0
    nb_eval_steps = 0
    for batch in tqdm(data_batches):
        batch = tuple(t.to(args.device) for t in batch)
        tmp_eval_loss = model.network(batch, log=False)
        # Reduce to get the loss from all the GPUs, then divide by the world
        # size to turn the combined value into an average.
        # NOTE(review): assumes `dist` is torch.distributed, whose reduce
        # defaults to SUM and leaves the full result on rank 0 only -- confirm.
        dist.reduce(tmp_eval_loss, 0)
        tmp_eval_loss = tmp_eval_loss / dist.get_world_size()
        eval_loss += tmp_eval_loss.mean().item()
        nb_eval_steps += 1

    if nb_eval_steps == 0:
        # An empty validation set used to crash with ZeroDivisionError below;
        # report it and skip the summary entry instead.
        logger.info(
            f"Validation for epoch {index + 1} produced no batches; "
            "skipping validation loss.")
        return

    eval_loss = eval_loss / nb_eval_steps
    logger.info(f"Validation Loss for epoch {index + 1} is: {eval_loss}")
    # Same condition the original spelled out inline; reuse the file's helper.
    if master_process(args):
        args.summary_writer.add_scalar('Validation/Loss', eval_loss, index + 1)
109
def master_process(args):
    """Tell whether the current process is the master process.

    With ``args.no_cuda`` set, the process is the master when
    ``args.local_rank`` is ``-1``; otherwise it is the master when
    ``dist.get_rank()`` is ``0``.
    """
    if args.no_cuda:
        return args.local_rank == -1
    return dist.get_rank() == 0

Callers 2

trainFunction · 0.85
load_checkpointFunction · 0.85

Calls 5

PreTrainingDatasetClass · 0.90
get_dataloaderFunction · 0.85
evalMethod · 0.80
infoMethod · 0.80
toMethod · 0.45

Tested by

no test coverage detected