Evaluation.
evaluate(data_loader, model, args, timers,
num_iterations=None)
| 219 | |
| 220 | |
def evaluate(data_loader, model, args, timers,
             num_iterations=None):
    """Run the evaluation loop and return the accumulated LM loss.

    Args:
        data_loader: Source of evaluation batches. May be ``None``, in which
            case ``forward_step`` receives ``None`` as its iterator. Its
            length is only queried on model-parallel rank 0, and only when
            ``num_iterations`` is not supplied.
        model: Model to evaluate. It is put into eval mode on entry and back
            into train mode before returning.
        args: Run configuration. This function reads ``log_interval``,
            ``cloze_eval``, ``world_size``, ``model_parallel_size`` and
            ``num_tokenized_tokens`` from it.
        timers: Passed through unchanged to ``forward_step``.
        num_iterations: Upper bound on the number of forward steps. When
            ``None``, the bound is ``len(data_loader)`` as seen by
            model-parallel rank 0, broadcast to the model-parallel group.

    Returns:
        The sum of the per-step loss values. When ``args.cloze_eval`` is
        falsy, each step's value is first divided by
        ``args.num_tokenized_tokens - 1``.
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()

    if num_iterations is None:
        # Only model-parallel rank 0 reports the loader length; the other
        # ranks contribute a placeholder 0 and pick up the real value from
        # the broadcast below.
        local_count = (len(data_loader)
                       if mpu.get_model_parallel_rank() == 0 else 0)
        iters_tensor = torch.cuda.LongTensor([local_count])
        torch.distributed.broadcast(iters_tensor,
                                    mpu.get_model_parallel_src_rank(),
                                    group=mpu.get_model_parallel_group())
        max_iters = iters_tensor[0].item()
        print_rank_0('global rank: {} | max iters: {}'.format(
            torch.distributed.get_rank(), max_iters))
    else:
        max_iters = num_iterations

    data_iterator = iter(data_loader) if data_loader is not None else None

    total_lm_loss = 0
    step = 0
    with torch.no_grad():
        while step < max_iters:
            if step % args.log_interval == 0:
                print_rank_0('global rank: {} | iteration: {}'.format(
                    torch.distributed.get_rank(), step))

            # Forward evaluation; a None result ends the loop early.
            lm_loss = forward_step(data_iterator, model, args, timers)
            if lm_loss is None:
                break

            # Reduce across processes: all_reduce, then divide by the
            # world size (cloze eval) or the model-parallel size (otherwise).
            if isinstance(model, DDP):
                torch.distributed.all_reduce(lm_loss.data)
                divisor = (args.world_size if args.cloze_eval
                           else args.model_parallel_size)
                lm_loss.data = lm_loss.data / divisor

            step_loss = lm_loss.data.detach().float().item()
            if args.cloze_eval:
                total_lm_loss += step_loss
            else:
                # Non-cloze runs scale each step by the token count minus one.
                total_lm_loss += step_loss / (args.num_tokenized_tokens - 1)

            step += 1

    # Move model back to the train mode.
    model.train()

    return total_lm_loss
| 277 | |
| 278 |
no test coverage detected