(opt, callbacks=Callbacks())
| 477 | |
| 478 | |
| 479 | def main(opt, callbacks=Callbacks()): |
| 480 | # Checks |
| 481 | if RANK in {-1, 0}: |
| 482 | print_args(vars(opt)) |
| 483 | check_git_status() |
| 484 | check_requirements() |
| 485 | |
| 486 | # Resume (from specified or most recent last.pt) |
| 487 | if opt.resume and not check_comet_resume(opt) and not opt.evolve: |
| 488 | last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run()) |
| 489 | opt_yaml = last.parent.parent / 'opt.yaml' # train options yaml |
| 490 | opt_data = opt.data # original dataset |
| 491 | if opt_yaml.is_file(): |
| 492 | with open(opt_yaml, errors='ignore') as f: |
| 493 | d = yaml.safe_load(f) |
| 494 | else: |
| 495 | d = torch.load(last, map_location='cpu')['opt'] |
| 496 | opt = argparse.Namespace(**d) # replace |
| 497 | opt.cfg, opt.weights, opt.resume = '', str(last), True # reinstate |
| 498 | if is_url(opt_data): |
| 499 | opt.data = check_file(opt_data) # avoid HUB resume auth timeout |
| 500 | else: |
| 501 | opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = \ |
| 502 | check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project) # checks |
| 503 | assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified' |
| 504 | if opt.evolve: |
| 505 | if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve |
| 506 | opt.project = str(ROOT / 'runs/evolve') |
| 507 | opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume |
| 508 | if opt.name == 'cfg': |
| 509 | opt.name = Path(opt.cfg).stem # use model.yaml as name |
| 510 | opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) |
| 511 | |
| 512 | # DDP mode |
| 513 | device = select_device(opt.device, batch_size=opt.batch_size) |
| 514 | if LOCAL_RANK != -1: |
| 515 | msg = 'is not compatible with YOLOv5 Multi-GPU DDP training' |
| 516 | assert not opt.image_weights, f'--image-weights {msg}' |
| 517 | assert not opt.evolve, f'--evolve {msg}' |
| 518 | assert opt.batch_size != -1, f'AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size' |
| 519 | assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE' |
| 520 | assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command' |
| 521 | torch.cuda.set_device(LOCAL_RANK) |
| 522 | device = torch.device('cuda', LOCAL_RANK) |
| 523 | dist.init_process_group(backend="nccl" if dist.is_nccl_available() else "gloo") |
| 524 | |
| 525 | # Train |
| 526 | if not opt.evolve: |
| 527 | train(opt.hyp, opt, device, callbacks) |
| 528 | |
| 529 | # Evolve hyperparameters (optional) |
| 530 | else: |
| 531 | # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit) |
| 532 | meta = { |
| 533 | 'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3) |
| 534 | 'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf) |
| 535 | 'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1 |
| 536 | 'weight_decay': (1, 0.0, 0.001), # optimizer weight decay |
no test coverage detected