| 34 | |
| 35 | |
| 36 | def get_args_parser(): |
| 37 | parser = argparse.ArgumentParser('DeiT training and evaluation script', add_help=False) |
| 38 | parser.add_argument('--batch-size', default=64, type=int) |
| 39 | parser.add_argument('--epochs', default=300, type=int) |
| 40 | parser.add_argument('--bce-loss', action='store_true') |
| 41 | parser.add_argument('--unscale-lr', action='store_true') |
| 42 | |
| 43 | # Model parameters |
| 44 | parser.add_argument('--model', default='deit_base_patch16_224', type=str, metavar='MODEL', |
| 45 | help='Name of model to train') |
| 46 | parser.add_argument('--input-size', default=224, type=int, help='images input size') |
| 47 | |
| 48 | parser.add_argument('--drop', type=float, default=0.0, metavar='PCT', |
| 49 | help='Dropout rate (default: 0.)') |
| 50 | parser.add_argument('--drop-path', type=float, default=0.1, metavar='PCT', |
| 51 | help='Drop path rate (default: 0.1)') |
| 52 | |
| 53 | parser.add_argument('--model-ema', action='store_true') |
| 54 | parser.add_argument('--no-model-ema', action='store_false', dest='model_ema') |
| 55 | parser.set_defaults(model_ema=True) |
| 56 | parser.add_argument('--model-ema-decay', type=float, default=0.99996, help='') |
| 57 | parser.add_argument('--model-ema-force-cpu', action='store_true', default=False, help='') |
| 58 | |
| 59 | # Optimizer parameters |
| 60 | parser.add_argument('--opt', default='adamw', type=str, metavar='OPTIMIZER', |
| 61 | help='Optimizer (default: "adamw"') |
| 62 | parser.add_argument('--opt-eps', default=1e-8, type=float, metavar='EPSILON', |
| 63 | help='Optimizer Epsilon (default: 1e-8)') |
| 64 | parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA', |
| 65 | help='Optimizer Betas (default: None, use opt default)') |
| 66 | parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM', |
| 67 | help='Clip gradient norm (default: None, no clipping)') |
| 68 | parser.add_argument('--momentum', type=float, default=0.9, metavar='M', |
| 69 | help='SGD momentum (default: 0.9)') |
| 70 | parser.add_argument('--weight-decay', type=float, default=0.05, |
| 71 | help='weight decay (default: 0.05)') |
| 72 | # Learning rate schedule parameters |
| 73 | parser.add_argument('--sched', default='cosine', type=str, metavar='SCHEDULER', |
| 74 | help='LR scheduler (default: "cosine"') |
| 75 | parser.add_argument('--lr', type=float, default=5e-4, metavar='LR', |
| 76 | help='learning rate (default: 5e-4)') |
| 77 | parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', |
| 78 | help='learning rate noise on/off epoch percentages') |
| 79 | parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', |
| 80 | help='learning rate noise limit percent (default: 0.67)') |
| 81 | parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', |
| 82 | help='learning rate noise std-dev (default: 1.0)') |
| 83 | parser.add_argument('--warmup-lr', type=float, default=1e-6, metavar='LR', |
| 84 | help='warmup learning rate (default: 1e-6)') |
| 85 | parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR', |
| 86 | help='lower lr bound for cyclic schedulers that hit 0 (1e-5)') |
| 87 | |
| 88 | parser.add_argument('--decay-epochs', type=float, default=30, metavar='N', |
| 89 | help='epoch interval to decay LR') |
| 90 | parser.add_argument('--warmup-epochs', type=int, default=5, metavar='N', |
| 91 | help='epochs to warmup LR, if scheduler supports') |
| 92 | parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N', |
| 93 | help='epochs to cooldown LR at min_lr, after cyclic schedule ends') |