MCPcopy Index your code
hub / github.com/zai-org/CogView / add_training_args

Function add_training_args

arguments.py:96–185  ·  view source on GitHub ↗

Training arguments.

(parser)

Source from the content-addressed store, hash-verified

94
95
96def add_training_args(parser):
97 """Training arguments."""
98
99 group = parser.add_argument_group('train', 'training configurations')
100
101 group.add_argument('--experiment-name', type=str, default="CogView",
102 help="The experiment name for summary and checkpoint")
103 group.add_argument('--batch-size', type=int, default=4,
104 help='Data Loader batch size')
105 group.add_argument('--weight-decay', type=float, default=0.01,
106 help='weight decay coefficient for L2 regularization')
107 group.add_argument('--checkpoint-activations', action='store_true',
108 help='checkpoint activation to allow for training '
109 'with larger models and sequences')
110 group.add_argument('--checkpoint-num-layers', type=int, default=1,
111 help='chunk size (number of layers) for checkpointing')
112 group.add_argument('--deepspeed-activation-checkpointing', action='store_true',
113 help='uses activation checkpointing from deepspeed')
114 group.add_argument('--clip-grad', type=float, default=1.0,
115 help='gradient clipping')
116 group.add_argument('--train-iters', type=int, default=1000000,
117 help='total number of iterations to train over all training runs')
118 group.add_argument('--log-interval', type=int, default=50,
119 help='report interval')
120 group.add_argument('--exit-interval', type=int, default=None,
121 help='Exit the program after this many new iterations.')
122 group.add_argument('--summary-dir', type=str, default="", help="The directory to store the summary")
123 group.add_argument('--seed', type=int, default=1234,
124 help='random seed')
125 group.add_argument('--img-tokenizer-path', type=str, default=None,
126 help='The checkpoint file path of image tokenizer.')
127 group.add_argument('--img-tokenizer-num-tokens', type=int, default=None,
128 help='The num tokens of image tokenizer. ONLY use for pretraining with img-tokenizer UNKNOW.')
129 # Batch producer arguments
130 group.add_argument('--reset-position-ids', action='store_true',
131 help='Reset posistion ids after end-of-document token.')
132 group.add_argument('--reset-attention-mask', action='store_true',
133 help='Reset self attention maske after '
134 'end-of-document token.')
135
136 # Learning rate.
137 group.add_argument('--lr-decay-iters', type=int, default=None,
138 help='number of iterations to decay LR over,'
139 ' If None defaults to `--train-iters`*`--epochs`')
140 group.add_argument('--lr-decay-style', type=str, default='linear',
141 choices=['constant', 'linear', 'cosine', 'exponential'],
142 help='learning rate decay function')
143 group.add_argument('--lr-decay-ratio', type=float, default=0.1)
144 group.add_argument('--lr', type=float, default=1.0e-4,
145 help='initial learning rate')
146 group.add_argument('--warmup', type=float, default=0.01,
147 help='percentage of data to warmup on (.01 = 1% of all '
148 'training iters). Default 0.01')
149 # model checkpointing
150 group.add_argument('--save', type=str, default=None,
151 help='Output directory to save checkpoints to.')
152 group.add_argument('--save-interval', type=int, default=5000,
153 help='number of iterations between saves')

Callers 1

get_args · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected