MCPcopy Index your code
hub / github.com/zai-org/CogView / add_model_config_args

Function add_model_config_args

arguments.py:25–64  ·  view source on GitHub ↗

Model arguments

(parser)

Source from the content-addressed store, hash-verified

23
24
def add_model_config_args(parser):
    """Register the model-configuration command-line options on ``parser``.

    Creates an argument group titled ``model`` and adds the transformer
    size, dropout, vocabulary, and CPU-optimizer flags to it.

    Args:
        parser: An ``argparse.ArgumentParser`` (or any object exposing a
            compatible ``add_argument_group`` method).

    Returns:
        The same ``parser`` object that was passed in.
    """

    group = parser.add_argument_group('model', 'model configuration')

    group.add_argument('--attention-dropout', type=float, default=0.1,
                       help='dropout probability for attention weights')
    group.add_argument('--num-attention-heads', type=int, default=16,
                       help='num of transformer attention heads')
    group.add_argument('--hidden-size', type=int, default=1024,
                       help='transformer hidden size')
    group.add_argument('--num-layers', type=int, default=24,
                       help='num decoder layers')
    group.add_argument('--layernorm-epsilon', type=float, default=1e-5,
                       help='layer norm epsilon')
    group.add_argument('--hidden-dropout', type=float, default=0.1,
                       help='dropout probability for hidden state transformer')
    group.add_argument('--max-position-embeddings', type=int, default=512,
                       help='maximum number of position embeddings to use')
    group.add_argument('--vocab-size', type=int, default=30522,
                       help='vocab size to use for non-character-level '
                       'tokenization. This value will only be used when '
                       'creating a tokenizer')
    # Adjacent string literals are concatenated verbatim, so each fragment
    # must carry its own trailing space to keep sentences apart in --help.
    group.add_argument('--deep-init', action='store_true',
                       help='initialize bert model similar to gpt2 model. '
                       'scales initialization of projection layers by a '
                       'factor of 1/sqrt(2N). Necessary to train bert '
                       'models larger than BERT-Large.')
    group.add_argument('--make-vocab-size-divisible-by', type=int, default=128,
                       help='Pad the vocab size to be divisible by this value. '
                       'This is added for computational efficiency reasons.')
    group.add_argument('--cpu-optimizer', action='store_true',
                       help='Run optimizer on CPU')
    # NOTE: this flag is spelled with underscores, unlike its siblings; the
    # spelling is kept as-is so existing command lines keep working.
    group.add_argument('--cpu_torch_adam', action='store_true',
                       help='Use Torch Adam as optimizer on CPU.')

    group.add_argument('--max-position-embeddings-finetune', type=int, default=-1,
                       help='maximum number of position embeddings to use in finetune')

    return parser
65
66
67def add_fp16_config_args(parser):

Callers 1

get_argsFunction · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected