MCPcopy
hub / github.com/Tele-AI/Telechat / __init__

Method __init__

models/12B_4bit/configuration_telechat.py:53–92  ·  view source on GitHub ↗
(
        self,
        vocab_size=160256,
        hidden_size=4096,
        n_layer=30,
        n_head=32,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        use_cache=True,
        bos_token_id=1,
        eos_token_id=2,
        apply_residual_connection_post_layernorm=False,
        hidden_dropout=0.0,
        attention_dropout=0.0,
        ffn_hidden_size=12288,
        training_seqlen = 8192,
        logn = True,
        embed_layernorm = False,
        **kwargs,
    )

Source from the content-addressed store, hash-verified

51 }
52
def __init__(
    self,
    vocab_size=160256,
    hidden_size=4096,
    n_layer=30,
    n_head=32,
    layer_norm_epsilon=1e-5,
    initializer_range=0.02,
    use_cache=True,
    bos_token_id=1,
    eos_token_id=2,
    apply_residual_connection_post_layernorm=False,
    hidden_dropout=0.0,
    attention_dropout=0.0,
    ffn_hidden_size=12288,
    training_seqlen=8192,
    logn=True,
    embed_layernorm=False,
    **kwargs,
):
    """Store the model hyperparameters on this configuration instance.

    Every named argument is saved unchanged as the attribute of the same
    name, with one exception: if ``kwargs`` carries a non-``None``
    ``n_embed`` entry, that value is used for ``self.hidden_size`` instead
    of the ``hidden_size`` argument. ``n_embed`` is always removed from
    ``kwargs`` before they are forwarded.

    ``bos_token_id`` and ``eos_token_id`` are set here and also passed to
    the parent initializer together with whatever remains in ``kwargs``.

    NOTE(review): the parent class is not visible in this excerpt --
    presumably the Hugging Face ``PretrainedConfig``; confirm.
    """
    # Pull the alternate width key out first so it never reaches the parent.
    width_override = kwargs.pop("n_embed", None)

    self.vocab_size = vocab_size
    if width_override is None:
        self.hidden_size = hidden_size
    else:
        self.hidden_size = width_override
    self.n_layer = n_layer
    self.n_head = n_head
    self.layer_norm_epsilon = layer_norm_epsilon
    self.initializer_range = initializer_range
    self.use_cache = use_cache
    self.apply_residual_connection_post_layernorm = (
        apply_residual_connection_post_layernorm
    )
    self.hidden_dropout = hidden_dropout
    self.attention_dropout = attention_dropout
    self.bos_token_id = bos_token_id
    self.eos_token_id = eos_token_id
    self.logn = logn
    self.ffn_hidden_size = ffn_hidden_size
    self.training_seqlen = training_seqlen
    self.embed_layernorm = embed_layernorm

    # Hand the token ids and any remaining options to the parent initializer.
    super().__init__(
        bos_token_id=bos_token_id,
        eos_token_id=eos_token_id,
        **kwargs,
    )
93

Callers

nothing calls this directly

Calls 1

popMethod · 0.45

Tested by

no test coverage detected