| 131 | |
| 132 | |
class LLaMAConfig(PretrainedConfig):
    """Configuration container for a LLaMA model.

    Every constructor argument except ``bos_token_id``, ``eos_token_id`` and
    ``tie_word_embeddings`` is stored unchanged as an instance attribute of
    the same name. Those three, together with any extra keyword arguments,
    are forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "llama"

    def __init__(
        self,
        vocab_size=32000,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        max_sequence_length=4096,
        rms_norm_eps=1e-6,
        initializer_range=0.02,
        use_cache=True,
        bos_token_id=0,
        eos_token_id=1,
        resid_pdrop=0.0,
        embd_pdrop=0.0,
        attn_pdrop=0.0,
        tie_word_embeddings=False,
        scan_attention=True,
        scan_mlp=True,
        scan_query_chunk_size=1024,
        scan_key_chunk_size=1024,
        scan_mlp_chunk_size=1024,
        scan_layers=True,
        param_scan_axis=0,
        mesh_dim=None,
        theta=10000,
        **kwargs,
    ):
        """Record the model hyperparameters and initialise the base config.

        No validation or conversion is performed here; values are stored
        exactly as given. The groupings below are by parameter name only;
        how each value is consumed is decided by the model code, which is
        not part of this class.

        Args:
            vocab_size, hidden_size, intermediate_size, num_hidden_layers,
            num_attention_heads, max_sequence_length: size settings.
            rms_norm_eps, initializer_range, theta: numeric constants.
            use_cache: stored flag.
            resid_pdrop, embd_pdrop, attn_pdrop: dropout-style rates.
            scan_attention, scan_mlp, scan_layers: scan on/off flags.
            scan_query_chunk_size, scan_key_chunk_size, scan_mlp_chunk_size:
                chunk sizes paired with the scan flags.
            param_scan_axis, mesh_dim: stored as given.
            bos_token_id, eos_token_id, tie_word_embeddings: forwarded to
                the base-class constructor rather than set here.
            **kwargs: passed through to the base-class constructor.
        """
        # Size settings.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.max_sequence_length = max_sequence_length

        # Numeric constants and the cache flag.
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.theta = theta
        self.use_cache = use_cache

        # Dropout-style rates.
        self.resid_pdrop = resid_pdrop
        self.embd_pdrop = embd_pdrop
        self.attn_pdrop = attn_pdrop

        # Scan flags and their chunk sizes.
        self.scan_attention = scan_attention
        self.scan_mlp = scan_mlp
        self.scan_layers = scan_layers
        self.scan_query_chunk_size = scan_query_chunk_size
        self.scan_key_chunk_size = scan_key_chunk_size
        self.scan_mlp_chunk_size = scan_mlp_chunk_size

        # Remaining settings, stored as given.
        self.param_scan_axis = param_scan_axis
        self.mesh_dim = mesh_dim

        # The base class is initialised last, after all attributes above
        # have been set, and owns the token ids and embedding-tying flag.
        base_settings = {
            "bos_token_id": bos_token_id,
            "eos_token_id": eos_token_id,
            "tie_word_embeddings": tie_word_embeddings,
        }
        super().__init__(**base_settings, **kwargs)
no outgoing calls
no test coverage detected