(self,
*,
architecture: str,
dtype: str,
hidden_size: int,
num_hidden_layers: int,
num_attention_heads: int,
vocab_size: Optional[int] = None,
hidden_act: str = 'gelu',
logits_dtype: str = 'float32',
norm_epsilon: float = 1e-5,
position_embedding_type: Union[
PositionEmbeddingType,
str] = PositionEmbeddingType.learned_absolute,
max_position_embeddings: Optional[int] = None,
rotary_embedding_dim: Optional[int] = None,
num_key_value_heads: Optional[int] = None,
intermediate_size: Optional[int] = None,
mapping: Optional[Union[Mapping, dict]] = None,
quantization: Optional[Union[QuantConfig, dict]] = None,
use_parallel_embedding: bool = False,
embedding_sharding_dim: int = 0,
head_size: Optional[int] = None,
qk_layernorm: bool = False,
runtime_defaults: "RuntimeDefaultsIn" = None,
**kwargs)
| 369 | class PretrainedConfig: |
| 370 | |
| 371 | def __init__(self, |
| 372 | *, |
| 373 | architecture: str, |
| 374 | dtype: str, |
| 375 | hidden_size: int, |
| 376 | num_hidden_layers: int, |
| 377 | num_attention_heads: int, |
| 378 | vocab_size: Optional[int] = None, |
| 379 | hidden_act: str = 'gelu', |
| 380 | logits_dtype: str = 'float32', |
| 381 | norm_epsilon: float = 1e-5, |
| 382 | position_embedding_type: Union[ |
| 383 | PositionEmbeddingType, |
| 384 | str] = PositionEmbeddingType.learned_absolute, |
| 385 | max_position_embeddings: Optional[int] = None, |
| 386 | rotary_embedding_dim: Optional[int] = None, |
| 387 | num_key_value_heads: Optional[int] = None, |
| 388 | intermediate_size: Optional[int] = None, |
| 389 | mapping: Optional[Union[Mapping, dict]] = None, |
| 390 | quantization: Optional[Union[QuantConfig, dict]] = None, |
| 391 | use_parallel_embedding: bool = False, |
| 392 | embedding_sharding_dim: int = 0, |
| 393 | head_size: Optional[int] = None, |
| 394 | qk_layernorm: bool = False, |
| 395 | runtime_defaults: "RuntimeDefaultsIn" = None, |
| 396 | **kwargs): |
| 397 | self.architecture = architecture |
| 398 | self.dtype = dtype |
| 399 | self.vocab_size = vocab_size |
| 400 | self.hidden_size = hidden_size |
| 401 | self.num_hidden_layers = num_hidden_layers |
| 402 | self.num_attention_heads = num_attention_heads |
| 403 | self.hidden_act = hidden_act |
| 404 | |
| 405 | self.logits_dtype = logits_dtype |
| 406 | self.norm_epsilon = norm_epsilon |
| 407 | |
| 408 | self.runtime_defaults = self.create_runtime_defaults(runtime_defaults) |
| 409 | |
| 410 | if isinstance(position_embedding_type, str): |
| 411 | position_embedding_type = PositionEmbeddingType.from_string( |
| 412 | position_embedding_type) |
| 413 | assert isinstance(position_embedding_type, PositionEmbeddingType) |
| 414 | self.position_embedding_type = position_embedding_type |
| 415 | |
| 416 | if num_key_value_heads is None: |
| 417 | num_key_value_heads = num_attention_heads |
| 418 | self.num_key_value_heads = num_key_value_heads |
| 419 | |
| 420 | if intermediate_size is None: |
| 421 | intermediate_size = hidden_size * 4 |
| 422 | self.intermediate_size = intermediate_size |
| 423 | self.max_position_embeddings = max_position_embeddings |
| 424 | |
| 425 | if mapping is None: |
| 426 | mapping = Mapping() |
| 427 | elif isinstance(mapping, dict): |
| 428 | mapping = Mapping.from_dict(mapping) |
Nothing calls `PretrainedConfig.__init__` directly.
No test coverage was detected for it.