(self, config: PretrainedConfig)
| 599 | class EncoderModel(PretrainedModel): |
| 600 | |
def __init__(self, config: PretrainedConfig):
    """Set up the encoder from ``config``.

    The config is first validated with ``check_config`` and handed to the
    base-class initialiser. The fields this model needs are then copied
    onto the instance, and the ``EncDecTransformer`` sub-module is built
    with ``is_decoder=False``.

    Args:
        config: Model configuration; every attribute read below must be
            present on it, except ``mlp_type`` and
            ``language_adapter_config``, which are optional.
    """
    self.check_config(config)
    super().__init__(config)

    # Read everything through the config stored on the instance.
    cfg = self.config
    self.mapping = cfg.mapping

    # --- embedding options ---
    self.has_position_embedding = cfg.has_position_embedding
    type_vocab_size = cfg.type_vocab_size
    # Token-type embeddings exist only when a type vocabulary is configured.
    self.has_token_type_embedding = type_vocab_size is not None

    # e.g. BART regular, T5 RMS
    self.layernorm_type = cfg.layernorm_type

    # e.g. BART true, T5 false
    self.has_attention_qkvo_bias = cfg.has_attention_qkvo_bias
    self.has_mlp_bias = cfg.has_mlp_bias

    # e.g. BART false, T5 true
    self.has_model_final_layernorm = cfg.has_model_final_layernorm

    self._dtype = cfg.dtype

    # --- layer counts ---
    # num_layers is the total floor-divided by mapping.pp_size.
    self.total_num_layers = cfg.num_hidden_layers
    self.num_layers = cfg.num_hidden_layers // self.mapping.pp_size

    # --- attention geometry ---
    self.hidden_size = cfg.hidden_size
    self.num_heads = cfg.num_attention_heads
    # KV heads start out equal to the attention heads; the fallback
    # re-reads the same config field when that value is None or <= 0.
    kv_head_count = self.num_heads
    if kv_head_count is None or kv_head_count <= 0:
        kv_head_count = cfg.num_attention_heads
    self.num_kv_heads = kv_head_count

    # An explicit head_size in the config wins; otherwise derive it.
    if cfg.head_size is None:
        self.head_size = self.hidden_size // self.num_heads
    else:
        self.head_size = cfg.head_size

    # Clamping is switched on only for float16 T5 models.
    self.fp16_clamping = cfg.dtype == 'float16' and cfg.model_type == 't5'

    # --- optional config entries ---
    if hasattr(cfg, "mlp_type"):
        self.mlp_type = cfg.mlp_type
    else:
        self.mlp_type = MLPType.MLP

    if hasattr(cfg, 'language_adapter_config'):
        self.language_adapter_config = LanguageAdapterConfig.from_dict(
            cfg.language_adapter_config)
    else:
        self.language_adapter_config = None

    # --- encoder stack ---
    self.transformer = EncDecTransformer(
        cfg.vocab_size,
        cfg.hidden_size,
        max_position_embeddings=cfg.max_position_embeddings,
        has_position_embedding=self.has_position_embedding,
        type_vocab_size=type_vocab_size,
        has_embedding_layernorm=cfg.has_embedding_layernorm,
        has_embedding_scale=cfg.has_embedding_scale,
        layernorm_eps=cfg.norm_epsilon,
        layernorm_type=self.layernorm_type,
        dtype=cfg.dtype,
        use_parallel_embedding=cfg.use_parallel_embedding,
        embedding_sharding_dim=cfg.embedding_sharding_dim,
        mapping=self.mapping,
        has_model_final_layernorm=self.has_model_final_layernorm,
        norm_epsilon=cfg.norm_epsilon,
        is_decoder=False)
nothing calls this directly
no test coverage detected