Configuration class to store the configuration of a `BertModel`.
| 197 | |
| 198 | |
| 199 | class BertConfig(object): |
| 200 | """Configuration class to store the configuration of a `BertModel`. |
| 201 | """ |
| 202 | def __init__(self, |
| 203 | vocab_size_or_config_json_file, |
| 204 | hidden_size=768, |
| 205 | num_hidden_layers=12, |
| 206 | num_attention_heads=12, |
| 207 | intermediate_size=3072, |
| 208 | hidden_act="gelu", |
| 209 | hidden_dropout_prob=0.1, |
| 210 | attention_probs_dropout_prob=0.1, |
| 211 | max_position_embeddings=512, |
| 212 | type_vocab_size=2, |
| 213 | initializer_range=0.02): |
| 214 | """Constructs BertConfig. |
| 215 | |
| 216 | Args: |
| 217 | vocab_size_or_config_json_file: Vocabulary size of `inputs_ids` in `BertModel`. |
| 218 | hidden_size: Size of the encoder layers and the pooler layer. |
| 219 | num_hidden_layers: Number of hidden layers in the Transformer encoder. |
| 220 | num_attention_heads: Number of attention heads for each attention layer in |
| 221 | the Transformer encoder. |
| 222 | intermediate_size: The size of the "intermediate" (i.e., feed-forward) |
| 223 | layer in the Transformer encoder. |
| 224 | hidden_act: The non-linear activation function (function or string) in the |
| 225 | encoder and pooler. If string, "gelu", "relu" and "swish" are supported. |
| 226 | hidden_dropout_prob: The dropout probability for all fully connected |
| 227 | layers in the embeddings, encoder, and pooler. |
| 228 | attention_probs_dropout_prob: The dropout ratio for the attention |
| 229 | probabilities. |
| 230 | max_position_embeddings: The maximum sequence length that this model might |
| 231 | ever be used with. Typically set this to something large just in case |
| 232 | (e.g., 512 or 1024 or 2048). |
| 233 | type_vocab_size: The vocabulary size of the `token_type_ids` passed into |
| 234 | `BertModel`. |
| 235 | initializer_range: The stddev of the truncated_normal_initializer for |
| 236 | initializing all weight matrices. |
| 237 | """ |
| 238 | if isinstance(vocab_size_or_config_json_file, str) or (sys.version_info[0] == 2 |
| 239 | and isinstance(vocab_size_or_config_json_file, unicode)): |
| 240 | with open(vocab_size_or_config_json_file, "r", encoding='utf-8') as reader: |
| 241 | json_config = json.loads(reader.read()) |
| 242 | for key, value in json_config.items(): |
| 243 | self.__dict__[key] = value |
| 244 | elif isinstance(vocab_size_or_config_json_file, int): |
| 245 | self.vocab_size = vocab_size_or_config_json_file |
| 246 | self.hidden_size = hidden_size |
| 247 | self.num_hidden_layers = num_hidden_layers |
| 248 | self.num_attention_heads = num_attention_heads |
| 249 | self.hidden_act = hidden_act |
| 250 | self.intermediate_size = intermediate_size |
| 251 | self.hidden_dropout_prob = hidden_dropout_prob |
| 252 | self.attention_probs_dropout_prob = attention_probs_dropout_prob |
| 253 | self.max_position_embeddings = max_position_embeddings |
| 254 | self.type_vocab_size = type_vocab_size |
| 255 | self.initializer_range = initializer_range |
| 256 | else: |