MCPcopy
hub / github.com/PaddlePaddle/PaddleNLP / ELMo

Class ELMo

examples/language_model/elmo/elmo.py:37–104  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

35
36
37class ELMo(nn.Layer):
def __init__(
    self,
    batch_size=None,
    char_embed_dim=16,
    projection_dim=512,
    vocab_size=None,
    cnn_filters=None,
    char_vocab_size=262,
    max_characters_per_token=50,
    num_highways=2,
    num_layers=2,
    dropout=0.1,
    task="pre-train",
):
    """Build the character encoder, the bidirectional LM and (for pre-training) the output projection.

    Args:
        batch_size: Forwarded to ``ELMoBiLM``. Required when ``task`` is
            ``"pre-train"``; falls back to 128 when ``task`` is
            ``"fine-tune"`` and no value is given.
        char_embed_dim: Forwarded to ``ELMoCharacterEncoderLayer``.
        projection_dim: Forwarded to both sub-layers; also the input width of
            the pre-training output projection.
        vocab_size: Output width of the pre-training projection. Required
            when ``task`` is ``"pre-train"``.
        cnn_filters: Forwarded to ``ELMoCharacterEncoderLayer``. ``None``
            selects the default ``[[1, 32], [2, 32], [3, 64], [4, 128],
            [5, 256], [6, 512], [7, 1024]]`` (presumably
            ``[kernel_width, num_filters]`` pairs -- TODO confirm against
            the encoder layer).
        char_vocab_size: Forwarded to ``ELMoCharacterEncoderLayer``.
        max_characters_per_token: Forwarded to ``ELMoCharacterEncoderLayer``.
        num_highways: Forwarded to ``ELMoCharacterEncoderLayer``.
        num_layers: Forwarded to ``ELMoBiLM``.
        dropout: Forwarded to ``ELMoBiLM``.
        task: Either ``"pre-train"`` or ``"fine-tune"``.

    Raises:
        ValueError: If ``task`` is ``"pre-train"`` and ``vocab_size`` or
            ``batch_size`` is missing, or if ``task`` is neither
            ``"pre-train"`` nor ``"fine-tune"``.
    """
    super(ELMo, self).__init__()

    # Build the default filter spec per call: a list literal in the signature
    # would be a single object shared (and mutable) across every instance.
    if cnn_filters is None:
        cnn_filters = [[1, 32], [2, 32], [3, 64], [4, 128], [5, 256], [6, 512], [7, 1024]]

    if task == "pre-train":
        if vocab_size is None or batch_size is None:
            raise ValueError('vocab_size and batch_size should be set when task="pre-train"')
    elif task == "fine-tune":
        if batch_size is None:
            batch_size = 128
    else:
        raise ValueError('task should be "pre-train" or "fine-tune"')

    self._projection_dim = projection_dim
    self._task = task

    # NOTE: the attribute name keeps its historical spelling ("embding");
    # renaming it would change parameter names in saved checkpoints.
    self._token_embding_layer = ELMoCharacterEncoderLayer(
        char_vocab_size, char_embed_dim, projection_dim, num_highways, cnn_filters, max_characters_per_token
    )
    self._elmobilm = ELMoBiLM(batch_size, projection_dim, projection_dim, num_layers, dropout, task)
    if task == "pre-train":
        # The vocabulary projection only exists for pre-training; its weights
        # are drawn from N(0, 1 / sqrt(projection_dim)).
        paramAttr = paddle.ParamAttr(initializer=I.Normal(mean=0.0, std=1.0 / np.sqrt(projection_dim)))
        self._linear_layer = nn.Linear(projection_dim, vocab_size, weight_attr=paramAttr)
73
@property
def embedding_dim(self):
    """Return twice the configured projection dimension."""
    per_direction = self._projection_dim
    return per_direction * 2
77
def forward(self, inputs):
    """Encode forward/reversed character ids and, when pre-training, project to vocabulary logits.

    Args:
        inputs: A pair ``(ids, ids_reverse)``. Both are passed through the
            same character encoder layer.

    Returns:
        When the task is ``"pre-train"``: ``[fw_logits, bw_logits]``, the
        shared linear projection applied to each of the two BiLM outputs.

        NOTE(review): the listing this method was read from appears to end
        before the method does, so the return path for other tasks is not
        visible here -- confirm against the full file.
    """
    # [batch_size, seq_len, max_characters_per_token]
    ids, ids_reverse = inputs
    # [batch_size, seq_len, projection_dim]
    # The same encoder instance (shared weights) handles both directions.
    token_embedding = self._token_embding_layer(ids)
    token_embedding_reverse = self._token_embding_layer(ids_reverse)

    outs = self._elmobilm(token_embedding, token_embedding_reverse)

    if self._task == "pre-train":
        # [batch_size, seq_len, projection_dim]
        fw_out, bw_out = outs

        # [batch_size, max_seq_len, vocab_size]
        # One projection layer is reused for the forward and backward outputs.
        fw_logits = self._linear_layer(fw_out)
        bw_logits = self._linear_layer(bw_out)
        return [fw_logits, bw_logits]

Callers 3

train (Function) · 0.90
eval (Function) · 0.90
get_elmo_layer (Function) · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…