hub / github.com/PaddlePaddle/PaddleNLP / ELMo

Class ELMo

examples/language_model/elmo/elmo.py:37–104 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

35
36
37	class ELMo(nn.Layer):
def __init__(
    self,
    batch_size=None,
    char_embed_dim=16,
    projection_dim=512,
    vocab_size=None,
    cnn_filters=None,
    char_vocab_size=262,
    max_characters_per_token=50,
    num_highways=2,
    num_layers=2,
    dropout=0.1,
    task="pre-train",
):
    """Build the ELMo sub-layers for either pre-training or fine-tuning.

    Args:
        batch_size: Forwarded to ``ELMoBiLM``. Required when ``task`` is
            "pre-train"; falls back to 128 when ``task`` is "fine-tune"
            and no value is given.
        char_embed_dim: Forwarded to ``ELMoCharacterEncoderLayer``.
        projection_dim: Forwarded to ``ELMoCharacterEncoderLayer`` and, as
            both the second and third argument, to ``ELMoBiLM``. It is
            also the input size of the pre-training output layer.
        vocab_size: Output size of the pre-training linear layer.
            Required when ``task`` is "pre-train".
        cnn_filters: Filter specification forwarded to
            ``ELMoCharacterEncoderLayer``. ``None`` selects the default
            ``[[1, 32], [2, 32], [3, 64], [4, 128], [5, 256], [6, 512],
            [7, 1024]]``. Presumably ``[width, num_filters]`` pairs --
            confirm against ``ELMoCharacterEncoderLayer``.
        char_vocab_size: Forwarded to ``ELMoCharacterEncoderLayer``.
        max_characters_per_token: Forwarded to
            ``ELMoCharacterEncoderLayer``.
        num_highways: Forwarded to ``ELMoCharacterEncoderLayer``.
        num_layers: Forwarded to ``ELMoBiLM``.
        dropout: Forwarded to ``ELMoBiLM``.
        task: Either "pre-train" or "fine-tune".

    Raises:
        ValueError: If ``task`` is not one of the two supported values, or
            if ``task`` is "pre-train" and ``vocab_size`` or ``batch_size``
            is ``None``.
    """
    super(ELMo, self).__init__()

    if task not in ("pre-train", "fine-tune"):
        raise ValueError('task should be "pre-train" or "fine-tune"')
    if task == "pre-train":
        if vocab_size is None or batch_size is None:
            raise ValueError('vocab_size and batch_size should be set when task="pre-train"')
    elif batch_size is None:
        batch_size = 128

    # Build the default per call: a list literal in the signature would be
    # one shared object handed to every instance's sub-layer.
    if cnn_filters is None:
        cnn_filters = [[1, 32], [2, 32], [3, 64], [4, 128], [5, 256], [6, 512], [7, 1024]]

    self._projection_dim = projection_dim
    self._task = task

    # NOTE: the attribute name keeps its historical spelling ("embding");
    # renaming it would change the parameter names of the layer.
    self._token_embding_layer = ELMoCharacterEncoderLayer(
        char_vocab_size, char_embed_dim, projection_dim, num_highways, cnn_filters, max_characters_per_token
    )
    self._elmobilm = ELMoBiLM(batch_size, projection_dim, projection_dim, num_layers, dropout, task)

    # The vocabulary projection only exists for pre-training.
    if task == "pre-train":
        weight_attr = paddle.ParamAttr(initializer=I.Normal(mean=0.0, std=1.0 / np.sqrt(projection_dim)))
        self._linear_layer = nn.Linear(projection_dim, vocab_size, weight_attr=weight_attr)
73
@property
def embedding_dim(self):
    """Return twice the ``projection_dim`` given at construction.

    NOTE(review): presumably the doubling reflects the forward and
    backward outputs being combined -- confirm against ``ELMoBiLM``.
    """
    doubled = 2 * self._projection_dim
    return doubled
77
78	def forward(self, inputs):
79	# [batch_size, seq_len, max_characters_per_token]
80	ids, ids_reverse = inputs
81	# [batch_size, seq_len, projection_dim]
82	token_embedding = self._token_embding_layer(ids)
83	token_embedding_reverse = self._token_embding_layer(ids_reverse)
84
85	outs = self._elmobilm(token_embedding, token_embedding_reverse)
86
87	if self._task == "pre-train":
88	# [batch_size, seq_len, projection_dim]
89	fw_out, bw_out = outs
90
91	# [batch_size, max_seq_len, vocab_size]
92	fw_logits = self._linear_layer(fw_out)
93	bw_logits = self._linear_layer(bw_out)
94	return [fw_logits, bw_logits]

Callers 3

train · Function · 0.90

eval · Function · 0.90

get_elmo_layer · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild — real call sites across dependent graphs

searching dependent graphs…