| 27 | self.session = session |
| 28 | |
| 29 | def build(self, We, Wx, Wh, bh, h0, Wo, bo): |
| 30 | # make them tf Variables |
| 31 | self.We = tf.Variable(We) |
| 32 | self.Wx = tf.Variable(Wx) |
| 33 | self.Wh = tf.Variable(Wh) |
| 34 | self.bh = tf.Variable(bh) |
| 35 | self.h0 = tf.Variable(h0) |
| 36 | self.Wo = tf.Variable(Wo) |
| 37 | self.bo = tf.Variable(bo) |
| 38 | self.params = [self.We, self.Wx, self.Wh, self.bh, self.h0, self.Wo, self.bo] |
| 39 | |
| 40 | # for easy access |
| 41 | V = self.V |
| 42 | D = self.D |
| 43 | M = self.M |
| 44 | |
| 45 | # placeholders |
| 46 | self.tfX = tf.placeholder(tf.int32, shape=(None,), name='X') |
| 47 | self.tfY = tf.placeholder(tf.int32, shape=(None,), name='Y') |
| 48 | |
| 49 | # convert word indexes to word vectors |
| 50 | # this would be equivalent to doing |
| 51 | # We[tfX] in Numpy / Theano |
| 52 | # or: |
| 53 | # X_one_hot = one_hot_encode(X) |
| 54 | # X_one_hot.dot(We) |
| 55 | XW = tf.nn.embedding_lookup(We, self.tfX) |
| 56 | |
| 57 | # multiply it by input->hidden so we don't have to do |
| 58 | # it inside recurrence |
| 59 | XW_Wx = tf.matmul(XW, self.Wx) |
| 60 | |
| 61 | |
def recurrence(h_t1, XW_Wx_t):
    # Step function handed to tf.scan: given the previous hidden state
    # h(t-1) and the current slice of the precomputed input projection,
    # return the new hidden state h(t) only (no output y(t) is produced here).

    # tf.matmul needs a rank-2 operand, so view the state as a 1 x M row
    prev_state_row = tf.reshape(h_t1, (1, M))
    recurrent_term = tf.matmul(prev_state_row, self.Wh)

    # input term + recurrent term + hidden bias, then self.f
    # (presumably the hidden activation -- confirm where self.f is set)
    pre_activation = XW_Wx_t + recurrent_term + self.bh
    activated = self.f(pre_activation)

    # flatten back to (M,), the same shape the incoming state was reshaped from
    return tf.reshape(activated, (M,))
| 68 | |
| 69 | h = tf.scan( |
| 70 | fn=recurrence, |
| 71 | elems=XW_Wx, |
| 72 | initializer=self.h0, |
| 73 | ) |
| 74 | |
| 75 | # output |
| 76 | logits = tf.matmul(h, self.Wo) + self.bo |
| 77 | prediction = tf.argmax(logits, 1) |
| 78 | self.output_probs = tf.nn.softmax(logits) |
| 79 | |
| 80 | nce_weights = tf.transpose(self.Wo, [1,0]) # needs to be VxD, not DxV |
| 81 | nce_biases = self.bo |
| 82 | |
| 83 | h = tf.reshape(h, (-1, M)) |
| 84 | labels = tf.reshape(self.tfY, (-1, 1)) |
| 85 | |
| 86 | self.cost = tf.reduce_mean( |