(self, X, Y, learning_rate=1.0, mu=0.99, reg=1.0, activation=T.tanh, epochs=500, show_fig=False)
| 21 | self.V = V # vocabulary size |
| 22 | |
| 23 | def fit(self, X, Y, learning_rate=1.0, mu=0.99, reg=1.0, activation=T.tanh, epochs=500, show_fig=False): |
| 24 | M = self.M |
| 25 | V = self.V |
| 26 | K = len(set(Y)) |
| 27 | print("V:", V) |
| 28 | |
| 29 | X, Y = shuffle(X, Y) |
| 30 | Nvalid = 10 |
| 31 | Xvalid, Yvalid = X[-Nvalid:], Y[-Nvalid:] |
| 32 | X, Y = X[:-Nvalid], Y[:-Nvalid] |
| 33 | N = len(X) |
| 34 | |
| 35 | # initial weights |
| 36 | Wx = init_weight(V, M) |
| 37 | Wh = init_weight(M, M) |
| 38 | bh = np.zeros(M) |
| 39 | h0 = np.zeros(M) |
| 40 | Wo = init_weight(M, K) |
| 41 | bo = np.zeros(K) |
| 42 | |
| 43 | thX, thY, py_x, prediction = self.set(Wx, Wh, bh, h0, Wo, bo, activation) |
| 44 | |
| 45 | cost = -T.mean(T.log(py_x[thY])) |
| 46 | grads = T.grad(cost, self.params) |
| 47 | dparams = [theano.shared(p.get_value()*0) for p in self.params] |
| 48 | lr = T.scalar('learning_rate') |
| 49 | |
| 50 | updates = [ |
| 51 | (p, p + mu*dp - lr*g) for p, dp, g in zip(self.params, dparams, grads) |
| 52 | ] + [ |
| 53 | (dp, mu*dp - lr*g) for dp, g in zip(dparams, grads) |
| 54 | ] |
| 55 | |
| 56 | self.train_op = theano.function( |
| 57 | inputs=[thX, thY, lr], |
| 58 | outputs=[cost, prediction], |
| 59 | updates=updates, |
| 60 | allow_input_downcast=True, |
| 61 | ) |
| 62 | |
| 63 | costs = [] |
| 64 | for i in range(epochs): |
| 65 | X, Y = shuffle(X, Y) |
| 66 | n_correct = 0 |
| 67 | cost = 0 |
| 68 | for j in range(N): |
| 69 | # we set 0 to start and 1 to end |
| 70 | # print "X[%d]:" % j, X[j], "len:", len(X[j]) |
| 71 | c, p = self.train_op(X[j], Y[j], learning_rate) |
| 72 | # print "p:", p, "y:", Y[j] |
| 73 | cost += c |
| 74 | if p == Y[j]: |
| 75 | n_correct += 1 |
| 76 | # update the learning rate |
| 77 | learning_rate *= 0.9999 |
| 78 | |
| 79 | # calculate validation accuracy |
| 80 | n_correct_valid = 0 |
no test coverage detected