| 31 | |
| 32 | |
| 33 | class ANN(object): |
| 34 | def __init__(self, hidden_layer_sizes): |
| 35 | self.hidden_layer_sizes = hidden_layer_sizes |
| 36 | |
| 37 | def fit(self, X, Y, learning_rate=1e-2, mu=0.99, reg=1e-12, epochs=400, batch_sz=20, print_period=1, show_fig=False): |
| 38 | |
| 39 | # X = X.astype(np.float32) |
| 40 | Y = Y.astype(np.int32) |
| 41 | |
| 42 | # initialize hidden layers |
| 43 | N, D = X.shape |
| 44 | K = len(set(Y)) |
| 45 | self.hidden_layers = [] |
| 46 | M1 = D |
| 47 | count = 0 |
| 48 | for M2 in self.hidden_layer_sizes: |
| 49 | h = HiddenLayer(M1, M2, count) |
| 50 | self.hidden_layers.append(h) |
| 51 | M1 = M2 |
| 52 | count += 1 |
| 53 | W = init_weight(M1, K) |
| 54 | b = np.zeros(K) |
| 55 | self.W = theano.shared(W, 'W_logreg') |
| 56 | self.b = theano.shared(b, 'b_logreg') |
| 57 | |
| 58 | # collect params for later use |
| 59 | self.params = [self.W, self.b] |
| 60 | for h in self.hidden_layers: |
| 61 | self.params += h.params |
| 62 | |
| 63 | # for momentum |
| 64 | dparams = [theano.shared(np.zeros(p.get_value().shape)) for p in self.params] |
| 65 | |
| 66 | # for rmsprop |
| 67 | cache = [theano.shared(np.zeros(p.get_value().shape)) for p in self.params] |
| 68 | |
| 69 | # set up theano functions and variables |
| 70 | thX = T.matrix('X') |
| 71 | thY = T.ivector('Y') |
| 72 | pY = self.forward(thX) |
| 73 | |
| 74 | rcost = reg*T.sum([(p*p).sum() for p in self.params]) |
| 75 | cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost |
| 76 | prediction = self.predict(thX) |
| 77 | grads = T.grad(cost, self.params) |
| 78 | |
| 79 | # momentum only |
| 80 | updates = [ |
| 81 | (p, p + mu*dp - learning_rate*g) for p, dp, g in zip(self.params, dparams, grads) |
| 82 | ] + [ |
| 83 | (dp, mu*dp - learning_rate*g) for dp, g in zip(dparams, grads) |
| 84 | ] |
| 85 | |
| 86 | train_op = theano.function( |
| 87 | inputs=[thX, thY], |
| 88 | outputs=[cost, prediction], |
| 89 | updates=updates, |
| 90 | ) |