()
| 27 | |
| 28 | |
| 29 | def main(): |
| 30 | # step 1: get the data and define all the usual variables |
| 31 | Xtrain, Xtest, Ytrain, Ytest = get_normalized_data() |
| 32 | |
| 33 | max_iter = 20 |
| 34 | print_period = 10 |
| 35 | |
| 36 | lr = 0.0004 |
| 37 | reg = 0.01 |
| 38 | |
| 39 | Xtrain = Xtrain.astype(np.float32) |
| 40 | Ytrain = Ytrain.astype(np.float32) |
| 41 | Xtest = Xtest.astype(np.float32) |
| 42 | Ytest = Ytest.astype(np.float32) |
| 43 | Ytrain_ind = y2indicator(Ytrain).astype(np.float32) |
| 44 | Ytest_ind = y2indicator(Ytest).astype(np.float32) |
| 45 | |
| 46 | N, D = Xtrain.shape |
| 47 | batch_sz = 500 |
| 48 | n_batches = N // batch_sz |
| 49 | |
| 50 | M = 300 |
| 51 | K = 10 |
| 52 | W1_init = np.random.randn(D, M) / np.sqrt(D) |
| 53 | b1_init = np.zeros(M) |
| 54 | W2_init = np.random.randn(M, K) / np.sqrt(M) |
| 55 | b2_init = np.zeros(K) |
| 56 | |
| 57 | # step 2: define theano variables and expressions |
| 58 | thX = T.matrix('X') |
| 59 | thT = T.matrix('T') |
| 60 | W1 = theano.shared(W1_init, 'W1') |
| 61 | b1 = theano.shared(b1_init, 'b1') |
| 62 | W2 = theano.shared(W2_init, 'W2') |
| 63 | b2 = theano.shared(b2_init, 'b2') |
| 64 | |
| 65 | # we can use the built-in theano functions to do relu and softmax |
| 66 | thZ = relu( thX.dot(W1) + b1 ) # relu is new in version 0.7.1 but just in case you don't have it |
| 67 | thY = T.nnet.softmax( thZ.dot(W2) + b2 ) |
| 68 | |
| 69 | # define the cost function and prediction |
| 70 | cost = -(thT * T.log(thY)).sum() + reg*((W1*W1).sum() + (b1*b1).sum() + (W2*W2).sum() + (b2*b2).sum()) |
| 71 | prediction = T.argmax(thY, axis=1) |
| 72 | |
| 73 | # step 3: training expressions and functions |
| 74 | # we can just include regularization as part of the cost because it is also automatically differentiated! |
| 75 | update_W1 = W1 - lr*T.grad(cost, W1) |
| 76 | update_b1 = b1 - lr*T.grad(cost, b1) |
| 77 | update_W2 = W2 - lr*T.grad(cost, W2) |
| 78 | update_b2 = b2 - lr*T.grad(cost, b2) |
| 79 | |
| 80 | train = theano.function( |
| 81 | inputs=[thX, thT], |
| 82 | updates=[(W1, update_W1), (b1, update_b1), (W2, update_W2), (b2, update_b2)], |
| 83 | ) |
| 84 | |
| 85 | # create another function for this because we want it over the whole dataset |
| 86 | get_prediction = theano.function( |
no test coverage detected