()
| 16 | |
| 17 | |
| 18 | def main(): |
| 19 | max_iter = 10 |
| 20 | print_period = 10 |
| 21 | |
| 22 | Xtrain, Xtest, Ytrain, Ytest = get_normalized_data() |
| 23 | reg = 0.01 |
| 24 | |
| 25 | Ytrain_ind = y2indicator(Ytrain) |
| 26 | Ytest_ind = y2indicator(Ytest) |
| 27 | |
| 28 | N, D = Xtrain.shape |
| 29 | batch_sz = 500 |
| 30 | n_batches = N // batch_sz |
| 31 | |
| 32 | M = 300 |
| 33 | K = 10 |
| 34 | W1_0 = np.random.randn(D, M) / np.sqrt(D) |
| 35 | b1_0 = np.zeros(M) |
| 36 | W2_0 = np.random.randn(M, K) / np.sqrt(M) |
| 37 | b2_0 = np.zeros(K) |
| 38 | |
| 39 | W1 = W1_0.copy() |
| 40 | b1 = b1_0.copy() |
| 41 | W2 = W2_0.copy() |
| 42 | b2 = b2_0.copy() |
| 43 | |
| 44 | # 1st moment |
| 45 | mW1 = 0 |
| 46 | mb1 = 0 |
| 47 | mW2 = 0 |
| 48 | mb2 = 0 |
| 49 | |
| 50 | # 2nd moment |
| 51 | vW1 = 0 |
| 52 | vb1 = 0 |
| 53 | vW2 = 0 |
| 54 | vb2 = 0 |
| 55 | |
| 56 | # hyperparams |
| 57 | lr0 = 0.001 |
| 58 | beta1 = 0.9 |
| 59 | beta2 = 0.999 |
| 60 | eps = 1e-8 |
| 61 | |
| 62 | # 1. Adam |
| 63 | loss_adam = [] |
| 64 | err_adam = [] |
| 65 | t = 1 |
| 66 | for i in range(max_iter): |
| 67 | for j in range(n_batches): |
| 68 | Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),] |
| 69 | Ybatch = Ytrain_ind[j*batch_sz:(j*batch_sz + batch_sz),] |
| 70 | pYbatch, Z = forward(Xbatch, W1, b1, W2, b2) |
| 71 | |
| 72 | # updates |
| 73 | # gradients |
| 74 | gW2 = derivative_w2(Z, Ybatch, pYbatch) + reg*W2 |
| 75 | gb2 = derivative_b2(Ybatch, pYbatch) + reg*b2 |
no test coverage detected