(self, X, Y, Xtest, Ytest, activation=T.nnet.relu, learning_rate=1e-2, mu=0.9, epochs=15, batch_sz=100, print_period=100, show_fig=True)
| 113 | self.hidden_layer_sizes = hidden_layer_sizes |
| 114 | |
| 115 | def fit(self, X, Y, Xtest, Ytest, activation=T.nnet.relu, learning_rate=1e-2, mu=0.9, epochs=15, batch_sz=100, print_period=100, show_fig=True): |
| 116 | X = X.astype(np.float32) |
| 117 | Y = Y.astype(np.int32) |
| 118 | |
| 119 | # initialize hidden layers |
| 120 | N, D = X.shape |
| 121 | self.layers = [] |
| 122 | M1 = D |
| 123 | for M2 in self.hidden_layer_sizes: |
| 124 | h = HiddenLayerBatchNorm(M1, M2, activation) |
| 125 | self.layers.append(h) |
| 126 | M1 = M2 |
| 127 | |
| 128 | # final layer |
| 129 | K = len(set(Y)) |
| 130 | h = HiddenLayer(M1, K, T.nnet.softmax) |
| 131 | self.layers.append(h) |
| 132 | |
| 133 | if batch_sz is None: |
| 134 | batch_sz = N |
| 135 | |
| 136 | # collect params for later use |
| 137 | self.params = [] |
| 138 | for h in self.layers: |
| 139 | self.params += h.params |
| 140 | |
| 141 | # NOTE: the output graph is built twice via self.forward: once with |
| 142 | # is_training=True for training and once with is_training=False for test (prediction) |
| 143 | |
| 144 | # set up theano functions and variables |
| 145 | thX = T.matrix('X') |
| 146 | thY = T.ivector('Y') |
| 147 | |
| 148 | # for training |
| 149 | p_y_given_x = self.forward(thX, is_training=True) |
| 150 | |
| 151 | cost = -T.mean(T.log(p_y_given_x[T.arange(thY.shape[0]), thY])) |
| 152 | prediction = T.argmax(p_y_given_x, axis=1) |
| 153 | grads = T.grad(cost, self.params) |
| 154 | |
| 155 | # momentum only |
| 156 | updates = momentum_updates(cost, self.params, learning_rate, mu) |
| 157 | for layer in self.layers[:-1]: |
| 158 | updates += layer.running_update |
| 159 | |
| 160 | train_op = theano.function( |
| 161 | inputs=[thX, thY], |
| 162 | outputs=[cost, prediction], |
| 163 | updates=updates, |
| 164 | ) |
| 165 | |
| 166 | # for testing |
| 167 | test_p_y_given_x = self.forward(thX, is_training=False) |
| 168 | test_prediction = T.argmax(test_p_y_given_x, axis=1) |
| 169 | |
| 170 | self.predict = theano.function( |
| 171 | inputs=[thX], |
| 172 | outputs=test_prediction, |
no test coverage detected