参数说明: X_train:训练数据; y_train:训练数据标签; n_epochs:训练的 epoch 次数; batch_size:每个 batch 的样本数; verbose:是否每个 batch 输出损失; epo_verbose:是否每个 epoch 输出损失
(self, X_train, y_train, n_epochs=20, batch_size=64, verbose=False, epo_verbose=True)
| 945 | v.flush_gradients() |
| 946 | |
def fit(self, X_train, y_train, n_epochs=20, batch_size=64, verbose=False, epo_verbose=True):
    """
    Train the model on ``X_train`` / ``y_train`` using mini-batches.

    For every batch the method runs a forward pass, applies ``softmax`` to
    the output, evaluates ``self.loss`` and its gradient, back-propagates
    the gradient and calls ``self.update()``.

    Parameters:
        X_train: training data. When the model is not yet initialized,
            ``X_train.shape[1]`` is stored as ``self.n_features``.
        y_train: training labels, indexed with the same batch indices as
            ``X_train``.
        n_epochs: number of epochs to run.
        batch_size: batch size handed to ``minibatch`` on every epoch.
        verbose: whether to print the loss after every batch.
        epo_verbose: whether to print the average loss after every epoch.
    """
    self.verbose = verbose
    self.n_epochs = n_epochs
    self.batch_size = batch_size

    # Parameters are only set up on the first call, once the feature
    # count can be read from the training data.
    if not self.is_initialized:
        self.n_features = X_train.shape[1]
        self._set_params()

    # Starts at infinity, so the delta reported for the first epoch is inf.
    prev_loss = np.inf
    for i in range(n_epochs):
        loss, epoch_start = 0.0, time.time()
        batch_generator, n_batch = minibatch(X_train, self.batch_size, shuffle=True)

        for j, batch_idx in enumerate(batch_generator):
            batch_start = time.time()
            X_batch, y_batch = X_train[batch_idx], y_train[batch_idx]

            # Forward pass, then turn the raw output into predictions.
            out, _ = self.forward(X_batch)
            y_pred_batch = softmax(out)
            batch_loss = self.loss(y_batch, y_pred_batch)

            # Backward pass followed by the parameter update.
            grad = self.loss.grad(y_batch, y_pred_batch)
            self.backward(grad)
            self.update()
            loss += batch_loss

            if self.verbose:
                fstr = "\t[Batch {}/{}] Train loss: {:.3f} ({:.1f}s/batch)"
                print(fstr.format(j + 1, n_batch, batch_loss, time.time() - batch_start))

        # Average of the per-batch losses accumulated over this epoch.
        loss /= n_batch
        if epo_verbose:
            fstr = "[Epoch {}] Avg. loss: {:.3f} Delta: {:.3f} ({:.2f}m/epoch)"
            print(fstr.format(i + 1, loss, prev_loss - loss, (time.time() - epoch_start) / 60.0))
        prev_loss = loss
| 990 | |
| 991 | def evaluate(self, X_test, y_test, batch_size=128): |
| 992 | acc = 0.0 |