参数说明: X_train:训练数据; y_train:训练数据标签; n_epochs:训练的 epoch 次数; batch_size:每个 batch 的样本数; verbose:是否在每个 batch 后输出损失; epo_verbose:是否在每个 epoch 后输出平均损失
(self, X_train, y_train, n_epochs=20, batch_size=64, verbose=False, epo_verbose=True)
| 343 | v.flush_gradients() |
| 344 | |
def fit(self, X_train, y_train, n_epochs=20, batch_size=64, verbose=False, epo_verbose=True):
    """
    Train the model with mini-batch updates for a fixed number of epochs.

    For every batch the method runs ``self.forward``, applies ``softmax`` to
    the output, evaluates ``self.loss`` and its gradient, then calls
    ``self.backward`` followed by ``self.update``.

    Parameters
    ----------
    X_train : training data; ``X_train.shape[1]`` is stored as
        ``self.n_features`` when the model is not yet initialized
    y_train : training labels, indexed with the same batch indices as X_train
    n_epochs : number of passes over the training data
    batch_size : batch size handed to ``minibatch``
    verbose : if True, print the loss after every batch
    epo_verbose : if True, print the average loss after every epoch
    """
    self.verbose = verbose
    self.n_epochs = n_epochs
    self.batch_size = batch_size

    # Record the feature count and run `_set_params` only when the model
    # has not been initialized yet.
    if not self.is_initialized:
        self.n_features = X_train.shape[1]
        self._set_params()

    # The report templates never change, so build them once up front.
    batch_fstr = "\t[Batch {}/{}] Train loss: {:.3f} ({:.1f}s/batch)"
    epoch_fstr = "[Epoch {}] Avg. loss: {:.3f} Delta: {:.3f} ({:.2f}m/epoch)"

    # Starting from +inf makes the first epoch report an infinite delta.
    prev_loss = np.inf
    for i in range(n_epochs):
        loss, epoch_start = 0.0, time.time()
        batch_generator, n_batch = minibatch(X_train, self.batch_size, shuffle=True)

        for j, batch_idx in enumerate(batch_generator):
            batch_start = time.time()
            X_batch, y_batch = X_train[batch_idx], y_train[batch_idx]

            # Forward pass; the second return value is not used here.
            out, _ = self.forward(X_batch)
            y_pred_batch = softmax(out)
            batch_loss = self.loss(y_batch, y_pred_batch)

            # Backward pass followed by the parameter update.
            grad = self.loss.grad(y_batch, y_pred_batch)
            _, _ = self.backward(grad)
            self.update()
            loss += batch_loss

            if self.verbose:
                print(batch_fstr.format(j + 1, n_batch, batch_loss, time.time() - batch_start))

        # Report the mean of the per-batch losses for this epoch.
        loss /= n_batch
        if epo_verbose:
            print(epoch_fstr.format(i + 1, loss, prev_loss - loss, (time.time() - epoch_start) / 60.0))
        prev_loss = loss
| 388 | |
| 389 | def evaluate(self, X_test, y_test, batch_size=128): |
| 390 | acc = 0.0 |