(
dim_proj=128, # word embedding dimension and LSTM number of hidden units.
patience=10, # Number of epochs to wait before early stopping if there is no progress
max_epochs=5000, # The maximum number of epochs to run
dispFreq=10, # Display to stdout the training progress every N updates
decay_c=0., # Weight decay for the classifier applied to the U weights.
lrate=0.0001, # Learning rate for sgd (not used for adadelta and rmsprop)
n_words=10000, # Vocabulary size
optimizer=adadelta, # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate).
encoder='lstm', # TODO: can be removed must be lstm.
saveto='lstm_model.npz', # The best model will be saved there
validFreq=370, # Compute the validation error after this number of updates.
saveFreq=1110, # Save the parameters after every saveFreq updates
maxlen=100, # Sequences longer than this get ignored
batch_size=16, # The batch size during training.
valid_batch_size=64, # The batch size used for validation/test set.
dataset='imdb',
# Parameter for extra option
noise_std=0.,
use_dropout=True, # If False, slightly faster, but worse test error.
# This frequently needs a bigger model.
reload_model=None, # Path to a saved model we want to start from.
test_size=-1, # If >0, we keep only this number of test examples.
)
| 446 | |
| 447 | |
| 448 | def train_lstm( |
| 449 | dim_proj=128, # word embedding dimension and LSTM number of hidden units. |
| 450 | patience=10, # Number of epochs to wait before early stopping if there is no progress |
| 451 | max_epochs=5000, # The maximum number of epochs to run |
| 452 | dispFreq=10, # Display to stdout the training progress every N updates |
| 453 | decay_c=0., # Weight decay for the classifier applied to the U weights. |
| 454 | lrate=0.0001, # Learning rate for sgd (not used for adadelta and rmsprop) |
| 455 | n_words=10000, # Vocabulary size |
| 456 | optimizer=adadelta, # sgd, adadelta and rmsprop available; sgd is very hard to use and not recommended (it probably needs momentum and a decaying learning rate). |
| 457 | encoder='lstm', # TODO: can be removed must be lstm. |
| 458 | saveto='lstm_model.npz', # The best model will be saved there |
| 459 | validFreq=370, # Compute the validation error after this number of updates. |
| 460 | saveFreq=1110, # Save the parameters after every saveFreq updates |
| 461 | maxlen=100, # Sequences longer than this get ignored |
| 462 | batch_size=16, # The batch size during training. |
| 463 | valid_batch_size=64, # The batch size used for validation/test set. |
| 464 | dataset='imdb', |
| 465 | |
| 466 | # Parameter for extra option |
| 467 | noise_std=0., |
| 468 | use_dropout=True, # If False, slightly faster, but worse test error. |
| 469 | # This frequently needs a bigger model. |
| 470 | reload_model=None, # Path to a saved model we want to start from. |
| 471 | test_size=-1, # If >0, we keep only this number of test examples. |
| 472 | ): |
| 473 | |
| 474 | # Model options |
| 475 | model_options = locals().copy() |
| 476 | print("model options", model_options) |
| 477 | |
| 478 | load_data, prepare_data = get_dataset(dataset) |
| 479 | |
| 480 | print('Loading data') |
| 481 | train, valid, test = load_data(n_words=n_words, valid_portion=0.05, |
| 482 | maxlen=maxlen) |
| 483 | if test_size > 0: |
| 484 | # The test set is sorted by size, but we want to keep examples |
| 485 | # of random sizes, so we must select a random subset of the |
| 486 | # examples. |
| 487 | idx = numpy.arange(len(test[0])) |
| 488 | numpy.random.shuffle(idx) |
| 489 | idx = idx[:test_size] |
| 490 | test = ([test[0][n] for n in idx], [test[1][n] for n in idx]) |
| 491 | |
| 492 | ydim = numpy.max(train[1]) + 1 |
| 493 | |
| 494 | model_options['ydim'] = ydim |
| 495 | |
| 496 | print('Building model') |
| 497 | # This creates the initial parameters as numpy ndarrays. |
| 498 | # Dict name (string) -> numpy ndarray |
| 499 | params = init_params(model_options) |
| 500 | |
| 501 | if reload_model: |
| 502 | load_params('lstm_model.npz', params) |
| 503 | |
| 504 | # This creates Theano Shared Variables from the parameters. |
| 505 | # Dict name (string) -> Theano Tensor Shared Variable |
no test coverage detected