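* Play one step of the game.
*
* @returns {{action: *, cumulativeReward: number, done: boolean, fruitsEaten: number}}
*   A summary of the step: the action taken, the cumulative reward and the
*   number of fruits eaten so far in the current game, and whether this step
*   ended the game. (Earlier wording described the return value as
*   `{number | null}`; the code returns this object on every step.)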
()
| 79 | * the total reward from the game as a plain number. Else, `null`. |
| 80 | */ |
| 81 | playStep() { |
| 82 | this.epsilon = this.frameCount >= this.epsilonDecayFrames ? |
| 83 | this.epsilonFinal : |
| 84 | this.epsilonInit + this.epsilonIncrement_ * this.frameCount; |
| 85 | this.frameCount++; |
| 86 | |
| 87 | // The epsilon-greedy algorithm. |
| 88 | let action; |
| 89 | const state = this.game.getState(); |
| 90 | if (Math.random() < this.epsilon) { |
| 91 | // Pick an action at random. |
| 92 | action = getRandomAction(); |
| 93 | } else { |
| 94 | // Greedily pick an action based on online DQN output. |
| 95 | tf.tidy(() => { |
| 96 | const stateTensor = |
| 97 | getStateTensor(state, this.game.height, this.game.width) |
| 98 | action = ALL_ACTIONS[ |
| 99 | this.onlineNetwork.predict(stateTensor).argMax(-1).dataSync()[0]]; |
| 100 | }); |
| 101 | } |
| 102 | |
| 103 | const {state: nextState, reward, done, fruitEaten} = this.game.step(action); |
| 104 | |
| 105 | this.replayMemory.append([state, action, reward, done, nextState]); |
| 106 | |
| 107 | this.cumulativeReward_ += reward; |
| 108 | if (fruitEaten) { |
| 109 | this.fruitsEaten_++; |
| 110 | } |
| 111 | const output = { |
| 112 | action, |
| 113 | cumulativeReward: this.cumulativeReward_, |
| 114 | done, |
| 115 | fruitsEaten: this.fruitsEaten_ |
| 116 | }; |
| 117 | if (done) { |
| 118 | this.reset(); |
| 119 | } |
| 120 | return output; |
| 121 | } |
| 122 | |
| 123 | /** |
| 124 | * Perform training on a randomly sampled batch from the replay buffer. |
no test coverage detected