MCPcopy
hub / github.com/tensorflow/tfjs-examples / playStep

Method playStep

snake-dqn/agent.js:81–121  ·  view source on GitHub ↗

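Play one step of the game. Returns an object `{action, cumulativeReward, done, fruitsEaten}` describing the step that was just played; when `done` is true, the agent is reset before the method returns. (The source comment's `@returns {number | null}` tag — "the total reward from the game as a plain number if this step ends the game, else `null`" — does not match the code shown below.)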

()

Source from the content-addressed store, hash-verified

79 * the total reward from the game as a plain number. Else, `null`.
80 */
81 playStep() {
82 this.epsilon = this.frameCount >= this.epsilonDecayFrames ?
83 this.epsilonFinal :
84 this.epsilonInit + this.epsilonIncrement_ * this.frameCount;
85 this.frameCount++;
86
87 // The epsilon-greedy algorithm.
88 let action;
89 const state = this.game.getState();
90 if (Math.random() < this.epsilon) {
91 // Pick an action at random.
92 action = getRandomAction();
93 } else {
94 // Greedily pick an action based on online DQN output.
95 tf.tidy(() => {
96 const stateTensor =
97 getStateTensor(state, this.game.height, this.game.width)
98 action = ALL_ACTIONS[
99 this.onlineNetwork.predict(stateTensor).argMax(-1).dataSync()[0]];
100 });
101 }
102
103 const {state: nextState, reward, done, fruitEaten} = this.game.step(action);
104
105 this.replayMemory.append([state, action, reward, done, nextState]);
106
107 this.cumulativeReward_ += reward;
108 if (fruitEaten) {
109 this.fruitsEaten_++;
110 }
111 const output = {
112 action,
113 cumulativeReward: this.cumulativeReward_,
114 done,
115 fruitsEaten: this.fruitsEaten_
116 };
117 if (done) {
118 this.reset();
119 }
120 return output;
121 }
122
123 /**
124 * Perform training on a randomly sampled batch from the replay buffer.

Callers 2

agent_test.jsFile · 0.80
trainFunction · 0.80

Calls 7

resetMethod · 0.95
getRandomActionFunction · 0.90
getStateTensorFunction · 0.90
getStateMethod · 0.80
stepMethod · 0.80
predictMethod · 0.45
appendMethod · 0.45

Tested by

no test coverage detected