hub / github.com/tensorflow/tfjs-examples / playStep

Method playStep

snake-dqn/agent.js:81–121 · view source on GitHub ↗

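Play one step of the game.

@returns {object} A summary of the step with the fields `action` (the action that was taken), `cumulativeReward` (the agent's running cumulative reward after this step), `done` (whether the game reported that this step ended it), and `fruitsEaten` (the agent's running fruit count after this step).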

()

Source from the content-addressed store, hash-verified

79	* the total reward from the game as a plain number. Else, `null`.
80	*/
81	playStep() {
82	this.epsilon = this.frameCount >= this.epsilonDecayFrames ?
83	this.epsilonFinal :
84	this.epsilonInit + this.epsilonIncrement_ * this.frameCount;
85	this.frameCount++;
86
87	// The epsilon-greedy algorithm.
88	let action;
89	const state = this.game.getState();
90	if (Math.random() < this.epsilon) {
91	// Pick an action at random.
92	action = getRandomAction();
93	} else {
94	// Greedily pick an action based on online DQN output.
95	tf.tidy(() => {
96	const stateTensor =
97	getStateTensor(state, this.game.height, this.game.width)
98	action = ALL_ACTIONS[
99	this.onlineNetwork.predict(stateTensor).argMax(-1).dataSync()[0]];
100	});
101	}
102
103	const {state: nextState, reward, done, fruitEaten} = this.game.step(action);
104
105	this.replayMemory.append([state, action, reward, done, nextState]);
106
107	this.cumulativeReward_ += reward;
108	if (fruitEaten) {
109	this.fruitsEaten_++;
110	}
111	const output = {
112	action,
113	cumulativeReward: this.cumulativeReward_,
114	done,
115	fruitsEaten: this.fruitsEaten_
116	};
117	if (done) {
118	this.reset();
119	}
120	return output;
121	}
122
123	/**
124	* Perform training on a randomly sampled batch from the replay buffer.

Callers 2

agent_test.js (File) · 0.80

train (Function) · 0.80

Calls 7

reset (Method) · 0.95

getRandomAction (Function) · 0.90

getStateTensor (Function) · 0.90

getState (Method) · 0.80

step (Method) · 0.80

predict (Method) · 0.45

append (Method) · 0.45

Tested by

no test coverage detected