Method learn

examples/reinforcement_learning/tutorial_DDPG.py:165–193 · view source on GitHub ↗

Update parameters :return: None

(self)

Source from the content-addressed store, hash-verified

163	) # add randomness to action selection for exploration
164
def learn(self):
    """
    Run one training step on a mini-batch sampled from ``self.memory``.

    In order: scale ``self.var`` by 0.9995, draw ``BATCH_SIZE`` row indices
    from ``range(MEMORY_CAPACITY)``, update the critic on the TD error,
    update the actor so that the critic's Q estimate increases, and finally
    call ``self.ema_update()``.

    Row layout as read here: the leading ``state_dim`` columns are the
    state, the following ``action_dim`` columns the action, the trailing
    ``state_dim`` columns the next state, and the single column just before
    those the reward.
    :return: None
    """
    # Presumably the exploration-noise scale (confirm against the action
    # selection code); it shrinks slightly on every call.
    self.var *= 0.9995

    s_dim, a_dim = self.state_dim, self.action_dim
    picked = np.random.choice(MEMORY_CAPACITY, size=BATCH_SIZE)
    batch = self.memory[picked, :]

    # Split the sampled rows into their transition components.
    cur_states = batch[:, :s_dim]
    taken_actions = batch[:, s_dim:s_dim + a_dim]
    next_states = batch[:, -s_dim:]
    batch_rewards = batch[:, -s_dim - 1:-s_dim]

    # Critic step: fit Q(s, a) to the bootstrapped target built from the
    # target actor and target critic.
    with tf.GradientTape() as critic_tape:
        next_actions = self.actor_target(next_states)
        next_q = self.critic_target([next_states, next_actions])
        td_target = batch_rewards + GAMMA * next_q
        current_q = self.critic([cur_states, taken_actions])
        td_error = tf.losses.mean_squared_error(td_target, current_q)
    critic_weights = self.critic.trainable_weights
    critic_grads = critic_tape.gradient(td_error, critic_weights)
    self.critic_opt.apply_gradients(zip(critic_grads, critic_weights))

    # Actor step: only the actor's weights are updated, using the critic's
    # score of the actions the actor proposes for the sampled states.
    with tf.GradientTape() as actor_tape:
        proposed_actions = self.actor(cur_states)
        proposed_q = self.critic([cur_states, proposed_actions])
        actor_loss = -tf.reduce_mean(proposed_q)  # minimising -Q maximises Q
    actor_weights = self.actor.trainable_weights
    actor_grads = actor_tape.gradient(actor_loss, actor_weights)
    self.actor_opt.apply_gradients(zip(actor_grads, actor_weights))

    self.ema_update()
194
195	def store_transition(self, s, a, r, s_):
196	"""

tutorial_DDPG.pyFile · 0.45

ema_updateMethod · 0.95

gradientMethod · 0.80

no test coverage detected