MCPcopy
hub / github.com/tensorlayer/TensorLayer / learn

Method learn

examples/reinforcement_learning/tutorial_DDPG.py:165–193  ·  view source on GitHub ↗

Update parameters. :return: None

(self)

Source from the content-addressed store, hash-verified

163 ) # add randomness to action selection for exploration
164
def learn(self):
    """
    Run one training step on a randomly sampled minibatch.

    Decays the exploration scale ``self.var``, samples ``BATCH_SIZE`` rows
    from ``self.memory``, fits the critic to the one-step TD target built
    from the target networks, updates the actor so that the critic's value
    of its actions increases, and finally calls ``self.ema_update()``.
    :return: None
    """
    # shrink the exploration scale slightly on every update
    self.var *= .9995

    # sample row indices (with replacement) over the full buffer index range
    sampled_rows = np.random.choice(MEMORY_CAPACITY, size=BATCH_SIZE)
    batch = self.memory[sampled_rows, :]

    # columns are read as: state first, then action; the reward is the single
    # column just before the trailing next-state columns
    s_dim = self.state_dim
    a_dim = self.action_dim
    obs = batch[:, :s_dim]
    action = batch[:, s_dim:s_dim + a_dim]
    reward = batch[:, -s_dim - 1:-s_dim]
    next_obs = batch[:, -s_dim:]

    # ---- critic step: regress Q(s, a) onto r + GAMMA * Q_target(s', a') ----
    with tf.GradientTape() as critic_tape:
        next_action = self.actor_target(next_obs)
        next_q = self.critic_target([next_obs, next_action])
        td_target = reward + GAMMA * next_q
        q_pred = self.critic([obs, action])
        critic_loss = tf.losses.mean_squared_error(td_target, q_pred)
    critic_vars = self.critic.trainable_weights
    critic_grads = critic_tape.gradient(critic_loss, critic_vars)
    self.critic_opt.apply_gradients(zip(critic_grads, critic_vars))

    # ---- actor step: minimise -mean(Q), i.e. maximise the critic's value ----
    with tf.GradientTape() as actor_tape:
        policy_action = self.actor(obs)
        policy_q = self.critic([obs, policy_action])
        actor_loss = -tf.reduce_mean(policy_q)  # maximize the q
    actor_vars = self.actor.trainable_weights
    actor_grads = actor_tape.gradient(actor_loss, actor_vars)
    self.actor_opt.apply_gradients(zip(actor_grads, actor_vars))

    # NOTE(review): presumably soft-updates the target networks -- confirm
    # against ema_update's definition
    self.ema_update()
194
195 def store_transition(self, s, a, r, s_):
196 """

Callers 1

tutorial_DDPG.py · File · 0.45

Calls 2

ema_update · Method · 0.95
gradient · Method · 0.80

Tested by

no test coverage detected