hard-copy update for initializing target networks
(self, net, target_net)
| 246 | self.policy_optimizer = tf.optimizers.Adam(policy_lr) |
| 247 | |
def target_ini(self, net, target_net):
    """Initialize a target network with a hard copy of another network.

    Walks the trainable weights of ``target_net`` and ``net`` in lockstep
    and overwrites each target weight with the matching source weight via
    ``assign``. Weights are paired purely by position; pairing stops at the
    end of the shorter list.

    Args:
        net: network whose ``trainable_weights`` are read.
        target_net: network whose ``trainable_weights`` are overwritten
            in place.

    Returns:
        The same ``target_net`` object, after its weights were assigned.
    """
    destination_weights = target_net.trainable_weights
    source_weights = net.trainable_weights
    for destination, source in zip(destination_weights, source_weights):
        # Full overwrite (no blending): the target simply takes the source value.
        destination.assign(source)
    return target_net
| 253 | |
| 254 | def target_soft_update(self, net, target_net, soft_tau): |
| 255 | """ soft update the target net with Polyak averaging """ |