Choose an action. :param state: the current state. :param greedy: whether to choose the action greedily (without sampling). :return: the clipped action.
(self, state, greedy=False)
| 186 | self.reward_buffer.clear() |
| 187 | |
def get_action(self, state, greedy=False):
    """
    Choose an action for a single state.

    :param state: a single state as a NumPy array with at least one
        dimension; a leading batch axis is added and it is cast to float32
        before being passed to the actor
    :param greedy: if True, return the actor's mean output directly;
        otherwise sample from a Normal distribution around that mean
    :return: the chosen action clipped to [-action_bound, action_bound]
    """
    state = state[np.newaxis, :].astype(np.float32)
    mean = self.actor(state)
    if greedy:
        # The greedy action is the mean itself, so std is never needed here.
        action = mean[0]
    else:
        # std is only used for sampling, so it is computed on this branch only.
        std = tf.exp(self.actor.logstd)
        pi = tfp.distributions.Normal(mean, std)
        # sample(1) prepends a sample axis: drop it, then take batch entry 0.
        action = tf.squeeze(pi.sample(1), axis=0)[0]  # choosing action
    return np.clip(action, -self.action_bound, self.action_bound)
| 203 | |
| 204 | def save(self): |
| 205 | """ |