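Calculate the discounted cumulative reward for each buffered step. :param next_state: state passed to the critic to bootstrap the value estimate when the episode is not done :param done: when true, the bootstrap value is 0 :return: None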
(self, next_state, done)
| 234 | self.reward_buffer.append(reward) |
| 235 | |
def finish_path(self, next_state, done):
    """
    Convert the buffered per-step rewards into discounted returns.

    The running return starts at 0 when ``done`` is truthy; otherwise it
    starts at element ``[0, 0]`` of ``self.critic`` applied to a
    one-element float32 batch holding ``next_state``. The reward buffer is
    walked from last to first, and each return is
    ``reward + GAMMA * running_return``. The returns are appended, in the
    original step order and with an extra axis at position 1, to
    ``self.cumulative_reward_buffer``; ``self.reward_buffer`` is then
    emptied.

    :param next_state: state handed to ``self.critic`` for the bootstrap
        value when ``done`` is falsy
    :param done: if truthy, the bootstrap value is 0 and the critic is
        not called
    :return: None
    """
    # Bootstrap value standing in for everything after the last buffered reward.
    running_return = (
        0 if done
        else self.critic(np.array([next_state], np.float32))[0, 0]
    )

    # Accumulate from the newest reward back to the oldest.
    returns_newest_first = []
    for reward in reversed(self.reward_buffer):
        running_return = reward + GAMMA * running_return
        returns_newest_first.append(running_return)

    # Back to chronological order, then insert an axis at position 1 so
    # that extend() appends one entry per step.
    returns_column = np.expand_dims(
        np.array(returns_newest_first[::-1]), axis=1
    )
    self.cumulative_reward_buffer.extend(returns_column)
    self.reward_buffer.clear()
| 254 | |
| 255 | |
| 256 | if __name__ == '__main__': |