Append one timestep of agent-environment interaction to the buffer.
(self, obs, act, rew, val, logp, mean, log_std)
| 97 | self.ptr, self.path_start_idx, self.max_size = 0, 0, size |
| 98 | |
def store(self, obs, act, rew, val, logp, mean, log_std):
    """
    Record a single step of agent-environment interaction in the buffer.

    Each argument is written into its matching ``*_buf`` attribute at the
    current write position (``self.ptr``), after which the write position
    is advanced by one. Before anything is written, the call asserts that
    the write position is still below ``self.max_size``.
    """
    slot = self.ptr
    assert slot < self.max_size     # buffer has to have room so you can store

    # Pair every per-step value with the buffer that receives it; the
    # order matches the parameter order of this method.
    writes = (
        (self.obs_buf, obs),
        (self.act_buf, act),
        (self.rew_buf, rew),
        (self.val_buf, val),
        (self.logp_buf, logp),
        (self.mean_buf, mean),
        (self.log_std_buf, log_std),
    )
    for target, value in writes:
        target[slot] = value

    # Advance only after every field of this step has been written.
    self.ptr = slot + 1
| 112 | |
| 113 | def finish_path(self, last_val=0): |
| 114 | """ |