Call this at the end of an epoch, once the buffer is full, to get all of the data from the buffer, with advantages normalized (shifted and scaled to have mean zero and standard deviation one). Also resets the buffer's write pointer (`ptr`) and path start index (`path_start_idx`) to zero.
(self)
| 158 | return self.ptr == self.max_size |
| 159 | |
def get(self):
    """
    Return all data collected in the buffer and reset it for the next epoch.

    Call this at the end of an epoch, once the buffer is full. The
    advantages are normalized (shifted and scaled to have mean zero and
    std one) before being returned, and ``self.adv_buf`` is replaced by
    the normalized values. ``self.ptr`` and ``self.path_start_idx`` are
    both reset to 0.

    Returns:
        A list ``[obs_buf, act_buf, adv_buf, ret_buf, logp_buf, mean_buf,
        log_std_buf]`` holding the buffer's contents, in that order.

    Raises:
        AssertionError: If the buffer is not full
            (``self.ptr != self.max_size``).
    """
    assert self.ptr == self.max_size, "buffer has to be full before you can get"
    self.ptr, self.path_start_idx = 0, 0

    # Advantage normalization trick. The small epsilon keeps the division
    # finite when every advantage is identical (std == 0); without it the
    # whole advantage buffer would turn into NaNs.
    eps = 1e-8
    adv_mean, adv_std = np.mean(self.adv_buf), np.std(self.adv_buf)
    self.adv_buf = (self.adv_buf - adv_mean) / (adv_std + eps)
    return [
        self.obs_buf,
        self.act_buf,
        self.adv_buf,
        self.ret_buf,
        self.logp_buf,
        self.mean_buf,
        self.log_std_buf,
    ]
| 173 | |
| 174 | |
| 175 | """ |
no outgoing calls