return a tuple of (s,r,a,o), where s is of shape self._output_shape, which is [H, W, (hist_len+1) * channel] if input is (H, W, channel)
(self, idx)
| 63 | self._curr_pos = (self._curr_pos + 1) % self.max_size |
| 64 | |
def sample(self, idx):
    """Return the padded transition window located `idx` slots past the cursor.

    `idx` is taken relative to ``self._curr_pos`` and wrapped modulo
    ``self._curr_size``.  A window of ``self.history_len + 1`` consecutive
    entries is read from each of the state / reward / action / isOver
    buffers; when the window would run past ``self._curr_size`` the read is
    delegated to ``self._slice`` so it can wrap around.

    The result is whatever ``self._pad_sample`` produces from that window.
    Per the original documentation this is a tuple ``(s, r, a, o)`` where
    ``s`` has shape ``self._output_shape``, i.e.
    ``[H, W, (hist_len+1) * channel]`` for an ``(H, W, channel)`` input.
    """
    start = (self._curr_pos + idx) % self._curr_size
    window = self.history_len + 1
    stop = start + window
    buffers = (self.state, self.reward, self.action, self.isOver)

    if stop > self._curr_size:
        # Window crosses the end of the filled region: `_slice` receives the
        # start index and the number of entries that spill past the end.
        overflow = stop - self._curr_size
        state, reward, action, isOver = [
            self._slice(buf, start, overflow) for buf in buffers
        ]
    else:
        # Window fits contiguously: plain slices are enough.
        state, reward, action, isOver = [buf[start:stop] for buf in buffers]

    return self._pad_sample(state, reward, action, isOver)
| 84 | |
| 85 | # the next_state is a different episode if current_state.isOver==True |
| 86 | def _pad_sample(self, state, reward, action, isOver): |
no test coverage detected