(self, state, reward, action, isOver)
| 84 | |
| 85 | # the next_state is a different episode if current_state.isOver==True |
def _pad_sample(self, state, reward, action, isOver):
    """Blank out frames from an earlier episode and emit one sample.

    The leading axis of ``state`` is treated as the history axis (the
    original note describes it as Hist+1,H,W,C -- TODO confirm against
    the caller). ``isOver`` is indexed in step with that axis.

    Entries ``isOver[0 .. history_len - 2]`` are searched from the newest
    to the oldest. At the first truthy entry ``k``, frames ``0 .. k`` are
    set to zero on a copy of ``state``, so the array passed in is never
    modified. When no entry is set, ``state`` is not copied.

    Returns:
        A tuple ``(state, reward[-2], action[-2], isOver[-2])`` where
        ``state`` has had its first axis moved to the last position.
    """
    newest = self.history_len - 2
    # Most recent episode end inside the history window, if any.
    boundary = next(
        (idx for idx in range(newest, -1, -1) if isOver[idx]),
        None,
    )
    if boundary is not None:
        # Copy first so the zero-fill does not touch the caller's array.
        state = copy.deepcopy(state)
        state[:boundary + 1].fill(0)

    # History axis goes last: (Hist+1, ...) -> (..., Hist+1)
    stacked = np.moveaxis(state, 0, -1)
    return (stacked, reward[-2], action[-2], isOver[-2])
| 96 | |
| 97 | def _slice(self, arr, start, end): |
| 98 | s1 = arr[start:] |