(self, state, action, reward)
| 74 | return discounted |
| 75 | |
def append_sample(self, state, action, reward):
    """Record one step by appending to the three per-step buffers.

    ``state`` is appended to ``self.states``, ``action`` to
    ``self.actions`` and ``reward`` to ``self.rewards``, in that order,
    so entries at the same index across the buffers belong to the same
    call. Returns ``None``.
    """
    # Resolve each buffer lazily, one at a time, so the lookup/append
    # sequence is states -> actions -> rewards.
    step = (("states", state), ("actions", action), ("rewards", reward))
    for buffer_name, value in step:
        getattr(self, buffer_name).append(value)
| 80 | |
| 81 | # Single gradient step using the whole episode. |
| 82 | def train_model(self): |