(self, s, eps)
| 68 | self.models[a].partial_fit(X[0], G, self.eligibilities[a]) |
| 69 | |
def sample_action(self, s, eps):
    """Choose an action for state ``s`` with an epsilon-greedy rule.

    One uniform draw from ``np.random.random()`` is compared against
    ``eps``. When the draw is below ``eps`` the action comes from
    ``self.env.action_space.sample()``; otherwise it is the index of the
    largest entry in ``self.predict(s)``.
    """
    # Draw first so the random stream is consumed exactly once per call.
    explore = np.random.random() < eps
    if explore:
        return self.env.action_space.sample()
    # Greedy branch: index of the highest predicted value.
    return np.argmax(self.predict(s))
| 75 | |
| 76 | |
| 77 | # returns a list of states_and_rewards, and the total reward |