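Map from observation to action using an epsilon-greedy policy with epsilon = 0.01 (the highest-scoring action is chosen, except that with probability 0.01 a random action is sampled instead).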
(s)
| 16 | |
| 17 | def play_one_episode(env, func, render=False): |
| 18 | def predict(s): |
| 19 | """ |
| 20 | Map from observation to action, with 0.01 greedy. |
| 21 | """ |
| 22 | s = np.expand_dims(s, 0) # batch |
| 23 | act = func(s)[0][0].argmax() |
| 24 | if random.random() < 0.01: |
| 25 | spc = env.action_space |
| 26 | act = spc.sample() |
| 27 | return act |
| 28 | |
| 29 | ob = env.reset() |
| 30 | sum_r = 0 |
no test coverage detected