(model, env, n_episodes=20)
| 68 | |
| 69 | |
def test_agent(model, env, n_episodes=20):
    """Evaluate `model` on `env` and return the mean total reward per episode.

    Each episode starts from ``env.reset()`` and runs until ``env.step``
    reports either of its two end-of-episode flags. Actions come from
    ``epsilon_greedy(model, state, eps=0)`` -- presumably a purely greedy
    policy with exploration switched off; confirm against `epsilon_greedy`.

    Args:
        model: Passed through unchanged to `epsilon_greedy`.
        env: Environment whose ``reset()`` returns ``(state, info)`` and
            whose ``step(action)`` returns a 5-tuple
            ``(state, reward, done, truncated, info)``.
        n_episodes: Number of evaluation episodes to run.

    Returns:
        ``np.mean`` of the per-episode reward sums.
    """
    episode_returns = np.zeros(n_episodes)
    for episode in range(n_episodes):
        state, _ = env.reset()
        total_reward = 0
        terminated = truncated = False
        # Keep stepping until the environment signals the episode is over.
        while not terminated and not truncated:
            action = epsilon_greedy(model, state, eps=0)
            state, reward, terminated, truncated, _ = env.step(action)
            total_reward += reward
        episode_returns[episode] = total_reward
    return np.mean(episode_returns)
| 83 | |
| 84 | |
| 85 | def watch_agent(model, env, eps): |
no test coverage detected