(num_episodes=5)
| 189 | |
# Episode returns collected across every call to test_agent(), in play order.
test_returns = []


def test_agent(num_episodes=5):
    """Play evaluation episodes on ``test_env`` and record their returns.

    Each episode starts from ``test_env.reset()`` and is rendered on every
    step. Actions come from ``get_action(s, 0)``; per the original comment
    the second argument is the noise scale, so 0 is meant to give
    deterministic actions. An episode ends when the environment reports
    done or once ``max_episode_length`` steps have been taken.

    Relies on the file-level names ``test_env``, ``get_action`` and
    ``max_episode_length`` being defined before the call.

    Args:
        num_episodes: Number of evaluation episodes to play.

    Side effects:
        Prints one summary line per episode and appends each episode's
        return to the module-level ``test_returns`` list.
    """
    for _episode in range(num_episodes):
        s, episode_return, episode_length, d = test_env.reset(), 0, 0, False
        # Stop on the environment's done flag or at the step cap, whichever
        # comes first.
        while not (d or (episode_length == max_episode_length)):
            # Take deterministic actions at test time (noise_scale=0)
            test_env.render()
            s, r, d, _ = test_env.step(get_action(s, 0))
            episode_return += r
            episode_length += 1
        print('test return:', episode_return, 'episode_length:', episode_length)
        test_returns.append(episode_return)
| 206 | |
| 207 | |
| 208 | # Main loop: play episode and train |
no test coverage detected