Replay episodes forever using the supplied action picker. `get_action(state: np.ndarray) -> int`.
(env, get_action)
| 43 | |
| 44 | |
def run_test_loop(env, get_action):
    """Play episodes back-to-back without end, printing each episode's score.

    `get_action(state: np.ndarray) -> int` is called once per step with the
    current observation converted to a float32 array; its result is passed
    straight to `env.step`.

    The outer loop has no exit condition of its own. `quit_if_window_closed`
    is invoked before every step (presumably it terminates the program when
    the render window is closed -- confirm against its definition).
    """
    while True:
        # A fresh episode: only the observation from reset() is used.
        observation, _ = env.reset()
        observation = np.array(observation, dtype=np.float32)
        episode_score = 0

        while True:
            quit_if_window_closed(env)
            chosen = get_action(observation)
            next_observation, reward, terminated, truncated, _ = env.step(chosen)
            observation = np.array(next_observation, dtype=np.float32)
            episode_score += reward
            # The episode is over on either termination or truncation.
            if terminated or truncated:
                break

        print(f"test score: {episode_score}")