(env, n_episodes=10000)
| 27 | |
| 28 | |
def gather_samples(env, n_episodes=10000):
    """Collect state-action samples from random-policy rollouts.

    Runs ``n_episodes`` episodes on ``env``, drawing every action from
    ``env.action_space.sample()``. Before each step, the current observation
    and the chosen action are flattened and joined into a single 1-D array.

    Args:
        env: Environment object providing ``reset()`` returning
            ``(observation, info)``, ``step(action)`` returning
            ``(observation, reward, done, truncated, info)``, and an
            ``action_space`` with a ``sample()`` method.
        n_episodes: Number of episodes to roll out.

    Returns:
        A list with one 1-D numpy array per step taken, laid out as the
        flattened observation followed by the flattened action. The
        observation returned by the last step of an episode is not recorded.
    """
    samples = []
    for _ in range(n_episodes):
        s, _info = env.reset()
        done = truncated = False
        while not (done or truncated):
            a = env.action_space.sample()
            # Flatten both parts so scalar observations and vector-valued
            # actions concatenate too; for a 1-D observation and a scalar
            # action this yields the same array as concatenating (s, [a]).
            samples.append(np.concatenate((np.ravel(s), np.ravel(a))))
            s, _reward, done, truncated, _info = env.step(a)
    return samples
| 42 | |
| 43 | |
| 44 | class Model: |