| 22 | # after each reset / life loss. AtariPreprocessing only does NOOPs, so without |
| 23 | # this the agent wastes a lot of frames waiting for a random FIRE. |
class FireResetEnv(gym.Wrapper):
    """Wrapper that issues one FIRE action (action index 1) right after reset.

    NOTE(review): only ``reset`` is overridden here, so FIRE is pressed only
    when something calls ``reset`` on this wrapper — confirm that the
    life-loss path goes through ``reset`` if that behaviour is relied upon.
    """

    def reset(self, **kwargs):
        """Reset the wrapped env, press FIRE once, and return the result.

        Args:
            **kwargs: Forwarded unchanged to the wrapped env's ``reset``.

        Returns:
            ``(obs, info)``: the observation and info dict produced by the
            FIRE step, or by a second ``reset`` if that step already ended
            the episode.
        """
        self.env.reset(**kwargs)
        # Assumes action 1 is FIRE for the wrapped env — TODO confirm against
        # the env's action meanings.
        obs, _, terminated, truncated, info = self.env.step(1)  # FIRE
        if terminated or truncated:
            # The FIRE step ended the episode; start over so the caller
            # always receives the first observation of a live episode.
            obs, info = self.env.reset(**kwargs)
        # Pass the env's info through instead of an empty dict, so callers
        # keep whatever the env reports at reset time.
        return obs, info
| 31 | |
| 32 | |
| 33 | # Treats each life as its own episode for bootstrapping (so Q-targets / GAE don't |