| 35 | # gone. Without this, every life loss triggers a full env.reset() — burning |
| 36 | # frames on noop_max + FIRE and breaking long-horizon credit assignment. |
class LifeLossTerminalEnv(gym.Wrapper):
    """Report a lost life as ``terminated`` while keeping the game running.

    ``step`` flags ``terminated=True`` whenever the ``"lives"`` value in
    ``info`` drops below the previously seen value but is still positive.
    Whether the wrapped env itself ended the episode is tracked in
    ``self.game_over`` and exposed to callers as ``info["game_over"]``.

    ``reset`` only resets the wrapped env when it really finished; after a
    life-loss terminal it advances the wrapped env by a single step instead.
    """

    def __init__(self, env):
        super().__init__(env)
        # Last life count read from ``info``; 0 until the first reset/step.
        self.lives = 0
        # Starts True so that the very first ``reset`` does a real reset.
        self.game_over = True

    def step(self, action):
        """Step the wrapped env, turning a life loss into a terminal signal.

        Returns the wrapped env's 5-tuple, with ``terminated`` forced to
        ``True`` on a life loss and ``info["game_over"]`` added.
        """
        obs, reward, terminated, truncated, info = self.env.step(action)

        # Record the wrapped env's own verdict before it may be overridden.
        self.game_over = terminated or truncated
        info["game_over"] = self.game_over

        remaining = info.get("lives", 0)
        # Only a drop to a still-positive count is handled here; a count of
        # 0 is left for the wrapped env to report itself.
        life_lost = 0 < remaining < self.lives
        if life_lost:
            terminated = True
        self.lives = remaining

        return obs, reward, terminated, truncated, info

    def reset(self, **kwargs):
        """Reset the wrapped env only if it actually finished.

        After a life-loss terminal the wrapped env is stepped once with
        action 0 (presumably NOOP -- confirm for the env in use) so the game
        keeps its remaining lives. ``kwargs`` are forwarded only when a real
        reset takes place.
        """
        full_reset = self.game_over
        if not full_reset:
            # Fake terminal: move on by one step instead of restarting.
            obs, _, terminated, truncated, info = self.env.step(0)
            # That single step may itself have ended the game.
            full_reset = terminated or truncated
        if full_reset:
            obs, info = self.env.reset(**kwargs)

        self.lives = info.get("lives", 0)
        return obs, info
| 64 | |
| 65 | ENV_IDS = { |
| 66 | "breakout": "ALE/Breakout-v5", |