Perform the run loop. Run the environment loop either for `num_episodes` episodes or for at least `num_steps` steps (the last episode is always run until completion, so the total number of steps may be slightly more than `num_steps`). At least one of these two arguments has to be None.
(self,
num_episodes: Optional[int] = None,
num_steps: Optional[int] = None)
| 142 | return result |
| 143 | |
def run(
    self,
    num_episodes: Optional[int] = None,
    num_steps: Optional[int] = None,
):
    """Run episodes back to back until the requested budget is used up.

    The budget is expressed either as a number of episodes (`num_episodes`)
    or as a minimal number of environment steps (`num_steps`); the two
    cannot be combined. The budget is only checked between episodes, so an
    episode that has started is always finished, and the total number of
    steps may therefore end up slightly above `num_steps`.

    When neither argument is supplied no limit applies and the loop keeps
    starting new episodes indefinitely.

    Args:
      num_episodes: number of episodes to run the loop for.
      num_steps: minimal number of steps to run the loop for.

    Raises:
      ValueError: If both 'num_episodes' and 'num_steps' are not None.
    """
    # The two budgets are mutually exclusive; reject the call up front.
    if num_episodes is not None and num_steps is not None:
        raise ValueError('Either "num_episodes" or "num_steps" should be None.')

    episodes_done = 0
    steps_done = 0

    def budget_exhausted() -> bool:
        # A limit that is None never triggers termination.
        if num_episodes is not None and episodes_done >= num_episodes:
            return True
        return num_steps is not None and steps_done >= num_steps

    with signals.runtime_terminator():
        while not budget_exhausted():
            episode_result = self.run_episode()
            episodes_done += 1
            steps_done += episode_result['episode_length']
            # Hand the finished episode's results to the loop's logger.
            self._logger.write(episode_result)
| 181 | |
| 182 | # Placeholder for an EnvironmentLoop alias |
| 183 |