Continuously monitors a directory and evaluates new checkpoints in it. This method continuously monitors a directory as specified by this Controller's CheckpointManager init arg and runs evaluation on the checkpoints found there. Args: steps: The number of steps to run when evaluating. If -1, this method will evaluate over the entire evaluation dataset.
(
self,
steps: int = -1,
timeout: Optional[Union[int, float]] = None,
timeout_fn: Optional[Callable[[], bool]] = None,
)
| 398 | return output |
| 399 | |
def evaluate_continuously(
    self,
    steps: int = -1,
    timeout: Optional[Union[int, float]] = None,
    timeout_fn: Optional[Callable[[], bool]] = None,
) -> Optional[runner.Output]:
    """Evaluates each new checkpoint that appears in the checkpoint directory.

    Watches `self.checkpoint_manager.directory` through
    `tf.train.checkpoints_iterator`. Every checkpoint path the iterator
    yields is restored with `self.restore_checkpoint` and then evaluated
    with `self.evaluate`.

    Args:
      steps: The number of steps to run when evaluating; forwarded to
        `self.evaluate`. If -1, evaluation covers the entire evaluation
        dataset.
      timeout: The maximum number of seconds to wait between checkpoints.
        See the tf.train.checkpoints_iterator documentation.
      timeout_fn: Optional callable invoked after a timeout. If it returns
        True, no new checkpoints are expected and the iterator exits.

    Returns:
      The result of the most recent `self.evaluate` call (the evaluation
      results as a dictionary mapping names to NumPy values), or None if
      the iterator finished without yielding any checkpoint.

    Raises:
      ValueError: If `evaluator` was not provided as a controller init arg
        (the same `_require` check is applied to `checkpoint_manager`).
      ValueError: If no checkpoint found in
        `self.checkpoint_manager.directory`.
        NOTE(review): this method does not raise that itself; an empty
        iteration just returns None. Confirm whether `restore_checkpoint`
        is the source of this error.
    """
    for required_attr in ("evaluator", "checkpoint_manager"):
        self._require(required_attr, for_method="evaluate_continuously")

    # Narrows the attribute's type for static checkers; `_require` has
    # already verified that it is set.
    assert isinstance(self.checkpoint_manager, tf.train.CheckpointManager)

    # Lazy iterator: it blocks while waiting for the next checkpoint and
    # ends according to `timeout` / `timeout_fn`.
    incoming_checkpoints = tf.train.checkpoints_iterator(
        self.checkpoint_manager.directory,
        timeout=timeout,
        timeout_fn=timeout_fn,
    )

    latest_output = None
    for ckpt_path in incoming_checkpoints:
        self.restore_checkpoint(ckpt_path)
        latest_output = self.evaluate(steps)
    return latest_output
| 440 | |
| 441 | def restore_checkpoint(self, checkpoint_path: Optional[str] = None): |
| 442 | """Restores the model from a checkpoint. |