Sets seeds for all random libraries. Args: seed (int): seed for random states
(self, seed: int, dpseed_with_tpoffset: bool = False)
| 502 | logger.info(f"process rank {global_rank} is bound to host:{socket.gethostname()} device: {device_ordinal}") |
| 503 | |
def set_seed(self, seed: int, dpseed_with_tpoffset: bool = False):
    """Sets seeds for all random libraries.

    Seeds Python's ``random``, NumPy and torch with ``seed``, then registers
    one seed per parallel mode via ``add_seed`` and finally calls
    ``set_mode(ParallelMode.DATA)``.

    Args:
        seed (int): seed for random states
        dpseed_with_tpoffset (bool): when True, the data parallel seed is
            shifted by ``pipeline_offset * 1024`` instead of being the plain
            ``seed``. NOTE(review): despite the "tp" in the name, the offset
            applied here is derived from the PIPELINE local rank, not the
            tensor parallel rank -- confirm this is intended.

    Raises:
        AssertionError: if ``torch.cuda.is_available()`` is false (only
            when assertions are enabled).
    """
    # Local rank recorded for the pipeline mode; 0 when none is registered.
    pipeline_offset = self._local_ranks.get(ParallelMode.PIPELINE, 0)
    # Computed once: both the optional DP shift and the TP seed use it.
    pipeline_shift = pipeline_offset * 1024
    global_rank = self.get_global_rank()

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    assert torch.cuda.is_available(), "set_seed requires CUDA to be available"

    # The data parallel seed is kept the same within a pipeline stage; it
    # only varies between stages when dpseed_with_tpoffset is set.
    dp_seed = seed + pipeline_shift if dpseed_with_tpoffset else seed
    add_seed(ParallelMode.DATA, dp_seed)

    # Model (tensor) parallel seeds differ across tensor-parallel ranks.
    if self.is_initialized(ParallelMode.TENSOR):
        tp_rank = self.get_local_rank(ParallelMode.TENSOR)
        tp_seed = seed + tp_rank + pipeline_shift
        add_seed(ParallelMode.TENSOR, tp_seed)

    set_mode(ParallelMode.DATA)

    seeds = get_seeds()
    seed_str = ", ".join(f"{k}: {v}" for k, v in seeds.items())
    # The trailing space after the last comma fixes the original message,
    # which ran "...," and "the default..." together.
    logger.info(
        f"initialized seed on rank {global_rank}, "
        f"numpy: {seed}, python random: {seed}, {seed_str}, "
        f"the default parallel seed is {ParallelMode.DATA}."
    )
| 539 | |
| 540 | def set_virtual_pipeline_parallel_size(self, size): |
| 541 | self.virtual_pipeline_parallel_size = size |
no test coverage detected