Helper function for reproducible behavior during distributed training. See https://pytorch.org/docs/stable/notes/randomness.html for PyTorch's notes on randomness and reproducibility.
()
| 1357 | |
| 1358 | |
def enable_full_determinism():
    """
    Put PyTorch into fully deterministic mode for reproducible behavior during
    distributed training. See
    - https://pytorch.org/docs/stable/notes/randomness.html for pytorch

    Sets the `CUDA_LAUNCH_BLOCKING` and `CUBLAS_WORKSPACE_CONFIG` environment variables, turns on
    `torch.use_deterministic_algorithms`, and pins the cuDNN / TF32 matmul backend flags.
    """
    # Deterministic mode potentially needs either 'CUDA_LAUNCH_BLOCKING' or
    # 'CUBLAS_WORKSPACE_CONFIG' depending on the CUDA version, so both are exported.
    os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

    # PyTorch 2.10+ gets a larger cuBLAS workspace to avoid CUBLAS_STATUS_NOT_INITIALIZED errors
    # (2.11 dev versions also compare as >= 2.10 and take the same branch).
    cublas_workspace = ":4096:8" if is_torch_version(">=", "2.10") else ":16:8"
    os.environ["CUBLAS_WORKSPACE_CONFIG"] = cublas_workspace

    torch.use_deterministic_algorithms(True)

    # cuDNN: deterministic mode on, benchmark mode off
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False

    # Turn off TF32 for CUDA matmuls
    torch.backends.cuda.matmul.allow_tf32 = False
| 1380 | |
| 1381 | |
| 1382 | def disable_full_determinism(): |
no test coverage detected
searching dependent graphs…