(self, zero_stage, dtype, deepcompile)
@pytest.mark.parametrize('zero_stage', [1, 3])
@pytest.mark.parametrize('deepcompile', [True])  # deepcompile==False is included in test_compile_zero
def test(self, zero_stage, dtype, deepcompile):
    """Run ``compare_loss`` with the ``deepcompile`` flag set for a ZeRO stage and dtype.

    The test is skipped, in this order, when:
      * ``required_torch_version(min_version=2.6)`` is false,
      * ``dtype`` is bfloat16 and ``skip_on_arch(min_arch=8)`` skips, or
        ``bf16_required_version_check()`` is false,
      * the accelerator reports the device name ``"cpu"``.

    Args:
        zero_stage: Value placed under ``zero_optimization.stage`` in the config.
        dtype: Torch dtype; float16 adds an ``fp16`` section and bfloat16 adds a
            ``bf16`` section, any other dtype adds neither.
        deepcompile: Value placed under ``compile.deepcompile`` in the config.
    """
    if not required_torch_version(min_version=2.6):
        pytest.skip("DeepCompile requires PyTorch >= v2.6")

    wants_bf16 = dtype == torch.bfloat16
    if wants_bf16:
        # Architecture gate first, then the software-stack gate.
        skip_on_arch(min_arch=8)
        if not bf16_required_version_check():
            pytest.skip(
                "DeepSpeed BFloat16 tests need NCCL >= 2.10.3, CUDA >=11.0, and HW support for BFloat16 to run correctly"
            )

    if get_accelerator().device_name() == "cpu":
        pytest.skip("CPU does not support this test yet")

    ds_config = {
        "train_micro_batch_size_per_gpu": 1,
        "steps_per_print": 1,
        "optimizer": {"type": "Adam", "params": {"lr": 0.00015}},
        "zero_optimization": {"stage": zero_stage},
        "compile": {"deepcompile": deepcompile},
    }

    # Attach the precision section that matches the requested dtype, if any.
    if wants_bf16:
        ds_config["bf16"] = {"enabled": True}
    elif dtype == torch.float16:
        ds_config["fp16"] = {"enabled": True, "initial_scale_power": 8}

    # Need warmup steps
    compare_loss(self, ds_config, dtype, iteration=10)
| 120 | |
| 121 | def test_zero1_releases_grad_buffers_after_optimizer_step(self): |
| 122 | if not required_torch_version(min_version=2.6): |
nothing calls this directly
no test coverage detected