()
| 43 | F._default_context_str == "cpu", reason="stream only runs on GPU." |
| 44 | ) |
def test_basics():
    """Check ``OPS.copy_u_sum`` on the default stream and on a new CUDA stream.

    A reference result is computed from a random graph created on
    ``F.cpu()`` and moved to ``F.ctx()``. The same operator is then run on
    copies of the inputs moved to ``F.ctx()``, first without any explicit
    stream and then inside a ``torch.cuda.stream`` context, and each output
    is compared to the reference with ``torch.equal``.
    """
    g = rand_graph(10, 20, device=F.cpu())
    x = torch.ones(g.num_nodes(), 10)
    result = OPS.copy_u_sum(g, x).to(F.ctx())

    # launch on default stream used in DGL
    xx = x.to(device=F.ctx())
    gg = g.to(device=F.ctx())
    default_out = OPS.copy_u_sum(gg, xx)
    assert torch.equal(default_out, result)

    # launch on new stream created via torch.cuda
    s = torch.cuda.Stream(device=F.ctx())
    with torch.cuda.stream(s):
        xx = x.to(device=F.ctx(), non_blocking=True)
        gg = g.to(device=F.ctx())
        # Keep the output: this is the only call issued inside the stream
        # context, so it is the value that must be verified.
        stream_out = OPS.copy_u_sum(gg, xx)
    # Wait for the work queued on ``s`` before reading its output.
    s.synchronize()
    assert torch.equal(stream_out, result)
    # Inputs that were moved inside the stream context must also give the
    # reference result when the operator is re-run after synchronization.
    assert torch.equal(OPS.copy_u_sum(gg, xx), result)
| 64 | |
| 65 | |
| 66 | @unittest.skipIf( |
no test coverage detected