MCPcopy
hub / github.com/dmlc/dgl / test_record_stream_ndarray

Function test_record_stream_ndarray

tests/python/pytorch/test_ffi-stream.py:86–122  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

84)
85# borrowed from PyTorch, test/test_cuda.py: test_record_stream()
def test_record_stream_ndarray():
    """Check that ``record_stream`` defers re-use of a GPU allocation.

    A pinned CPU array is copied to GPU 0 inside a side stream. The
    resulting temporary is recorded on the current stream, which then
    queues a delay followed by a copy into ``result``. The test asserts
    that a new allocation made in the side stream gets a different address
    while that work is pending, that ``result`` holds the source values,
    and that the address is handed out again once the current stream has
    been synchronized.
    """
    expected = [1.0, 2.0, 3.0, 4.0]
    cycles_per_ms = _get_cycles_per_ms()
    gpu = nd.gpu(0)

    host = nd.array(np.array(expected, dtype=np.float32), ctx=nd.cpu())
    host.pin_memory_()
    result = nd.empty([4], ctx=gpu)
    side_stream = torch.cuda.Stream()
    staged_ptr = None

    # Performs the CPU->GPU copy in a background stream
    def perform_copy():
        nonlocal staged_ptr
        with torch.cuda.stream(side_stream):
            staged = host.copyto(gpu)
            staged_ptr = F.from_dgl_nd(staged).data_ptr()
        main_stream = torch.cuda.current_stream()
        main_stream.wait_stream(side_stream)
        staged.record_stream(to_dgl_stream_handle(main_stream))
        # Delay the copy so the current stream is still using `staged`
        # after this helper has returned and dropped its reference.
        torch.cuda._sleep(int(50 * cycles_per_ms))
        result.copyfrom(staged)

    perform_copy()

    # Keep `early_block` referenced until the end of the test, exactly as
    # the original kept `tmp2`, so the later allocation sees the same
    # allocator state.
    with torch.cuda.stream(side_stream):
        early_block = nd.empty([4], ctx=gpu)
        early_ptr = F.from_dgl_nd(early_block).data_ptr()
        assert early_ptr != staged_ptr, "allocation re-used too soon"

    copied_back = F.from_dgl_nd(result).cpu()
    assert torch.equal(copied_back, torch.tensor(expected))

    # Check that the block will be re-used after the main stream finishes
    torch.cuda.current_stream().synchronize()
    with torch.cuda.stream(side_stream):
        late_block = nd.empty([4], ctx=gpu)
        late_ptr = F.from_dgl_nd(late_block).data_ptr()
        assert late_ptr == staged_ptr, "allocation not re-used"
123
124
125@unittest.skipIf(

Callers 1

test_ffi-stream.pyFile · 0.85

Calls 4

_get_cycles_per_msFunction · 0.85
perform_copyFunction · 0.85
cpuMethod · 0.45
pin_memory_Method · 0.45

Tested by

no test coverage detected