hub / github.com/dmlc/dgl / test_record_stream_ndarray

Function test_record_stream_ndarray

tests/python/pytorch/test_ffi-stream.py:86–122 · view source on GitHub ↗

()

Source from the content-addressed store, hash-verified

84	)
85	# borrowed from PyTorch, test/test_cuda.py: test_record_stream()
def test_record_stream_ndarray():
    """Check that ``record_stream`` delays re-use of an NDArray's GPU memory.

    Adapted from ``test_record_stream()`` in PyTorch's ``test/test_cuda.py``.

    A pinned host array is copied to GPU 0 on a side stream, the copy is
    recorded against the current stream, and the local reference to it is
    dropped.  The test then asserts that

    * a new side-stream allocation does NOT receive the same address while
      the current stream still has queued work, and
    * a new side-stream allocation DOES receive that address once the
      current stream has been synchronized.
    """
    cycles_per_ms = _get_cycles_per_ms()
    expected = [1.0, 2.0, 3.0, 4.0]

    host_arr = nd.array(np.array(expected, dtype=np.float32), ctx=nd.cpu())
    host_arr.pin_memory_()
    gpu = nd.gpu(0)
    result = nd.empty([4], ctx=gpu)
    side_stream = torch.cuda.Stream()
    copied_ptr = None  # address of the device copy, filled in by perform_copy()

    def device_ptr(arr):
        # data_ptr() of the backend tensor obtained from the DGL NDArray.
        return F.from_dgl_nd(arr).data_ptr()

    # Performs the CPU->GPU copy in a background stream.  Kept as a nested
    # function on purpose: the device copy must go out of scope on return.
    def perform_copy():
        nonlocal copied_ptr
        with torch.cuda.stream(side_stream):
            device_copy = host_arr.copyto(gpu)
            copied_ptr = device_ptr(device_copy)
        main_stream = torch.cuda.current_stream()
        main_stream.wait_stream(side_stream)
        device_copy.record_stream(
            to_dgl_stream_handle(torch.cuda.current_stream())
        )
        # Queue a ~50 ms spin on the GPU so the copy below is delayed.
        torch.cuda._sleep(int(50 * cycles_per_ms))
        result.copyfrom(device_copy)

    perform_copy()

    # The current stream is still busy, so the address must not come back yet.
    with torch.cuda.stream(side_stream):
        early_alloc = nd.empty([4], ctx=gpu)
        early_ptr = device_ptr(early_alloc)
        assert early_ptr != copied_ptr, "allocation re-used too soon"

    result_on_host = F.from_dgl_nd(result).cpu()
    assert torch.equal(result_on_host, torch.tensor(expected))

    # Check that the block will be re-used after the main stream finishes
    torch.cuda.current_stream().synchronize()
    with torch.cuda.stream(side_stream):
        late_alloc = nd.empty([4], ctx=gpu)
        late_ptr = device_ptr(late_alloc)
        assert late_ptr == copied_ptr, "allocation not re-used"
123
124
125	@unittest.skipIf(

test_ffi-stream.py — File · 0.85

_get_cycles_per_ms — Function · 0.85

perform_copy — Function · 0.85

cpu — Method · 0.45

pin_memory_ — Method · 0.45

no test coverage detected