Method split

fastdeploy/model_executor/layers/normalization.py:162–183 · view source on GitHub ↗

Split the input tensor across the tensor-parallel dimension. Args: x (paddle.Tensor): Input tensor to be split. Returns: paddle.Tensor: The split tensor.

(self, x)

Source from the content-addressed store, hash-verified

160	self.weight.set_value(weight_tensor.astype(self._norm_weight_dtype))
161
def split(self, x):
    """
    Return this rank's slice of ``x`` along dim 0, zero-padded to a fixed size.

    The rows of ``x`` are divided into ``self.tp_size`` contiguous chunks of
    ``ceil(x.shape[0] / self.tp_size)`` rows each. The chunk selected by
    ``self.tp_rank`` is copied into the top of a zero-filled buffer of that
    chunk size, so a rank whose chunk is short (or empty) still returns a
    tensor with the same shape as every other rank.

    Args:
        x (paddle.Tensor): Input tensor to be split; it is indexed as
            ``x[rows, :]`` and ``x.shape[1]`` is used as the column count.

    Returns:
        paddle.Tensor: Tensor of shape ``[rows_per_rank, x.shape[1]]`` with
            dtype ``x.dtype``; rows beyond this rank's share stay zero.
    """
    total_rows = x.shape[0]
    # Ceiling division: every rank is assigned the same number of rows.
    rows_per_rank = (total_rows + self.tp_size - 1) // self.tp_size
    # AllGather will hang when the data shapes on multi-ranks are different!
    # Clamp both ends of this rank's window to the rows that actually exist.
    begin = min(self.tp_rank * rows_per_rank, total_rows)
    end = min((self.tp_rank + 1) * rows_per_rank, total_rows)
    padded = paddle.zeros(shape=[rows_per_rank, x.shape[1]], dtype=x.dtype)
    # Copy the real rows into the top of the buffer; the remainder is padding.
    padded[: end - begin, :] = x[begin:end, :]
    return padded
184
185	def allgather(self, out, token_num):
186	"""

forwardMethod · 0.95

get_ext_filenameMethod · 0.80

_load_from_version_fileMethod · 0.80

__init__Method · 0.80

postprocess_devices_and_portsMethod · 0.80

init_cache_infoMethod · 0.80

_str_to_listMethod · 0.80

envs.pyFile · 0.80

show_filtered_argument_or_group_from_helpFunction · 0.80

_output_with_pagerFunction · 0.80

check_unified_ckptFunction · 0.80

parse_portsFunction · 0.80

no outgoing calls

test_cache_kv_with_rope_paddleMethod · 0.64

test_cache_kv_with_neox_rope_paddleMethod · 0.64

get_cuda_versionFunction · 0.64

setUpMethod · 0.64

qk_norm_paddleMethod · 0.64

stop_processesFunction · 0.64

safe_kill_cmdFunction · 0.64

get_rdma_nicsFunction · 0.64

test_validate_tools_empty_and_main_blockFunction · 0.64

test_get_cudnn_versionMethod · 0.64

create_batch_outputs_from_jsonlMethod · 0.64