Method sum

triton_kernels/tensor.py:186–193 · view source on GitHub ↗

(self, partials_block_size)

Source from the content-addressed store, hash-verified

184	self.scratchpad = scratchpad
185
def sum(self, partials_block_size):
    """Run ``sum_bitmatrix_rows`` on this object using its scratchpad buffer.

    The first ``n_cols`` entries of ``self.scratchpad`` (``n_cols`` being the
    second element of ``self.shape``) are passed to ``sum_bitmatrix_rows``
    together with ``self`` and ``partials_block_size``. If no scratchpad is
    attached, one is obtained from ``clear_sums(n_cols, self.device)`` first.

    After the slice has been taken, ``self.scratchpad`` is reset to ``None``,
    so the same buffer is never handed out by two calls.

    Args:
        partials_block_size: forwarded unchanged to ``sum_bitmatrix_rows``.

    Returns:
        Whatever ``sum_bitmatrix_rows`` returns.
    """
    # Unpacking requires ``self.shape`` to have exactly two elements.
    _n_rows, num_cols = self.shape
    device = self.device
    if self.scratchpad is None:
        self.scratchpad = clear_sums(num_cols, device)
    # Presumably used as the output buffer by sum_bitmatrix_rows -- TODO confirm.
    sums_buffer = self.scratchpad[:num_cols]
    # Detach the scratchpad so it is consumed at most once.
    # NOTE(review): the previous comment here said a second sum() should raise;
    # as written, a second call allocates a fresh buffer through clear_sums
    # instead -- confirm which behavior is intended.
    self.scratchpad = None
    return sum_bitmatrix_rows(self, sums_buffer, partials_block_size)
194
195
196	def get_layout(tensor: torch.Tensor \| Tensor \| None):

forwardMethod · 0.80

vpopcFunction · 0.80

_sum_bitmatrix_rowsFunction · 0.80

matmul_launch_metadataFunction · 0.80

_topk_backwardFunction · 0.80

_expt_data_memsetFunction · 0.80

_routing_compute_expt_offsFunction · 0.80

_routing_compute_indx_offsFunction · 0.80

ref.pyFile · 0.80

computeFunction · 0.80

run_testFunction · 0.80

mha_refFunction · 0.80

clear_sumsFunction · 0.85

sum_bitmatrix_rowsFunction · 0.85

run_testFunction · 0.64

test_smooth_quant_rms_normMethod · 0.64

test_quantize_per_tokenMethod · 0.64

_case_qserve_gemm_per_channelMethod · 0.64

test_get_draft_token_arrayMethod · 0.64

test_unpack_gen_dataMethod · 0.64

test_packed_position_idsMethod · 0.64

test_beams2treeMethod · 0.64

test_mixture_of_expertsMethod · 0.64

test_mlp_comparisonMethod · 0.64

test_ootb_comparisonMethod · 0.64

generate_referenceMethod · 0.64