Class RocketSparseAttentionConfig

tensorrt_llm/llmapi/llm_args.py:246–274 · view source on GitHub ↗

Configuration for RocketKV sparse attention.

Source from the content-addressed store, hash-verified

244
245
class RocketSparseAttentionConfig(BaseSparseAttentionConfig):
    """
    Settings for RocketKV sparse attention.

    The class-level ``algorithm`` tag is ``"rocket"``. Every tunable below is
    declared ``Optional`` and ships with a default, so the config can be
    built with no arguments or from a plain dict via :meth:`from_dict`.
    """
    algorithm: ClassVar[str] = "rocket"

    # --- SnapKV parameters -------------------------------------------------
    window_size: Optional[int] = Field(
        default=32,
        description="The window size for snap KV.",
    )
    kernel_size: Optional[int] = Field(
        default=63,
        description="The kernel size for snap KV.",
    )

    # --- Selection parameters ----------------------------------------------
    # `topr` accepts either an int or a float; `topk` is int-only.
    topr: Optional[Union[int, float]] = Field(
        default=128,
        description="Top-r",
    )
    topk: Optional[int] = Field(
        default=64,
        description="Top-k",
    )
    prompt_budget: Optional[int] = Field(
        default=2048,
        description="Prompt budget",
    )

    # `page_size` is also what `get_indices_block_size` reports.
    page_size: Optional[int] = Field(
        default=4,
        description="Page size",
    )

    # NOTE(review): `choices` is not a documented pydantic `Field` argument;
    # confirm how this file's `Field` treats it and whether the two listed
    # values are actually enforced at validation time.
    kt_cache_dtype: Optional[str] = Field(
        default='float8_e5m2',
        choices=['bfloat16', 'float8_e5m2'],
        description="KT cache dtype",
    )

    @classmethod
    def from_dict(cls, data: dict):
        """Build a config by passing the entries of ``data`` as keyword arguments."""
        instance = cls(**data)
        return instance

    def supports_backend(self, backend: str) -> bool:
        """Return ``True`` only when ``backend`` equals ``"pytorch"``."""
        is_pytorch = backend == "pytorch"
        return is_pytorch

    def get_indices_block_size(self) -> int:
        """Return the configured ``page_size``.

        NOTE(review): ``page_size`` is annotated ``Optional[int]``, so an
        explicit ``None`` would be returned as-is despite the ``int``
        return annotation -- confirm callers never set it to ``None``.
        """
        block_size = self.page_size
        return block_size
275
276
277	class DeepSeekSparseAttentionConfig(BaseSparseAttentionConfig):

test_auto_dtype (Method) · 0.90

test_model (Function) · 0.90

test_sparse_kv_predict (Function) · 0.90

test_sparse_attn_predict (Function) · 0.90

run_RocketKV (Function) · 0.90

initialize_llm (Function) · 0.90

Field (Function) · 0.85

test_auto_dtype (Method) · 0.72

test_model (Function) · 0.72

test_sparse_kv_predict (Function) · 0.72

test_sparse_attn_predict (Function) · 0.72