MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / RocketSparseAttentionConfig

Class RocketSparseAttentionConfig

tensorrt_llm/llmapi/llm_args.py:246–274  ·  view source on GitHub ↗

Configuration for RocketKV sparse attention.

Source from the content-addressed store, hash-verified

244
245
class RocketSparseAttentionConfig(BaseSparseAttentionConfig):
    """
    Configuration for RocketKV sparse attention.

    All fields declared on this class have defaults. ``kt_cache_dtype`` is
    restricted to ``'bfloat16'`` or ``'float8_e5m2'``; ``None`` is still
    accepted because the field is ``Optional``.
    """
    algorithm: ClassVar[str] = "rocket"
    window_size: Optional[int] = Field(
        default=32, description="The window size for snap KV.")
    kernel_size: Optional[int] = Field(
        default=63, description="The kernel size for snap KV.")
    topr: Optional[Union[int, float]] = Field(default=128, description="Top-r")
    topk: Optional[int] = Field(default=64, description="Top-k")
    prompt_budget: Optional[int] = Field(default=2048,
                                         description="Prompt budget")
    page_size: Optional[int] = Field(default=4, description="Page size")
    # `choices=` is not a pydantic `Field` argument: pydantic v2 treats unknown
    # keyword arguments as (deprecated) JSON-schema extras and never validates
    # against them. The list is kept as explicit schema metadata and the
    # restriction is enforced with `pattern`.
    kt_cache_dtype: Optional[str] = Field(
        default='float8_e5m2',
        pattern=r'^(bfloat16|float8_e5m2)$',
        json_schema_extra={'choices': ['bfloat16', 'float8_e5m2']},
        description="KT cache dtype",
    )

    @classmethod
    def from_dict(cls, data: dict) -> "RocketSparseAttentionConfig":
        """Build a config by passing the entries of ``data`` as keyword arguments."""
        return cls(**data)

    def supports_backend(self, backend: str) -> bool:
        """Return True only when ``backend`` is exactly ``"pytorch"``."""
        return backend == "pytorch"

    def get_indices_block_size(self) -> int:
        """Return ``page_size`` as the indices block size.

        NOTE(review): ``page_size`` is declared ``Optional[int]``, so this
        returns ``None`` if a caller explicitly sets ``page_size=None``.
        """
        return self.page_size
275
276
277class DeepSeekSparseAttentionConfig(BaseSparseAttentionConfig):

Callers 6

test_auto_dtype · Method · 0.90
test_model · Function · 0.90
test_sparse_kv_predict · Function · 0.90
test_sparse_attn_predict · Function · 0.90
run_RocketKV · Function · 0.90
initialize_llm · Function · 0.90

Calls 1

Field · Function · 0.85

Tested by 4

test_auto_dtype · Method · 0.72
test_model · Function · 0.72
test_sparse_kv_predict · Function · 0.72
test_sparse_attn_predict · Function · 0.72