Configuration for RocketKV sparse attention.
| 244 | |
| 245 | |
class RocketSparseAttentionConfig(BaseSparseAttentionConfig):
    """Settings for the RocketKV sparse attention algorithm.

    Every tunable is an optional pydantic field with a default; the class
    is registered under the algorithm name ``"rocket"``.
    """

    # Algorithm identifier shared by all instances (not a model field).
    algorithm: ClassVar[str] = "rocket"

    # --- SnapKV parameters -------------------------------------------------
    window_size: Optional[int] = Field(
        default=32,
        description="The window size for snap KV.",
    )
    kernel_size: Optional[int] = Field(
        default=63,
        description="The kernel size for snap KV.",
    )

    # --- Selection parameters ----------------------------------------------
    # `topr` accepts either an int or a float; how the two are interpreted
    # is decided by the consumer of this config, not here.
    topr: Optional[Union[int, float]] = Field(
        default=128,
        description="Top-r",
    )
    topk: Optional[int] = Field(
        default=64,
        description="Top-k",
    )

    # --- Budget / paging ---------------------------------------------------
    prompt_budget: Optional[int] = Field(
        default=2048,
        description="Prompt budget",
    )
    page_size: Optional[int] = Field(
        default=4,
        description="Page size",
    )

    # --- KT cache ----------------------------------------------------------
    # NOTE(review): `choices` is not a documented pydantic `Field` argument,
    # so the listed values are presumably informational only and not
    # enforced at validation time -- confirm against the pydantic version
    # in use.
    kt_cache_dtype: Optional[str] = Field(
        default='float8_e5m2',
        choices=['bfloat16', 'float8_e5m2'],
        description="KT cache dtype",
    )

    @classmethod
    def from_dict(cls, data: dict):
        """Build a config by passing the entries of ``data`` as keyword arguments."""
        config = cls(**data)
        return config

    def supports_backend(self, backend: str) -> bool:
        """Return True only when ``backend`` equals ``"pytorch"``."""
        is_pytorch = backend == "pytorch"
        return is_pytorch

    def get_indices_block_size(self) -> int:
        """Return the indices block size, which is the configured ``page_size``."""
        block_size = self.page_size
        return block_size
| 275 | |
| 276 | |
| 277 | class DeepSeekSparseAttentionConfig(BaseSparseAttentionConfig): |