MCPcopy Index your code
hub / github.com/huggingface/diffusers / AttentionBackendName

Class AttentionBackendName

src/diffusers/models/attention_dispatch.py:224–264  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

222
223
class AttentionBackendName(str, Enum):
    """String-valued names of the selectable attention backends.

    The class mixes in ``str``, so every member is itself a string and
    compares equal to its plain value (``AttentionBackendName.FLASH ==
    "flash"``). A member can also be recovered from that value with
    ``AttentionBackendName("flash")``.

    Members are grouped by the library named in each section comment, and
    iteration follows the declaration order below.

    NOTE(review): members whose name and value begin with an underscore look
    like non-public variants -- confirm before relying on them externally.
    """

    # Kept commented out, as in the original declaration:
    # EAGER = "eager"

    # ---- `flash-attn` ----
    FLASH = "flash"
    FLASH_HUB = "flash_hub"
    FLASH_VARLEN = "flash_varlen"
    FLASH_VARLEN_HUB = "flash_varlen_hub"
    FLASH_4_HUB = "flash_4_hub"
    _FLASH_3 = "_flash_3"
    _FLASH_VARLEN_3 = "_flash_varlen_3"
    _FLASH_3_HUB = "_flash_3_hub"
    _FLASH_3_VARLEN_HUB = "_flash_3_varlen_hub"

    # ---- `aiter` ----
    AITER = "aiter"

    # ---- PyTorch native ----
    FLEX = "flex"
    NATIVE = "native"
    _NATIVE_CUDNN = "_native_cudnn"
    _NATIVE_EFFICIENT = "_native_efficient"
    _NATIVE_FLASH = "_native_flash"
    _NATIVE_MATH = "_native_math"
    _NATIVE_NPU = "_native_npu"
    _NATIVE_XLA = "_native_xla"

    # ---- `sageattention` ----
    SAGE = "sage"
    SAGE_HUB = "sage_hub"
    SAGE_VARLEN = "sage_varlen"
    _SAGE_QK_INT8_PV_FP8_CUDA = "_sage_qk_int8_pv_fp8_cuda"
    _SAGE_QK_INT8_PV_FP8_CUDA_SM90 = "_sage_qk_int8_pv_fp8_cuda_sm90"
    _SAGE_QK_INT8_PV_FP16_CUDA = "_sage_qk_int8_pv_fp16_cuda"
    _SAGE_QK_INT8_PV_FP16_TRITON = "_sage_qk_int8_pv_fp16_triton"
    # TODO: Sparge Attention is deliberately not supported yet because it
    # requires tuning per model. Some form of "autotune"-ing could be looked
    # into in the future.
    # SPARGE = "sparge"

    # ---- `xformers` ----
    XFORMERS = "xformers"
265
266
267class _AttentionBackendRegistry:

Callers 7

set_attention_backendMethod · 0.70
enable_parallelismMethod · 0.70
attention_backendFunction · 0.70
dispatch_attention_fnFunction · 0.70
set_attention_backendMethod · 0.70

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…