Class AttentionBackendName

src/diffusers/models/attention_dispatch.py:224–264 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

222
223
class AttentionBackendName(str, Enum):
    """String-valued names of the attention backends that can be selected.

    Because the class mixes in ``str``, every member compares equal to its
    plain string value (``AttentionBackendName.FLASH == "flash"``) and can be
    looked up from that string (``AttentionBackendName("flash")``).

    Members are grouped below by the library that provides the kernel.

    NOTE(review): members whose name and value start with an underscore look
    like non-public / specialised variants of a public backend -- confirm
    against the registry before relying on that.
    """

    # Disabled upstream; kept for reference only.
    # EAGER = "eager"

    # `flash-attn`
    FLASH = "flash"
    FLASH_HUB = "flash_hub"
    FLASH_VARLEN = "flash_varlen"
    FLASH_VARLEN_HUB = "flash_varlen_hub"
    FLASH_4_HUB = "flash_4_hub"
    _FLASH_3 = "_flash_3"
    _FLASH_VARLEN_3 = "_flash_varlen_3"
    _FLASH_3_HUB = "_flash_3_hub"
    _FLASH_3_VARLEN_HUB = "_flash_3_varlen_hub"

    # `aiter`
    AITER = "aiter"

    # PyTorch native
    FLEX = "flex"
    NATIVE = "native"
    _NATIVE_CUDNN = "_native_cudnn"
    _NATIVE_EFFICIENT = "_native_efficient"
    _NATIVE_FLASH = "_native_flash"
    _NATIVE_MATH = "_native_math"
    _NATIVE_NPU = "_native_npu"
    _NATIVE_XLA = "_native_xla"

    # `sageattention`
    SAGE = "sage"
    SAGE_HUB = "sage_hub"
    SAGE_VARLEN = "sage_varlen"
    _SAGE_QK_INT8_PV_FP8_CUDA = "_sage_qk_int8_pv_fp8_cuda"
    _SAGE_QK_INT8_PV_FP8_CUDA_SM90 = "_sage_qk_int8_pv_fp8_cuda_sm90"
    _SAGE_QK_INT8_PV_FP16_CUDA = "_sage_qk_int8_pv_fp16_cuda"
    _SAGE_QK_INT8_PV_FP16_TRITON = "_sage_qk_int8_pv_fp16_triton"
    # TODO: let's not add support for Sparge Attention now because it requires tuning per model
    # We can look into supporting something "autotune"-ing in the future
    # SPARGE = "sparge"

    # `xformers`
    XFORMERS = "xformers"
265
266
267	class _AttentionBackendRegistry:

set_attention_backend Method · 0.70

enable_parallelism Method · 0.70

_AttentionBackendRegistry Class · 0.70

attention_backend Function · 0.70

dispatch_attention_fn Function · 0.70

set_attention_backend Method · 0.70

_get_current_attention_backend Function · 0.50

no outgoing calls

no test coverage detected

searching dependent graphs…