hub / github.com/huggingface/diffusers / get_ltx2_video_vae_config

Function get_ltx2_video_vae_config

scripts/convert_ltx2_to_diffusers.py:509–625 · view source on GitHub ↗

(
    version: str, timestep_conditioning: bool = False
)

Source from the content-addressed store, hash-verified

507
508
509	def get_ltx2_video_vae_config(
510	version: str, timestep_conditioning: bool = False
511	) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
512	if version == "test":
513	config = {
514	"model_id": "diffusers-internal-dev/dummy-ltx2",
515	"diffusers_config": {
516	"in_channels": 3,
517	"out_channels": 3,
518	"latent_channels": 128,
519	"block_out_channels": (256, 512, 1024, 2048),
520	"down_block_types": (
521	"LTX2VideoDownBlock3D",
522	"LTX2VideoDownBlock3D",
523	"LTX2VideoDownBlock3D",
524	"LTX2VideoDownBlock3D",
525	),
526	"decoder_block_out_channels": (256, 512, 1024),
527	"layers_per_block": (4, 6, 6, 2, 2),
528	"decoder_layers_per_block": (5, 5, 5, 5),
529	"spatio_temporal_scaling": (True, True, True, True),
530	"decoder_spatio_temporal_scaling": (True, True, True),
531	"decoder_inject_noise": (False, False, False, False),
532	"downsample_type": ("spatial", "temporal", "spatiotemporal", "spatiotemporal"),
533	"upsample_residual": (True, True, True),
534	"upsample_factor": (2, 2, 2),
535	"timestep_conditioning": timestep_conditioning,
536	"patch_size": 4,
537	"patch_size_t": 1,
538	"resnet_norm_eps": 1e-6,
539	"encoder_causal": True,
540	"decoder_causal": False,
541	"encoder_spatial_padding_mode": "zeros",
542	"decoder_spatial_padding_mode": "reflect",
543	"spatial_compression_ratio": 32,
544	"temporal_compression_ratio": 8,
545	},
546	}
547	rename_dict = LTX_2_0_VIDEO_VAE_RENAME_DICT
548	special_keys_remap = LTX_2_0_VAE_SPECIAL_KEYS_REMAP
549	elif version == "2.0":
550	config = {
551	"model_id": "Lightricks/LTX-2",
552	"diffusers_config": {
553	"in_channels": 3,
554	"out_channels": 3,
555	"latent_channels": 128,
556	"block_out_channels": (256, 512, 1024, 2048),
557	"down_block_types": (
558	"LTX2VideoDownBlock3D",
559	"LTX2VideoDownBlock3D",
560	"LTX2VideoDownBlock3D",
561	"LTX2VideoDownBlock3D",
562	),
563	"decoder_block_out_channels": (256, 512, 1024),
564	"layers_per_block": (4, 6, 6, 2, 2),
565	"decoder_layers_per_block": (5, 5, 5, 5),
566	"spatio_temporal_scaling": (True, True, True, True),

Callers 1

convert_ltx2_video_vae · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild: real call sites across dependent graphs

searching dependent graphs…