(
version: str, timestep_conditioning: bool = False
)
| 507 | |
| 508 | |
| 509 | def get_ltx2_video_vae_config( |
| 510 | version: str, timestep_conditioning: bool = False |
| 511 | ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: |
| 512 | if version == "test": |
| 513 | config = { |
| 514 | "model_id": "diffusers-internal-dev/dummy-ltx2", |
| 515 | "diffusers_config": { |
| 516 | "in_channels": 3, |
| 517 | "out_channels": 3, |
| 518 | "latent_channels": 128, |
| 519 | "block_out_channels": (256, 512, 1024, 2048), |
| 520 | "down_block_types": ( |
| 521 | "LTX2VideoDownBlock3D", |
| 522 | "LTX2VideoDownBlock3D", |
| 523 | "LTX2VideoDownBlock3D", |
| 524 | "LTX2VideoDownBlock3D", |
| 525 | ), |
| 526 | "decoder_block_out_channels": (256, 512, 1024), |
| 527 | "layers_per_block": (4, 6, 6, 2, 2), |
| 528 | "decoder_layers_per_block": (5, 5, 5, 5), |
| 529 | "spatio_temporal_scaling": (True, True, True, True), |
| 530 | "decoder_spatio_temporal_scaling": (True, True, True), |
| 531 | "decoder_inject_noise": (False, False, False, False), |
| 532 | "downsample_type": ("spatial", "temporal", "spatiotemporal", "spatiotemporal"), |
| 533 | "upsample_residual": (True, True, True), |
| 534 | "upsample_factor": (2, 2, 2), |
| 535 | "timestep_conditioning": timestep_conditioning, |
| 536 | "patch_size": 4, |
| 537 | "patch_size_t": 1, |
| 538 | "resnet_norm_eps": 1e-6, |
| 539 | "encoder_causal": True, |
| 540 | "decoder_causal": False, |
| 541 | "encoder_spatial_padding_mode": "zeros", |
| 542 | "decoder_spatial_padding_mode": "reflect", |
| 543 | "spatial_compression_ratio": 32, |
| 544 | "temporal_compression_ratio": 8, |
| 545 | }, |
| 546 | } |
| 547 | rename_dict = LTX_2_0_VIDEO_VAE_RENAME_DICT |
| 548 | special_keys_remap = LTX_2_0_VAE_SPECIAL_KEYS_REMAP |
| 549 | elif version == "2.0": |
| 550 | config = { |
| 551 | "model_id": "Lightricks/LTX-2", |
| 552 | "diffusers_config": { |
| 553 | "in_channels": 3, |
| 554 | "out_channels": 3, |
| 555 | "latent_channels": 128, |
| 556 | "block_out_channels": (256, 512, 1024, 2048), |
| 557 | "down_block_types": ( |
| 558 | "LTX2VideoDownBlock3D", |
| 559 | "LTX2VideoDownBlock3D", |
| 560 | "LTX2VideoDownBlock3D", |
| 561 | "LTX2VideoDownBlock3D", |
| 562 | ), |
| 563 | "decoder_block_out_channels": (256, 512, 1024), |
| 564 | "layers_per_block": (4, 6, 6, 2, 2), |
| 565 | "decoder_layers_per_block": (5, 5, 5, 5), |
| 566 | "spatio_temporal_scaling": (True, True, True, True), |
no outgoing calls
no test coverage detected
searching dependent graphs…