(version: str)
| 223 | |
| 224 | |
| 225 | def get_ltx2_transformer_config(version: str) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]: |
| 226 | if version == "test": |
| 227 | # Produces a transformer of the same size as used in test_models_transformer_ltx2.py |
| 228 | config = { |
| 229 | "model_id": "diffusers-internal-dev/dummy-ltx2", |
| 230 | "diffusers_config": { |
| 231 | "in_channels": 4, |
| 232 | "out_channels": 4, |
| 233 | "patch_size": 1, |
| 234 | "patch_size_t": 1, |
| 235 | "num_attention_heads": 2, |
| 236 | "attention_head_dim": 8, |
| 237 | "cross_attention_dim": 16, |
| 238 | "vae_scale_factors": (8, 32, 32), |
| 239 | "pos_embed_max_pos": 20, |
| 240 | "base_height": 2048, |
| 241 | "base_width": 2048, |
| 242 | "audio_in_channels": 4, |
| 243 | "audio_out_channels": 4, |
| 244 | "audio_patch_size": 1, |
| 245 | "audio_patch_size_t": 1, |
| 246 | "audio_num_attention_heads": 2, |
| 247 | "audio_attention_head_dim": 4, |
| 248 | "audio_cross_attention_dim": 8, |
| 249 | "audio_scale_factor": 4, |
| 250 | "audio_pos_embed_max_pos": 20, |
| 251 | "audio_sampling_rate": 16000, |
| 252 | "audio_hop_length": 160, |
| 253 | "num_layers": 2, |
| 254 | "activation_fn": "gelu-approximate", |
| 255 | "qk_norm": "rms_norm_across_heads", |
| 256 | "norm_elementwise_affine": False, |
| 257 | "norm_eps": 1e-6, |
| 258 | "caption_channels": 16, |
| 259 | "attention_bias": True, |
| 260 | "attention_out_bias": True, |
| 261 | "rope_theta": 10000.0, |
| 262 | "rope_double_precision": False, |
| 263 | "causal_offset": 1, |
| 264 | "timestep_scale_multiplier": 1000, |
| 265 | "cross_attn_timestep_scale_multiplier": 1, |
| 266 | }, |
| 267 | } |
| 268 | rename_dict = LTX_2_0_TRANSFORMER_KEYS_RENAME_DICT |
| 269 | special_keys_remap = LTX_2_0_TRANSFORMER_SPECIAL_KEYS_REMAP |
| 270 | elif version == "2.0": |
| 271 | config = { |
| 272 | "model_id": "Lightricks/LTX-2", |
| 273 | "diffusers_config": { |
| 274 | "in_channels": 128, |
| 275 | "out_channels": 128, |
| 276 | "patch_size": 1, |
| 277 | "patch_size_t": 1, |
| 278 | "num_attention_heads": 32, |
| 279 | "attention_head_dim": 128, |
| 280 | "cross_attention_dim": 4096, |
| 281 | "vae_scale_factors": (8, 32, 32), |
| 282 | "pos_embed_max_pos": 20, |
no outgoing calls
no test coverage detected
searching dependent graphs…