(args)
| 1047 | |
| 1048 | |
| 1049 | def main(args): |
| 1050 | vae_dtype = DTYPE_MAPPING[args.vae_dtype] |
| 1051 | audio_vae_dtype = DTYPE_MAPPING[args.audio_vae_dtype] |
| 1052 | dit_dtype = DTYPE_MAPPING[args.dit_dtype] |
| 1053 | vocoder_dtype = DTYPE_MAPPING[args.vocoder_dtype] |
| 1054 | text_encoder_dtype = DTYPE_MAPPING[args.text_encoder_dtype] |
| 1055 | |
| 1056 | combined_ckpt = None |
| 1057 | load_combined_models = any( |
| 1058 | [ |
| 1059 | args.vae, |
| 1060 | args.audio_vae, |
| 1061 | args.dit, |
| 1062 | args.vocoder, |
| 1063 | args.connectors, |
| 1064 | args.full_pipeline, |
| 1065 | args.upsample_pipeline, |
| 1066 | ] |
| 1067 | ) |
| 1068 | if args.combined_filename is not None and load_combined_models: |
| 1069 | combined_ckpt = load_original_checkpoint(args, filename=args.combined_filename) |
| 1070 | |
| 1071 | if args.vae or args.full_pipeline or args.upsample_pipeline: |
| 1072 | if args.vae_filename is not None: |
| 1073 | original_vae_ckpt = load_hub_or_local_checkpoint(filename=args.vae_filename) |
| 1074 | elif combined_ckpt is not None: |
| 1075 | original_vae_ckpt = get_model_state_dict_from_combined_ckpt(combined_ckpt, args.vae_prefix) |
| 1076 | vae = convert_ltx2_video_vae( |
| 1077 | original_vae_ckpt, version=args.version, timestep_conditioning=args.timestep_conditioning |
| 1078 | ) |
| 1079 | if not args.full_pipeline and not args.upsample_pipeline: |
| 1080 | vae.to(vae_dtype).save_pretrained(os.path.join(args.output_path, "vae")) |
| 1081 | |
| 1082 | if args.audio_vae or args.full_pipeline: |
| 1083 | if args.audio_vae_filename is not None: |
| 1084 | original_audio_vae_ckpt = load_hub_or_local_checkpoint(filename=args.audio_vae_filename) |
| 1085 | elif combined_ckpt is not None: |
| 1086 | original_audio_vae_ckpt = get_model_state_dict_from_combined_ckpt(combined_ckpt, args.audio_vae_prefix) |
| 1087 | audio_vae = convert_ltx2_audio_vae(original_audio_vae_ckpt, version=args.version) |
| 1088 | if not args.full_pipeline: |
| 1089 | audio_vae.to(audio_vae_dtype).save_pretrained(os.path.join(args.output_path, "audio_vae")) |
| 1090 | |
| 1091 | if args.dit or args.full_pipeline: |
| 1092 | if args.dit_filename is not None: |
| 1093 | original_dit_ckpt = load_hub_or_local_checkpoint(filename=args.dit_filename) |
| 1094 | elif combined_ckpt is not None: |
| 1095 | original_dit_ckpt = get_model_state_dict_from_combined_ckpt(combined_ckpt, args.dit_prefix) |
| 1096 | transformer = convert_ltx2_transformer(original_dit_ckpt, version=args.version) |
| 1097 | if not args.full_pipeline: |
| 1098 | transformer.to(dit_dtype).save_pretrained(os.path.join(args.output_path, "transformer")) |
| 1099 | |
| 1100 | if args.connectors or args.full_pipeline: |
| 1101 | if args.dit_filename is not None: |
| 1102 | original_connectors_ckpt = load_hub_or_local_checkpoint(filename=args.dit_filename) |
| 1103 | elif combined_ckpt is not None: |
| 1104 | original_connectors_ckpt = get_model_state_dict_from_combined_ckpt(combined_ckpt, args.dit_prefix) |
| 1105 | connectors = convert_ltx2_connectors(original_connectors_ckpt, version=args.version) |
| 1106 | if not args.full_pipeline: |
no test coverage detected
searching dependent graphs…