()
| 32 | |
| 33 | |
def parse_args(argv=None):
    """Parse the command-line options of the full-finetune inference test.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default), argparse
            falls back to ``sys.argv[1:]``, which is what a plain
            ``parse_args()`` call has always done.

    Returns:
        An ``argparse.Namespace`` with the attributes ``ckpt_dir``, ``text``,
        ``prompt_audio``, ``prompt_text``, ``output``, ``cfg_value``,
        ``inference_timesteps``, ``max_len`` and ``normalize``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing, a
            value cannot be converted to its declared type, or ``-h/--help``
            is requested.
    """
    # The title is passed as ``description``. The first positional argument of
    # ArgumentParser is ``prog``, so passing the title positionally replaces
    # the script name in the "usage:" line instead of describing the tool.
    parser = argparse.ArgumentParser(
        description="VoxCPM full-finetune inference test (no LoRA)",
    )

    # Required inputs.
    parser.add_argument(
        "--ckpt_dir",
        type=str,
        required=True,
        help="Checkpoint directory (contains pytorch_model.bin, config.json, audiovae.pth, etc.)",
    )
    parser.add_argument(
        "--text",
        type=str,
        required=True,
        help="Target text to synthesize",
    )

    # Optional reference prompt; both default to the empty string when omitted.
    parser.add_argument(
        "--prompt_audio",
        type=str,
        default="",
        help="Optional: reference audio path for voice cloning",
    )
    parser.add_argument(
        "--prompt_text",
        type=str,
        default="",
        help="Optional: transcript of reference audio",
    )

    # Output location.
    parser.add_argument(
        "--output",
        type=str,
        default="ft_test.wav",
        help="Output wav file path",
    )

    # Generation settings.
    parser.add_argument(
        "--cfg_value",
        type=float,
        default=2.0,
        help="CFG scale (default: 2.0)",
    )
    parser.add_argument(
        "--inference_timesteps",
        type=int,
        default=10,
        help="Diffusion inference steps (default: 10)",
    )
    parser.add_argument(
        "--max_len",
        type=int,
        default=600,
        help="Max generation steps",
    )
    parser.add_argument(
        "--normalize",
        action="store_true",
        help="Enable text normalization",
    )

    return parser.parse_args(argv)
| 90 | |
| 91 |
no outgoing calls
no test coverage detected
searching dependent graphs…