Initialize VoxCPM TTS pipeline. Args: voxcpm_model_path: Local filesystem path to the VoxCPM model assets (weights, configs, etc.). Typically the directory returned by a prior download step. zipenhancer_model_path: ModelScope acoustic noise suppression model id or local path. If None, denoiser will not be initialized.
(
self,
voxcpm_model_path: str,
zipenhancer_model_path: str | None = "iic/speech_zipenhancer_ans_multiloss_16k_base",
enable_denoiser: bool = True,
optimize: bool = True,
device: str | None = None,
lora_config: Optional[LoRAConfig] = None,
lora_weights_path: Optional[str] = None,
)
| 13 | |
| 14 | class VoxCPM: |
| 15 | def __init__( |
| 16 | self, |
| 17 | voxcpm_model_path: str, |
| 18 | zipenhancer_model_path: str | None = "iic/speech_zipenhancer_ans_multiloss_16k_base", |
| 19 | enable_denoiser: bool = True, |
| 20 | optimize: bool = True, |
| 21 | device: str | None = None, |
| 22 | lora_config: Optional[LoRAConfig] = None, |
| 23 | lora_weights_path: Optional[str] = None, |
| 24 | ): |
| 25 | """Initialize VoxCPM TTS pipeline. |
| 26 | |
| 27 | Args: |
| 28 | voxcpm_model_path: Local filesystem path to the VoxCPM model assets |
| 29 | (weights, configs, etc.). Typically the directory returned by |
| 30 | a prior download step. |
| 31 | zipenhancer_model_path: ModelScope acoustic noise suppression model |
| 32 | id or local path. If None, denoiser will not be initialized. |
| 33 | enable_denoiser: Whether to initialize the denoiser pipeline. |
| 34 | optimize: Whether to optimize the model with torch.compile. True by default, but can be disabled for debugging. |
| 35 | device: Runtime device. If set to ``None`` or ``"auto"``, VoxCPM |
| 36 | will choose automatically (preferring CUDA, then MPS, then CPU). |
| 37 | If set explicitly, that device is used or a clear error is raised. |
| 38 | lora_config: LoRA configuration for fine-tuning. If lora_weights_path is |
| 39 | provided without lora_config, a default config will be created. |
| 40 | lora_weights_path: Path to pre-trained LoRA weights (.pth file or directory |
| 41 | containing lora_weights.ckpt). If provided, LoRA weights will be loaded. |
| 42 | """ |
| 43 | print( |
| 44 | f"voxcpm_model_path: {voxcpm_model_path}, zipenhancer_model_path: {zipenhancer_model_path}, enable_denoiser: {enable_denoiser}", |
| 45 | file=sys.stderr, |
| 46 | ) |
| 47 | |
| 48 | # If lora_weights_path is provided but no lora_config, create a default one |
| 49 | if lora_weights_path is not None and lora_config is None: |
| 50 | lora_config = LoRAConfig( |
| 51 | enable_lm=True, |
| 52 | enable_dit=True, |
| 53 | enable_proj=False, |
| 54 | ) |
| 55 | print(f"Auto-created default LoRAConfig for loading weights from: {lora_weights_path}", file=sys.stderr) |
| 56 | |
| 57 | # Determine model type from config.json architecture field |
| 58 | config_path = os.path.join(voxcpm_model_path, "config.json") |
| 59 | with open(config_path, "r", encoding="utf-8") as f: |
| 60 | config = json.load(f) |
| 61 | arch = config.get("architecture", "voxcpm").lower() |
| 62 | |
| 63 | if arch == "voxcpm2": |
| 64 | self.tts_model = VoxCPM2Model.from_local( |
| 65 | voxcpm_model_path, |
| 66 | optimize=optimize, |
| 67 | device=device, |
| 68 | lora_config=lora_config, |
| 69 | ) |
| 70 | print("Loaded VoxCPM2Model", file=sys.stderr) |
| 71 | elif arch == "voxcpm": |
| 72 | self.tts_model = VoxCPMModel.from_local( |
No direct callers of the `VoxCPM.__init__` constructor were found, and no test coverage was detected for it.