Generate speech from text using VoxCPM, optionally guided by reference audio for voice style. Returns a `(sample_rate, waveform)` tuple, where `waveform` is a NumPy array.
(
self,
text_input: str,
prompt_wav_path_input: Optional[str] = None,
prompt_text_input: Optional[str] = None,
cfg_value_input: float = 2.0,
inference_timesteps_input: int = 10,
do_normalize: bool = True,
denoise: bool = True,
)
| 76 | return text |
| 77 | |
def generate_tts_audio(
    self,
    text_input: str,
    prompt_wav_path_input: Optional[str] = None,
    prompt_text_input: Optional[str] = None,
    cfg_value_input: float = 2.0,
    inference_timesteps_input: int = 10,
    do_normalize: bool = True,
    denoise: bool = True,
) -> Tuple[int, np.ndarray]:
    """Generate speech from text using VoxCPM.

    Optional reference audio (and its transcript) can be supplied for voice
    style guidance.

    Args:
        text_input: Text to synthesize. Leading/trailing whitespace is
            stripped; ``None`` is treated as an empty string.
        prompt_wav_path_input: Path to a reference audio file. An empty
            string is treated as "not provided" and forwarded as ``None``.
        prompt_text_input: Text accompanying the reference audio. An empty
            string is treated as "not provided" and forwarded as ``None``.
        cfg_value_input: Coerced with ``float()`` and forwarded to the model
            as ``cfg_value``.
        inference_timesteps_input: Coerced with ``int()`` and forwarded to
            the model as ``inference_timesteps``.
        do_normalize: Forwarded to the model as ``normalize``.
        denoise: Forwarded to the model as ``denoise``.
            NOTE(review): presumably applies to the reference audio --
            confirm against the VoxCPM documentation.

    Returns:
        A ``(sample_rate, waveform)`` tuple: the sample rate read from
        ``current_model.tts_model.sample_rate`` and whatever
        ``current_model.generate`` returned.

    Raises:
        ValueError: If the text is empty after stripping whitespace.
    """
    # Validate before touching the model so that empty input fails fast
    # instead of paying for a model load first.
    text = (text_input or "").strip()
    if not text:
        raise ValueError("Please input text to synthesize.")

    current_model = self.get_or_load_voxcpm()

    # Normalize empty strings to None so the model sees "no prompt".
    prompt_wav_path = prompt_wav_path_input or None
    prompt_text = prompt_text_input or None

    # Log a short preview; only show an ellipsis when text was truncated.
    preview = text[:60] + ("..." if len(text) > 60 else "")
    print(f"Generating audio for text: '{preview}'", file=sys.stderr)

    wav = current_model.generate(
        text=text,
        prompt_text=prompt_text,
        prompt_wav_path=prompt_wav_path,
        cfg_value=float(cfg_value_input),
        inference_timesteps=int(inference_timesteps_input),
        normalize=do_normalize,
        denoise=denoise,
    )
    return (current_model.tts_model.sample_rate, wav)
| 112 | |
| 113 | |
| 114 | # ---------- UI Builders ---------- |
nothing calls this directly
no test coverage detected