MCPcopy
hub / github.com/OpenBMB/VoxCPM / generate_tts_audio

Method generate_tts_audio

app_old.py:78–111  ·  view source on GitHub ↗

Generate speech from text using VoxCPM, optionally guided by reference audio for voice style. Returns (sample_rate, waveform_numpy).

(
        self,
        text_input: str,
        prompt_wav_path_input: Optional[str] = None,
        prompt_text_input: Optional[str] = None,
        cfg_value_input: float = 2.0,
        inference_timesteps_input: int = 10,
        do_normalize: bool = True,
        denoise: bool = True,
    )

Source from the content-addressed store, hash-verified

76 return text
77
def generate_tts_audio(
    self,
    text_input: str,
    prompt_wav_path_input: Optional[str] = None,
    prompt_text_input: Optional[str] = None,
    cfg_value_input: float = 2.0,
    inference_timesteps_input: int = 10,
    do_normalize: bool = True,
    denoise: bool = True,
) -> Tuple[int, np.ndarray]:
    """
    Generate speech from text using VoxCPM; optional reference audio for voice style guidance.

    Args:
        text_input: Text to synthesize. Surrounding whitespace is stripped.
        prompt_wav_path_input: Optional path to a reference audio file; an
            empty value is passed to the model as ``None``.
        prompt_text_input: Optional text accompanying the reference audio;
            an empty value is passed to the model as ``None``.
        cfg_value_input: Coerced to ``float`` and forwarded to the model's
            ``generate`` as ``cfg_value``.
        inference_timesteps_input: Coerced to ``int`` and forwarded to the
            model's ``generate`` as ``inference_timesteps``.
        do_normalize: Forwarded to the model's ``generate`` as ``normalize``.
        denoise: Forwarded to the model's ``generate`` as ``denoise``.

    Returns:
        (sample_rate, waveform_numpy), where the sample rate is read from
        the loaded model's ``tts_model.sample_rate`` and the waveform is
        whatever the model's ``generate`` returns.

    Raises:
        ValueError: If ``text_input`` is ``None``, empty, or whitespace-only.
    """
    # Validate first so an empty request fails fast, before the model is
    # fetched/loaded via get_or_load_voxcpm().
    text = (text_input or "").strip()
    if not text:
        raise ValueError("Please input text to synthesize.")

    current_model = self.get_or_load_voxcpm()

    # Collapse empty/falsy prompt values to None so the model receives either
    # a real value or None.
    prompt_wav_path = prompt_wav_path_input or None
    prompt_text = prompt_text_input or None

    # Log to stderr, showing only the first 60 characters of the text.
    print(f"Generating audio for text: '{text[:60]}...'", file=sys.stderr)
    wav = current_model.generate(
        text=text,
        prompt_text=prompt_text,
        prompt_wav_path=prompt_wav_path,
        cfg_value=float(cfg_value_input),
        inference_timesteps=int(inference_timesteps_input),
        normalize=do_normalize,
        denoise=denoise,
    )
    return (current_model.tts_model.sample_rate, wav)
112
113
114# ---------- UI Builders ----------

Callers

nothing calls this directly

Calls 2

get_or_load_voxcpm — Method · 0.95
generate — Method · 0.45

Tested by

no test coverage detected