MCPcopy
hub / github.com/coqui-ai/TTS / Xtts

Class Xtts

TTS/tts/models/xtts.py:191–789  ·  view source on GitHub ↗

ⓍTTS model implementation. ❗ Currently it only supports inference. Examples: >>> from TTS.tts.configs.xtts_config import XttsConfig >>> from TTS.tts.models.xtts import Xtts >>> config = XttsConfig() >>> model = Xtts.init_from_config(config) >>> model…

Source from the content-addressed store, hash-verified

189
190
191class Xtts(BaseTTS):
192 """ⓍTTS model implementation.
193
194 ❗ Currently it only supports inference.
195
196 Examples:
197 >>> from TTS.tts.configs.xtts_config import XttsConfig
198 >>> from TTS.tts.models.xtts import Xtts
199 >>> config = XttsConfig()
200 >>> model = Xtts.init_from_config(config)
201 >>> model.load_checkpoint(config, checkpoint_dir="paths/to/models_dir/", eval=True)
202 """
203
204 def __init__(self, config: Coqpit):
205 super().__init__(config, ap=None, tokenizer=None)
206 self.mel_stats_path = None
207 self.config = config
208 self.gpt_checkpoint = self.args.gpt_checkpoint
209 self.decoder_checkpoint = self.args.decoder_checkpoint # TODO: check if this is even needed
210 self.models_dir = config.model_dir
211 self.gpt_batch_size = self.args.gpt_batch_size
212
213 self.tokenizer = VoiceBpeTokenizer()
214 self.gpt = None
215 self.init_models()
216 self.register_buffer("mel_stats", torch.ones(80))
217
218 def init_models(self):
219 """Initialize the models. We do it here since we need to load the tokenizer first."""
220 if self.tokenizer.tokenizer is not None:
221 self.args.gpt_number_text_tokens = self.tokenizer.get_number_tokens()
222 self.args.gpt_start_text_token = self.tokenizer.tokenizer.token_to_id("[START]")
223 self.args.gpt_stop_text_token = self.tokenizer.tokenizer.token_to_id("[STOP]")
224
225 if self.args.gpt_number_text_tokens:
226 self.gpt = GPT(
227 layers=self.args.gpt_layers,
228 model_dim=self.args.gpt_n_model_channels,
229 start_text_token=self.args.gpt_start_text_token,
230 stop_text_token=self.args.gpt_stop_text_token,
231 heads=self.args.gpt_n_heads,
232 max_text_tokens=self.args.gpt_max_text_tokens,
233 max_mel_tokens=self.args.gpt_max_audio_tokens,
234 max_prompt_tokens=self.args.gpt_max_prompt_tokens,
235 number_text_tokens=self.args.gpt_number_text_tokens,
236 num_audio_tokens=self.args.gpt_num_audio_tokens,
237 start_audio_token=self.args.gpt_start_audio_token,
238 stop_audio_token=self.args.gpt_stop_audio_token,
239 use_perceiver_resampler=self.args.gpt_use_perceiver_resampler,
240 code_stride_len=self.args.gpt_code_stride_len,
241 )
242
243 self.hifigan_decoder = HifiDecoder(
244 input_sample_rate=self.args.input_sample_rate,
245 output_sample_rate=self.args.output_sample_rate,
246 output_hop_length=self.args.output_hop_length,
247 ar_mel_length_compression=self.args.gpt_code_stride_len,
248 decoder_input_dim=self.args.decoder_input_dim,

Callers 2

__init__ · Method · 0.90
init_from_config · Method · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…