Class Xtts

TTS/tts/models/xtts.py:191–789 · view source on GitHub ↗

ⓍTTS model implementation. ❗ Currently it only supports inference. Examples: >>> from TTS.tts.configs.xtts_config import XttsConfig >>> from TTS.tts.models.xtts import Xtts >>> config = XttsConfig() >>> model = Xtts.init_from_config(config) >>> model

Source from the content-addressed store, hash-verified

189
190
191	class Xtts(BaseTTS):
192	"""ⓍTTS model implementation.
193
194	❗ Currently it only supports inference.
195
196	Examples:
197	>>> from TTS.tts.configs.xtts_config import XttsConfig
198	>>> from TTS.tts.models.xtts import Xtts
199	>>> config = XttsConfig()
200	>>> model = Xtts.init_from_config(config)
201	>>> model.load_checkpoint(config, checkpoint_dir="paths/to/models_dir/", eval=True)
202	"""
203
204	def __init__(self, config: Coqpit):  # Build the model from `config`: copy settings, create the tokenizer, then construct the sub-models.
205	super().__init__(config, ap=None, tokenizer=None)  # base init receives ap=None and tokenizer=None; the tokenizer is assigned below instead
206	self.mel_stats_path = None  # no mel statistics path is set at construction time
207	self.config = config
208	self.gpt_checkpoint = self.args.gpt_checkpoint  # NOTE(review): self.args is not assigned in this method; presumably provided by the base class, confirm
209	self.decoder_checkpoint = self.args.decoder_checkpoint # TODO: check if this is even needed
210	self.models_dir = config.model_dir  # read from the config object directly, unlike the self.args fields around it
211	self.gpt_batch_size = self.args.gpt_batch_size
212
213	self.tokenizer = VoiceBpeTokenizer()  # must exist before init_models(), which inspects self.tokenizer.tokenizer
214	self.gpt = None  # placeholder; init_models() assigns a GPT only when self.args.gpt_number_text_tokens is truthy
215	self.init_models()  # builds the sub-models using the tokenizer and self.args set up above
216	self.register_buffer("mel_stats", torch.ones(80))  # registers an 80-element tensor of ones as the "mel_stats" buffer
217
218	def init_models(self):
219	"""Initialize the models. We do it here since we need to load the tokenizer first."""
220	if self.tokenizer.tokenizer is not None:
221	self.args.gpt_number_text_tokens = self.tokenizer.get_number_tokens()
222	self.args.gpt_start_text_token = self.tokenizer.tokenizer.token_to_id("[START]")
223	self.args.gpt_stop_text_token = self.tokenizer.tokenizer.token_to_id("[STOP]")
224
225	if self.args.gpt_number_text_tokens:
226	self.gpt = GPT(
227	layers=self.args.gpt_layers,
228	model_dim=self.args.gpt_n_model_channels,
229	start_text_token=self.args.gpt_start_text_token,
230	stop_text_token=self.args.gpt_stop_text_token,
231	heads=self.args.gpt_n_heads,
232	max_text_tokens=self.args.gpt_max_text_tokens,
233	max_mel_tokens=self.args.gpt_max_audio_tokens,
234	max_prompt_tokens=self.args.gpt_max_prompt_tokens,
235	number_text_tokens=self.args.gpt_number_text_tokens,
236	num_audio_tokens=self.args.gpt_num_audio_tokens,
237	start_audio_token=self.args.gpt_start_audio_token,
238	stop_audio_token=self.args.gpt_stop_audio_token,
239	use_perceiver_resampler=self.args.gpt_use_perceiver_resampler,
240	code_stride_len=self.args.gpt_code_stride_len,
241	)
242
243	self.hifigan_decoder = HifiDecoder(
244	input_sample_rate=self.args.input_sample_rate,
245	output_sample_rate=self.args.output_sample_rate,
246	output_hop_length=self.args.output_hop_length,
247	ar_mel_length_compression=self.args.gpt_code_stride_len,
248	decoder_input_dim=self.args.decoder_input_dim,

Callers 2

__init__Method · 0.90

init_from_configMethod · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…