MCP · copy
hub / github.com/mudler/LocalAI / TTS

Method TTS

backend/python/transformers/backend.py:544–617  ·  view source on GitHub ↗
(self, request, context)

Source from the content-addressed store, hash-verified

542 return backend_pb2.Result(success=True)
543
544 def TTS(self, request, context):
545 try:
546 text = request.text
547 print(f"[transformers] TTS generating for text: {text[:100]}...", file=sys.stderr)
548
549 # Build inputs based on processor capabilities
550 if request.voice and os.path.exists(request.voice):
551 # Voice cloning: use chat template with reference audio
552 chat_template = [{
553 "role": "0",
554 "content": [
555 {"type": "text", "text": text},
556 {"type": "audio", "path": request.voice},
557 ],
558 }]
559 inputs = self.processor.apply_chat_template(
560 chat_template, tokenize=True, return_dict=True,
561 ).to(self.model.device, self.model.dtype)
562 elif hasattr(self.processor, 'apply_chat_template'):
563 # Models that use chat template format (VibeVoice, CSM, etc.)
564 chat_template = [{"role": "0", "content": [{"type": "text", "text": text}]}]
565 try:
566 inputs = self.processor.apply_chat_template(
567 chat_template, tokenize=True, return_dict=True,
568 ).to(self.model.device, self.model.dtype)
569 except Exception:
570 # Fallback if chat template fails (not all processors support it)
571 inputs = self.processor(text=[text], padding=True, return_tensors="pt")
572 if self.CUDA:
573 inputs = inputs.to("cuda")
574 else:
575 # Direct processor call (Musicgen, etc.)
576 inputs = self.processor(text=[text], padding=True, return_tensors="pt")
577 if self.CUDA:
578 inputs = inputs.to("cuda")
579
580 # Build generation kwargs from self.options
581 gen_kwargs = {**inputs, "max_new_tokens": self.max_tokens}
582 for key in ["guidance_scale", "temperature", "top_p", "top_k", "do_sample"]:
583 if key in self.options:
584 gen_kwargs[key] = self.options[key]
585
586 # Add noise scheduler if configured (e.g., for VibeVoice)
587 noise_scheduler_type = self.options.get("noise_scheduler", None)
588 if noise_scheduler_type:
589 import diffusers
590 SchedulerClass = getattr(diffusers, noise_scheduler_type)
591 scheduler_kwargs = {}
592 for key in ["beta_schedule", "prediction_type"]:
593 if key in self.options:
594 scheduler_kwargs[key] = self.options[key]
595 gen_kwargs["noise_scheduler"] = SchedulerClass(**scheduler_kwargs)
596
597 # Generate audio
598 audio = self.model.generate(**gen_kwargs)
599 print("[transformers] TTS generated!", file=sys.stderr)
600
601 # Save audio output

Callers

nothing calls this directly

Calls 5

processor (Method) · 0.80
generate (Method) · 0.80
to (Method) · 0.45
get (Method) · 0.45
write (Method) · 0.45

Tested by

no test coverage detected