MCPcopy
hub / github.com/mudler/LocalAI / TTS

Method TTS

backend/python/liquid-audio/backend.py:305–364  ·  view source on GitHub ↗
(self, request, context)

Source from the content-addressed store, hash-verified

303
304
305 def TTS(self, request, context):
306 try:
307 if self.model is None or self.processor is None:
308 return backend_pb2.Result(success=False, message="Model not loaded")
309
310 import torch
311 import torchaudio
312 from liquid_audio import ChatState
313
314 voice = request.voice.lower() if request.voice else self.voice
315 voice = voice.removeprefix("lfm2:").removeprefix("lfm:")
316 if voice not in VOICE_PROMPTS:
317 voice = self.voice
318 system_prompt = VOICE_PROMPTS[voice]
319
320 chat = ChatState(self.processor)
321 chat.new_turn("system")
322 chat.add_text(system_prompt)
323 chat.end_turn()
324 chat.new_turn("user")
325 chat.add_text(request.text or "")
326 chat.end_turn()
327 chat.new_turn("assistant")
328
329 audio_top_k = int(self.options.get("audio_top_k", 64))
330 audio_temp = float(self.options.get("audio_temperature", 0.8))
331 max_new = int(self.options.get("max_new_tokens", 2048))
332
333 audio_out = []
334 for tok in self.model.generate_sequential(
335 **chat,
336 max_new_tokens=max_new,
337 audio_temperature=audio_temp,
338 audio_top_k=audio_top_k,
339 ):
340 if tok.numel() > 1:
341 audio_out.append(tok)
342
343 if len(audio_out) <= 1:
344 return backend_pb2.Result(success=False, message="No audio frames generated")
345
346 # Drop the trailing end-of-audio frame, matching the package's examples.
347 audio_codes = torch.stack(audio_out[:-1], 1).unsqueeze(0)
348 waveform = self.processor.decode(audio_codes)
349
350 out_path = request.dst
351 if not out_path:
352 return backend_pb2.Result(success=False, message="dst path is required")
353 os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
354 # soundfile in preference to torchaudio.save — the latter routes
355 # through torchcodec, whose native libs need NVIDIA NPP that we
356 # don't bundle in the cuda13 image.
357 import soundfile as _sf
358 _sf.write(out_path, waveform.cpu().numpy().squeeze(0).T, 24_000)
359
360 return backend_pb2.Result(success=True)
361 except Exception as exc:
362 print(f"TTS failed: {exc}", file=sys.stderr)

Callers

nothing calls this directly

Calls 4

append — Method · 0.80
decode — Method · 0.80
get — Method · 0.45
write — Method · 0.45

Tested by

no test coverage detected