| 303 | |
| 304 | |
| 305 | def TTS(self, request, context): |
| 306 | try: |
| 307 | if self.model is None or self.processor is None: |
| 308 | return backend_pb2.Result(success=False, message="Model not loaded") |
| 309 | |
| 310 | import torch |
| 311 | import torchaudio |
| 312 | from liquid_audio import ChatState |
| 313 | |
| 314 | voice = request.voice.lower() if request.voice else self.voice |
| 315 | voice = voice.removeprefix("lfm2:").removeprefix("lfm:") |
| 316 | if voice not in VOICE_PROMPTS: |
| 317 | voice = self.voice |
| 318 | system_prompt = VOICE_PROMPTS[voice] |
| 319 | |
| 320 | chat = ChatState(self.processor) |
| 321 | chat.new_turn("system") |
| 322 | chat.add_text(system_prompt) |
| 323 | chat.end_turn() |
| 324 | chat.new_turn("user") |
| 325 | chat.add_text(request.text or "") |
| 326 | chat.end_turn() |
| 327 | chat.new_turn("assistant") |
| 328 | |
| 329 | audio_top_k = int(self.options.get("audio_top_k", 64)) |
| 330 | audio_temp = float(self.options.get("audio_temperature", 0.8)) |
| 331 | max_new = int(self.options.get("max_new_tokens", 2048)) |
| 332 | |
| 333 | audio_out = [] |
| 334 | for tok in self.model.generate_sequential( |
| 335 | **chat, |
| 336 | max_new_tokens=max_new, |
| 337 | audio_temperature=audio_temp, |
| 338 | audio_top_k=audio_top_k, |
| 339 | ): |
| 340 | if tok.numel() > 1: |
| 341 | audio_out.append(tok) |
| 342 | |
| 343 | if len(audio_out) <= 1: |
| 344 | return backend_pb2.Result(success=False, message="No audio frames generated") |
| 345 | |
| 346 | # Drop the trailing end-of-audio frame, matching the package's examples. |
| 347 | audio_codes = torch.stack(audio_out[:-1], 1).unsqueeze(0) |
| 348 | waveform = self.processor.decode(audio_codes) |
| 349 | |
| 350 | out_path = request.dst |
| 351 | if not out_path: |
| 352 | return backend_pb2.Result(success=False, message="dst path is required") |
| 353 | os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True) |
| 354 | # soundfile in preference to torchaudio.save — the latter routes |
| 355 | # through torchcodec, whose native libs need NVIDIA NPP that we |
| 356 | # don't bundle in the cuda13 image. |
| 357 | import soundfile as _sf |
| 358 | _sf.write(out_path, waveform.cpu().numpy().squeeze(0).T, 24_000) |
| 359 | |
| 360 | return backend_pb2.Result(success=True) |
| 361 | except Exception as exc: |
| 362 | print(f"TTS failed: {exc}", file=sys.stderr) |