MCPcopy
hub / github.com/mudler/LocalAI / SoundGeneration

Method SoundGeneration

backend/python/transformers/backend.py:478–542  ·  view source on GitHub ↗
(self, request, context)

Source from the content-addressed store, hash-verified

476 await iterations.aclose()
477
478 def SoundGeneration(self, request, context):
479 model_name = request.model
480 try:
481 if self.processor is None:
482 if model_name == "":
483 return backend_pb2.Result(success=False, message="request.model is required")
484 self.processor = AutoProcessor.from_pretrained(model_name)
485 if self.model is None:
486 if model_name == "":
487 return backend_pb2.Result(success=False, message="request.model is required")
488 # Dynamically resolve model class if configured, otherwise default to MusicgenForConditionalGeneration
489 model_type = self.options.get("model_type", "MusicgenForConditionalGeneration")
490 ModelClass = getattr(transformers_module, model_type)
491 self.model = ModelClass.from_pretrained(model_name)
492 inputs = None
493 if request.text == "":
494 inputs = self.model.get_unconditional_inputs(num_samples=1)
495 elif request.HasField('src'):
496 sample_rate, wsamples = wavfile.read('path_to_your_file.wav')
497
498 if request.HasField('src_divisor'):
499 wsamples = wsamples[: len(wsamples) // request.src_divisor]
500
501 inputs = self.processor(
502 audio=wsamples,
503 sampling_rate=sample_rate,
504 text=[request.text],
505 padding=True,
506 return_tensors="pt",
507 )
508 else:
509 inputs = self.processor(
510 text=[request.text],
511 padding=True,
512 return_tensors="pt",
513 )
514
515 if request.HasField('duration'):
516 tokens = int(request.duration * 51.2) # 256 tokens = 5 seconds, therefore 51.2 tokens is one second
517 guidance = self.options.get("guidance_scale", 3.0)
518 if request.HasField('temperature'):
519 guidance = request.temperature
520 dosample = self.options.get("do_sample", True)
521 if request.HasField('sample'):
522 dosample = request.sample
523 audio_values = self.model.generate(**inputs, do_sample=dosample, guidance_scale=guidance, max_new_tokens=self.max_tokens)
524 print("[transformers] SoundGeneration generated!", file=sys.stderr)
525
526 # Save audio output
527 if hasattr(self.processor, 'save_audio'):
528 if hasattr(self.processor, 'batch_decode'):
529 try:
530 audio_values = self.processor.batch_decode(audio_values)
531 except Exception:
532 pass
533 self.processor.save_audio(audio_values, request.dst)
534 else:
535 sampling_rate = self.model.config.audio_encoder.sampling_rate

Callers

nothing calls this directly

Calls 5

processor · Method · 0.80
generate · Method · 0.80
get · Method · 0.45
read · Method · 0.45
write · Method · 0.45

Tested by

no test coverage detected