hub / github.com/mudler/LocalAI / AudioTranscription

Method AudioTranscription

backend/python/liquid-audio/backend.py:521–569 · view source on GitHub ↗

(self, request, context)

Source from the content-addressed store, hash-verified

519
520
def AudioTranscription(self, request, context):
    """Transcribe the audio file at ``request.dst`` into text.

    A three-turn chat is built for the model: a system turn containing
    "Perform ASR.", a user turn carrying the (mono) waveform, and an open
    assistant turn. Tokens are then generated and decoded until the
    end-of-message token or the ``max_new_tokens`` limit is reached.

    Args:
        request: Transcription request; only ``request.dst`` (path of the
            audio file to read) is used here.
        context: gRPC servicer context (not used by this method).

    Returns:
        backend_pb2.TranscriptResult: on success, the stripped transcript
        plus a single segment covering the whole clip (``start=0``,
        ``end`` = clip duration in milliseconds). An empty result
        (``segments=[]``, ``text=""``) is returned when the model or
        processor is not loaded, when ``request.dst`` is empty, or when
        any exception occurs; in the last case the error and traceback
        are printed to stderr.
    """
    try:
        if self.model is None or self.processor is None:
            return backend_pb2.TranscriptResult(segments=[], text="")

        # Imported inside the try so a missing dependency is reported via
        # the same catch-all path as any other failure.
        from liquid_audio import ChatState

        audio_path = request.dst
        if not audio_path:
            return backend_pb2.TranscriptResult(segments=[], text="")

        chat = ChatState(self.processor)
        chat.new_turn("system")
        chat.add_text("Perform ASR.")
        chat.end_turn()

        chat.new_turn("user")
        # soundfile in preference to torchaudio.load — the latter routes
        # through torchcodec which needs NVIDIA NPP libs we don't bundle.
        import soundfile as _sf
        import torch

        # always_2d=True yields a 2-D array even for mono files, so the
        # transpose below always produces (channels, samples).
        audio_np, sr = _sf.read(audio_path, dtype="float32", always_2d=True)
        wav = torch.from_numpy(audio_np.T)  # (channels, samples)
        if wav.shape[0] > 1:
            # Down-mix to mono — the processor expects a single channel
            wav = wav.mean(dim=0, keepdim=True)
        chat.add_audio(wav, sr)
        chat.end_turn()

        # Leave the assistant turn open: generation fills it in.
        chat.new_turn("assistant")

        max_new = int(self.options.get("max_new_tokens", 1024))

        pieces = []
        for tok in self.model.generate_sequential(**chat, max_new_tokens=max_new):
            # Only single-element tensors are decoded as text; anything
            # else yielded by the generator is ignored.
            # NOTE(review): presumably multi-element tensors are audio
            # frames — confirm against liquid_audio.
            if tok.numel() == 1:
                if tok.item() == IM_END_TOKEN:
                    break
                pieces.append(self.processor.text.decode(tok))

        text = "".join(pieces).strip()
        # Clip length in milliseconds, derived from sample count and rate.
        duration_ms = int((wav.shape[1] / sr) * 1000)
        segment = backend_pb2.TranscriptSegment(
            id=0, start=0, end=duration_ms, text=text, tokens=[],
        )
        return backend_pb2.TranscriptResult(segments=[segment], text=text)
    except Exception as exc:
        # Best-effort boundary: log the failure and hand back an empty
        # transcript rather than propagating the exception to the caller.
        print(f"AudioTranscription failed: {exc}", file=sys.stderr)
        print(traceback.format_exc(), file=sys.stderr)
        return backend_pb2.TranscriptResult(segments=[], text="")
570
571
572	def StartFineTune(self, request, context):

Callers

nothing calls this directly

Calls 4

appendMethod · 0.80

decodeMethod · 0.80

readMethod · 0.45

getMethod · 0.45

Tested by

no test coverage detected