MCPcopy
hub / github.com/mudler/LocalAI / analyze

Method analyze

backend/python/speaker-recognition/engines.py:396–417  ·  view source on GitHub ↗
(self, audio_path: str, actions)

Source from the content-addressed store, hash-verified

394 return _cosine_distance(self.embed(audio1), self.embed(audio2))
395
def analyze(self, audio_path: str, actions):
    """Run the analysis head on one audio file and return a single segment.

    The waveform is loaded through ``self._load_waveform`` and, when the
    engine's expected sample rate is not 16 kHz, linearly resampled to
    16 kHz before being handed to ``self._analysis.analyze``.

    Args:
        audio_path: Path of the audio file to analyze.
        actions: Forwarded unchanged to the analysis head.

    Returns:
        A one-element list containing a dict with ``start`` (always 0.0),
        ``end`` (number of 16 kHz samples divided by 16000) and every
        attribute returned by the analysis head.

    Raises:
        NotImplementedError: If the analysis head returns no attributes.
    """
    # AnalysisHead expects 16kHz mono; _load_waveform already
    # resamples to self._expected_sr. If the user configured a
    # non-16k expected rate, resample one more time for analyze.
    audio = self._load_waveform(audio_path)
    # Skip resampling for an empty waveform: np.interp raises ValueError
    # when its sample-point array is empty, whereas the 16 kHz path
    # passes an empty clip straight through to the analysis head.
    if self._expected_sr != 16000 and len(audio):
        import numpy as np

        src_len = len(audio)
        ratio = 16000 / float(self._expected_sr)
        n = int(round(src_len * ratio))
        # Linear interpolation onto n evenly spaced positions over the
        # original sample index range.
        audio = np.interp(
            np.linspace(0, src_len, n, endpoint=False),
            np.arange(src_len),
            audio,
        ).astype("float32")
    attrs = self._analysis.analyze(audio_path, audio, actions)
    if not attrs:
        raise NotImplementedError(
            "analyze head failed to load — install transformers + torch or pass age_gender_model/emotion_model options"
        )
    # An empty clip yields 0 / 16000.0 == 0.0, so no special case is needed.
    duration = len(audio) / 16000.0
    return [dict(start=0.0, end=duration, **attrs)]
418
419
420def build_engine(model_name: str, options: dict[str, str]) -> tuple[SpeakerEngine, str]:

Callers

nothing calls this directly

Calls 2

_load_waveformMethod · 0.95
analyzeMethod · 0.45

Tested by

no test coverage detected