Method analyze

backend/python/speaker-recognition/engines.py:396–417 · view source on GitHub ↗

(self, audio_path: str, actions)

Source from the content-addressed store, hash-verified

394	return _cosine_distance(self.embed(audio1), self.embed(audio2))
395
def analyze(self, audio_path: str, actions):
    """Run the analysis head on one audio file and return a single segment.

    The waveform comes from ``self._load_waveform``, which already
    resamples to ``self._expected_sr``. The analysis head expects 16 kHz
    mono, so when the configured rate differs the waveform is linearly
    interpolated to 16 kHz before being handed over.

    Args:
        audio_path: Path of the audio file to analyze.
        actions: Forwarded unchanged to ``self._analysis.analyze``.

    Returns:
        A one-element list containing a dict with ``start`` (always
        ``0.0``), ``end`` (length of the 16 kHz waveform in seconds) and
        every key/value returned by the analysis head.

    Raises:
        NotImplementedError: If the analysis head returns an empty result.
    """
    target_sr = 16000
    audio = self._load_waveform(audio_path)
    # np.interp raises ValueError on an empty sample array, so an empty
    # waveform skips resampling and falls through to a 0.0 s duration.
    if self._expected_sr != target_sr and len(audio) > 0:
        import numpy as np

        ratio = target_sr / float(self._expected_sr)
        n = int(round(len(audio) * ratio))
        audio = np.interp(
            np.linspace(0, len(audio), n, endpoint=False),
            np.arange(len(audio)),
            audio,
        ).astype("float32")
    attrs = self._analysis.analyze(audio_path, audio, actions)
    if not attrs:
        raise NotImplementedError(
            "analyze head failed to load — install transformers + torch or pass age_gender_model/emotion_model options"
        )
    # len(audio) == 0 naturally yields 0.0 here; no special case needed.
    duration = len(audio) / float(target_sr)
    return [dict(start=0.0, end=duration, **attrs)]
418
419
420	def build_engine(model_name: str, options: dict[str, str]) -> tuple[SpeakerEngine, str]:

nothing calls this directly

_load_waveformMethod · 0.95

analyzeMethod · 0.45

no test coverage detected