(self, audio_path: str, actions)
| 277 | return _cosine_distance(self.embed(audio1), self.embed(audio2)) |
| 278 | |
def analyze(self, audio_path: str, actions):
    """Analyse a whole clip and return it as a single attribute segment.

    Age / gender / emotion aren't produced by ECAPA-TDNN itself, so the
    work is delegated to ``self._analysis`` (the AnalysisHead wrapper
    around separate Apache-2.0 checkpoints). Segmentation / diarisation
    is a future enhancement, so the result is always one segment that
    spans the full clip.

    Args:
        audio_path: Path of the audio file; passed to
            ``self._load_waveform`` and on to the analysis head.
        actions: Forwarded unchanged to ``self._analysis.analyze``.

    Returns:
        A one-element list containing a dict with ``start`` (always
        0.0), ``end`` (clip duration in seconds) and every key the
        analysis head returned.

    Raises:
        NotImplementedError: If the analysis head returns a falsy
            result (reported as the head having failed to load).
    """
    waveform = self._load_waveform(audio_path)
    mono = waveform.squeeze().detach().cpu().numpy()
    if mono.ndim == 0:
        # squeeze() collapses a one-sample clip (e.g. shape (1, 1)) to a
        # 0-d array; without this, mono.shape[-1] below raises IndexError.
        # Restore a 1-D, length-1 sample axis instead.
        mono = mono.reshape(1)

    attrs = self._analysis.analyze(audio_path, mono, actions)
    if not attrs:
        raise NotImplementedError(
            "analyze head failed to load — install transformers + torch or pass age_gender_model/emotion_model options"
        )

    # NOTE(review): 16000.0 presumably matches the sample rate that
    # _load_waveform resamples to — confirm against that helper.
    duration = float(mono.shape[-1]) / 16000.0 if mono.size else 0.0
    return [dict(start=0.0, end=duration, **attrs)]
| 293 | |
| 294 | |
| 295 | class OnnxDirectEngine: |
nothing calls this directly
no test coverage detected