(self, audio_path: str)
| 356 | return audio.astype("float32") |
| 357 | |
def embed(self, audio_path: str) -> list[float]:
    """Return the model's embedding for one audio file as a flat list.

    The waveform comes from ``self._load_waveform(audio_path)``. The
    model input depends on ``self._input_rank``:

    * rank 3 or higher: features from ``self._extract_fbank`` with a
      leading batch axis, i.e. ``[1, frames, n_mels]``;
    * otherwise: the raw waveform reshaped to ``[1, samples]``.

    The first output of ``self._session.run`` is flattened and every
    element is converted to a built-in ``float``.

    NOTE(review): ``self._session`` looks like an ONNX Runtime
    ``InferenceSession`` (``run(None, {name: array})``) -- confirm.

    Args:
        audio_path: Path handed to ``self._load_waveform``.

    Returns:
        The flattened first model output as a list of Python floats.
    """
    import numpy as np

    waveform = self._load_waveform(audio_path)
    if self._input_rank < 3:
        # Model takes raw samples: [1, samples]
        model_input = waveform.reshape(1, -1)
    else:
        # Model takes features: [frames, n_mels] -> [1, frames, n_mels]
        fbank = self._extract_fbank(waveform)
        model_input = fbank[np.newaxis, :, :]

    outputs = self._session.run(None, {self._input_name: model_input})
    flat = np.asarray(outputs[0]).reshape(-1)
    # Widening to float64 before tolist() gives the same built-in floats
    # as calling float() on each element.
    return flat.astype(np.float64).tolist()
| 370 | |
| 371 | def _extract_fbank(self, audio): |
| 372 | """Compute Kaldi-style 80-dim FBank features for speaker encoders that |
no test coverage detected