Open an audio file and read it as a mono waveform, resampling as necessary. Parameters ---------- file: str The audio file to open sr: int The sample rate to resample the audio if necessary Returns ------- A NumPy array containing the audio waveform, in float32 dtype.
(file: str, sr: int = 16000)
| 316 | |
| 317 | |
| 318 | def _load_audio_ffmpeg(file: str, sr: int = 16000): |
| 319 | """ |
| 320 | Open an audio file and read as mono waveform, resampling as necessary |
| 321 | |
| 322 | Parameters |
| 323 | ---------- |
| 324 | file: str |
| 325 | The audio file to open |
| 326 | |
| 327 | sr: int |
| 328 | The sample rate to resample the audio if necessary |
| 329 | |
| 330 | Returns |
| 331 | ------- |
| 332 | A NumPy array containing the audio waveform, in float32 dtype. |
| 333 | """ |
| 334 | |
| 335 | # This launches a subprocess to decode audio while down-mixing |
| 336 | # and resampling as necessary. Requires the ffmpeg CLI in PATH. |
| 337 | # fmt: off |
| 338 | pcm_params = [] |
| 339 | if file.lower().endswith('.pcm'): |
| 340 | pcm_params = [ |
| 341 | "-f", "s16le", |
| 342 | "-ar", str(sr), |
| 343 | "-ac", "1" |
| 344 | ] |
| 345 | |
| 346 | cmd = [ |
| 347 | "ffmpeg", |
| 348 | "-nostdin", |
| 349 | "-threads", "0", |
| 350 | *pcm_params, # PCM files need input format specified before -i since PCM is raw data without headers |
| 351 | "-i", file, |
| 352 | "-f", "s16le", |
| 353 | "-ac", "1", |
| 354 | "-acodec", "pcm_s16le", |
| 355 | "-ar", str(sr), |
| 356 | "-" |
| 357 | ] |
| 358 | # fmt: on |
| 359 | try: |
| 360 | out = run(cmd, capture_output=True, check=True).stdout |
| 361 | except CalledProcessError as e: |
| 362 | raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e |
| 363 | |
| 364 | return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0 |
no test coverage detected
searching dependent graphs…