MCPcopy
hub / github.com/Audio-AGI/AudioSep / get_mel

Function get_mel

models/CLAP/training/data.py:413–448  ·  view source on GitHub ↗
(audio_data, audio_cfg)

Source from the content-addressed store, hash-verified

411
412
def get_mel(audio_data, audio_cfg):
    """Compute a log-mel spectrogram of ``audio_data`` with torchaudio.

    Args:
        audio_data: Waveform tensor handed directly to
            ``torchaudio.transforms.MelSpectrogram``. The transform is moved
            to ``audio_data.device`` before being applied.
        audio_cfg: Mapping that provides ``"sample_rate"``, ``"window_size"``
            (used for both ``n_fft`` and ``win_length``), ``"hop_size"``,
            ``"fmin"`` and ``"fmax"``.

    Returns:
        The mel spectrogram converted to decibels, with its last two axes
        swapped: ``(T, n_mels)`` for a single un-batched waveform.
    """
    # NOTE(review): n_mels is fixed at 64 here rather than read from
    # audio_cfg -- confirm this matches the model configuration.
    mel_transform = torchaudio.transforms.MelSpectrogram(
        sample_rate=audio_cfg["sample_rate"],
        n_fft=audio_cfg["window_size"],
        win_length=audio_cfg["window_size"],
        hop_length=audio_cfg["hop_size"],
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm=None,
        onesided=True,
        n_mels=64,
        f_min=audio_cfg["fmin"],
        f_max=audio_cfg["fmax"],
    ).to(audio_data.device)
    # mel shape: (..., n_mels, T)
    mel = mel_transform(audio_data)
    # The settings above are intended to align with
    # librosa.feature.melspectrogram called with the same sr / n_fft /
    # hop_length / win_length / f_min / f_max and center=True,
    # pad_mode="reflect", power=2.0, n_mels=64, norm=None, htk=True.

    # We use the log-mel spectrogram as input.
    mel = torchaudio.transforms.AmplitudeToDB(top_db=None)(mel)
    # Swap only the last two axes. ``Tensor.T`` reverses *all* dimensions and
    # is deprecated for tensors that are not 2-D, so it would scramble any
    # leading batch/channel axes; for the 2-D case this is identical to ``.T``.
    return mel.transpose(-2, -1)  # (..., T, n_mels)
449
450
451def get_audio_features(

Callers 2

__getitem__Method · 0.85
get_audio_featuresFunction · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected