Processes an audio signal from a file or AudioSignal object into discrete codes. This function processes the signal in short windows, using constant GPU memory. Parameters ---------- audio_path_or_signal : Union[str, Path, AudioSignal] audio signa
(
self,
audio_path_or_signal: Union[str, Path, AudioSignal],
win_duration: float = 1.0,
verbose: bool = False,
normalize_db: float = -16,
n_quantizers: int = None,
)
| 124 | |
| 125 | @torch.no_grad() |
| 126 | def compress( |
| 127 | self, |
| 128 | audio_path_or_signal: Union[str, Path, AudioSignal], |
| 129 | win_duration: float = 1.0, |
| 130 | verbose: bool = False, |
| 131 | normalize_db: float = -16, |
| 132 | n_quantizers: int = None, |
| 133 | ) -> DACFile: |
| 134 | """Processes an audio signal from a file or AudioSignal object into |
| 135 | discrete codes. This function processes the signal in short windows, |
| 136 | using constant GPU memory. |
| 137 | |
| 138 | Parameters |
| 139 | ---------- |
| 140 | audio_path_or_signal : Union[str, Path, AudioSignal] |
| 141 | audio file path or AudioSignal to compress into codes |
| 142 | win_duration : float, optional |
| 143 | window duration in seconds, by default 1.0 |
| 144 | verbose : bool, optional |
| 145 | by default False |
| 146 | normalize_db : float, optional |
| 147 | normalize db, by default -16 |
| 148 | |
| 149 | Returns |
| 150 | ------- |
| 151 | DACFile |
| 152 | Object containing compressed codes and metadata |
| 153 | required for decompression |
| 154 | """ |
| 155 | audio_signal = audio_path_or_signal |
| 156 | if isinstance(audio_signal, (str, Path)): |
| 157 | audio_signal = AudioSignal.load_from_file_with_ffmpeg(str(audio_signal)) |
| 158 | |
| 159 | self.eval() |
| 160 | original_padding = self.padding |
| 161 | original_device = audio_signal.device |
| 162 | |
| 163 | audio_signal = audio_signal.clone() |
| 164 | original_sr = audio_signal.sample_rate |
| 165 | |
| 166 | resample_fn = audio_signal.resample |
| 167 | loudness_fn = audio_signal.loudness |
| 168 | |
| 169 | # If audio is at least 10 * 60 * 60 s long (10 hours, assuming signal_duration is in seconds), use the ffmpeg versions |
| 170 | if audio_signal.signal_duration >= 10 * 60 * 60: |
| 171 | resample_fn = audio_signal.ffmpeg_resample |
| 172 | loudness_fn = audio_signal.ffmpeg_loudness |
| 173 | |
| 174 | original_length = audio_signal.signal_length |
| 175 | resample_fn(self.sample_rate) |
| 176 | input_db = loudness_fn() |
| 177 | |
| 178 | if normalize_db is not None: |
| 179 | audio_signal.normalize(normalize_db) |
| 180 | audio_signal.ensure_max_of_audio() |
| 181 | |
| 182 | nb, nac, nt = audio_signal.audio_data.shape |
| 183 | audio_signal.audio_data = audio_signal.audio_data.reshape(nb * nac, 1, nt) |
no test coverage detected