Encode given audio data and return quantized latent codes. Parameters: audio_data : Tensor[B x 1 x T] - Audio data to encode. n_quantizers : int, optional - Number of quantizers to use, by default None. If None, all quantizers are used.
(
self,
audio_data: torch.Tensor,
n_quantizers: int = None,
)
| 207 | return audio_data |
| 208 | |
def encode(
    self,
    audio_data: torch.Tensor,
    n_quantizers: int = None,
):
    """Run the encoder, then the quantizer, over the given audio.

    Parameters
    ----------
    audio_data : Tensor[B x 1 x T]
        Audio data to encode
    n_quantizers : int, optional
        Number of quantizers to use, by default None
        If None, all quantizers are used.

    Returns
    -------
    z : Tensor[B x D x T]
        Quantized continuous representation of input
    codes : Tensor[B x N x T]
        Codebook indices for each codebook
        (quantized discrete representation of input)
    latents : Tensor[B x N*D x T]
        Projected latents (continuous representation of input before
        quantization)
    commitment_loss : Tensor[1]
        Commitment loss to train encoder to predict vectors closer to
        codebook entries
    codebook_loss : Tensor[1]
        Codebook loss to update the codebook

    Notes
    -----
    The five values are returned as a plain tuple in the order listed
    above, not as a dictionary, and no input length is included.
    """
    # Continuous features produced by the encoder for the raw input.
    encoded = self.encoder(audio_data)

    # The quantizer must yield exactly five values; the unpacking below
    # raises if it returns any other number.
    quantized = self.quantizer(encoded, n_quantizers)
    z, codes, latents, commitment_loss, codebook_loss = quantized

    return z, codes, latents, commitment_loss, codebook_loss
| 248 | |
| 249 | def decode(self, z: torch.Tensor): |
| 250 | """Decode given latent codes and return audio data |