Encoder. Note that this method is used by asr_inference.py. Args: speech: (Batch, Length, ...); speech_lengths: (Batch, ); ind: int.
(
self,
speech: torch.Tensor,
speech_lengths: torch.Tensor,
**kwargs,
)
| 275 | return loss, stats, weight |
| 276 | |
| 277 | def encode( |
| 278 | self, |
| 279 | speech: torch.Tensor, |
| 280 | speech_lengths: torch.Tensor, |
| 281 | **kwargs, |
| 282 | ) -> Tuple[torch.Tensor, torch.Tensor]: |
| 283 | """Apply optional SpecAug (training only) and optional normalization, then run the encoder. Note that this method is used by asr_inference.py |
| 284 | Args: |
| 285 | speech: (Batch, Length, ...) |
| 286 | speech_lengths: (Batch, ) |
| 287 | ind: int (NOTE(review): not a named parameter of this method; **kwargs is accepted but never read in the body - confirm whether this entry is stale) |
| 288 | Returns: (encoder_out, encoder_out_lens); when the encoder's first output is a tuple, only its first element is returned. """ |
| 289 | with autocast(False): |
| 290 | |
| 291 | # Data augmentation (skipped unless self.specaug is set and the module is in training mode) |
| 292 | if self.specaug is not None and self.training: |
| 293 | speech, speech_lengths = self.specaug(speech, speech_lengths) |
| 294 | |
| 295 | # Normalization for feature: e.g. Global-CMVN, Utterance-CMVN |
| 296 | if self.normalize is not None: |
| 297 | speech, speech_lengths = self.normalize(speech, speech_lengths) |
| 298 | |
| 299 | # Forward encoder (the third return value is discarded). NOTE(review): indentation is lost in this listing - confirm whether this call sits inside the autocast(False) block |
| 300 | encoder_out, encoder_out_lens, _ = self.encoder(speech, speech_lengths) |
| 301 | if isinstance(encoder_out, tuple): |
| 302 | encoder_out = encoder_out[0] |
| 303 | |
| 304 | return encoder_out, encoder_out_lens |
| 305 | |
| 306 | def encode_chunk( |
| 307 | self, |
no test coverage detected