MCPcopy Index your code
hub / github.com/modelscope/FunASR / encode

Method encode

funasr/models/whisper_lid/model.py:257–294  ·  view source on GitHub ↗

Frontend + Encoder. Note that this method is used by asr_inference.py. Args: speech: (Batch, Length, ...); speech_lengths: (Batch, ); ind: int (listed in the docstring, but not present in the signature below).

(
        self,
        speech: torch.Tensor,
        speech_lengths: torch.Tensor,
        **kwargs,
    )

Source from the content-addressed store, hash-verified

255 return loss, stats, weight
256
def encode(
    self,
    speech: torch.Tensor,
    speech_lengths: torch.Tensor,
    **kwargs,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Apply optional augmentation/normalization, then run the encoder.

    Note that this method is used by asr_inference.py.

    Args:
        speech: (Batch, Length, ...)
        speech_lengths: (Batch, )
        **kwargs: accepted for call compatibility; not read by this method.

    Returns:
        ``(encoder_out, encoder_out_lens)``. When the encoder hands back a
        tuple whose second item is not ``None``, the first element of the
        result is itself a pair ``(encoder_out, intermediate_outs)``.
    """
    feats, feats_lengths = speech, speech_lengths

    # Feature-level steps run with autocast explicitly switched off.
    # NOTE(review): block scope reconstructed from a flattened listing; the
    # encoder call is assumed to sit outside this `with` -- confirm.
    with autocast(False):
        # Augmentation is applied only while the module is in training mode.
        if self.specaug is not None and self.training:
            feats, feats_lengths = self.specaug(feats, feats_lengths)

        # Feature normalization, e.g. Global-CMVN or Utterance-CMVN.
        if self.normalize is not None:
            feats, feats_lengths = self.normalize(feats, feats_lengths)

    # The CTC module is handed to the encoder only when the encoder is
    # flagged as using intermediate-CTC conditioning.
    encoder_kwargs = {"ctc": self.ctc} if self.encoder.interctc_use_conditioning else {}

    # feats: (Batch, Length, Dim) -> encoder_out: (Batch, Length2, Dim2)
    encoder_out, encoder_out_lens, _ = self.encoder(feats, feats_lengths, **encoder_kwargs)

    # Plain (non-tuple) encoder output: nothing to unpack.
    if not isinstance(encoder_out, tuple):
        return encoder_out, encoder_out_lens

    # Tuple output carries (final_out, intermediate_outs, ...).
    final_out = encoder_out[0]
    intermediate_outs = encoder_out[1]
    if intermediate_outs is None:
        return final_out, encoder_out_lens

    return (final_out, intermediate_outs), encoder_out_lens
295
296 def _calc_att_loss(
297 self,

Callers 2

forwardMethod · 0.95
inferenceMethod · 0.95

Calls 2

autocastFunction · 0.90
normalizeMethod · 0.45

Tested by

no test coverage detected