Method encode_audio

wan/speech2video.py:283–295 · view source on GitHub ↗

(self, audio_path, infer_frames)

Source from the content-addressed store, hash-verified

281	return cond
282
def encode_audio(self, audio_path, infer_frames):
    """Encode an audio file into a batched, frame-aligned embedding bucket.

    The audio encoder first extracts features from ``audio_path`` (with
    ``return_all_layers=True``), then groups them into a bucket using
    ``self.fps``, ``infer_frames`` as the batch size in frames, and
    ``self.audio_sample_m``.

    Args:
        audio_path: Passed through unchanged to
            ``self.audio_encoder.extract_audio_feat``.
        infer_frames: Forwarded as ``batch_frames`` to
            ``self.audio_encoder.get_audio_embed_bucket_fps``.

    Returns:
        A ``(audio_embed_bucket, num_repeat)`` tuple. The bucket has been
        moved to ``self.device`` / ``self.param_dtype``, given a new
        leading axis, and (for rank-3 or rank-4 results) had its axis 1
        moved to the last position. ``num_repeat`` is returned exactly as
        the encoder produced it.
    """
    # Axis orders that move axis 1 to the end, keyed by tensor rank
    # (rank is measured after the leading axis has been added).
    axis_order_by_rank = {
        3: (0, 2, 1),
        4: (0, 2, 3, 1),
    }

    layer_feats = self.audio_encoder.extract_audio_feat(
        audio_path, return_all_layers=True)
    bucket, num_repeat = self.audio_encoder.get_audio_embed_bucket_fps(
        layer_feats,
        fps=self.fps,
        batch_frames=infer_frames,
        m=self.audio_sample_m,
    )

    # Match the model's device and dtype, then prepend a size-1 axis.
    # NOTE(review): presumably the new axis is the batch dimension and the
    # encoder's leading axis indexes frames -- confirm against the encoder.
    bucket = bucket.to(self.device, self.param_dtype).unsqueeze(0)

    # Any other rank is passed through without reordering.
    axis_order = axis_order_by_rank.get(len(bucket.shape))
    if axis_order is not None:
        bucket = bucket.permute(*axis_order)
    return bucket, num_repeat
296
297	def read_last_n_frames(self,
298	video_path,

generateMethod · 0.95

extract_audio_featMethod · 0.80

get_audio_embed_bucket_fpsMethod · 0.80

toMethod · 0.80

no test coverage detected