(self, audio_path, infer_frames)
| 281 | return cond |
| 282 | |
def encode_audio(self, audio_path, infer_frames):
    """Encode an audio file into a batched embedding bucket.

    Features are extracted with ``self.audio_encoder.extract_audio_feat``
    (all layers requested) and bucketed by
    ``self.audio_encoder.get_audio_embed_bucket_fps`` using ``self.fps``,
    ``infer_frames`` as ``batch_frames`` and ``self.audio_sample_m`` as ``m``.
    The bucket is then moved to ``self.device`` / ``self.param_dtype`` and
    given a new leading dimension.

    Args:
        audio_path: Passed through unchanged to the audio encoder.
        infer_frames: Forwarded as ``batch_frames`` to the bucketing call.

    Returns:
        A ``(audio_embed_bucket, num_repeat)`` tuple. ``num_repeat`` is
        returned exactly as produced by the bucketing call.
    """
    encoder = self.audio_encoder
    layer_feats = encoder.extract_audio_feat(
        audio_path, return_all_layers=True)
    bucket, num_repeat = encoder.get_audio_embed_bucket_fps(
        layer_feats,
        fps=self.fps,
        batch_frames=infer_frames,
        m=self.audio_sample_m,
    )

    # Move to the target device/dtype, then prepend a new leading axis.
    # NOTE(review): presumably this is the batch axis -- confirm with callers.
    bucket = bucket.to(self.device, self.param_dtype).unsqueeze(0)

    # After unsqueeze, rotate the bucket's original first axis (now dim 1)
    # to the last position. Any other rank is returned without a permute.
    axis_order_by_rank = {
        3: (0, 2, 1),
        4: (0, 2, 3, 1),
    }
    axis_order = axis_order_by_rank.get(len(bucket.shape))
    if axis_order is not None:
        bucket = bucket.permute(*axis_order)
    return bucket, num_repeat
| 296 | |
| 297 | def read_last_n_frames(self, |
| 298 | video_path, |
no test coverage detected