MCPcopy
hub / github.com/Wan-Video/Wan2.2 / extract_audio_feat

Method extract_audio_feat

wan/modules/s2v/audio_encoder.py:66–89  ·  view source on GitHub ↗
(self,
                           audio_path,
                           return_all_layers=False,
                           dtype=torch.float32)

Source from the content-addressed store, hash-verified

64 self.video_rate = 30
65
def extract_audio_feat(self,
                       audio_path,
                       return_all_layers=False,
                       dtype=torch.float32):
    """Load an audio file and return model features resampled to the video rate.

    Args:
        audio_path: Path passed to ``librosa.load``.
        return_all_layers: If True, every entry of the model's
            ``hidden_states`` is joined with ``torch.cat`` (default dim 0);
            otherwise only the last entry is used.
        dtype: Torch dtype the returned features are cast to.

    Returns:
        The features after ``linear_interpolation`` from 50 fps to
        ``self.video_rate``, cast to ``dtype``. Computed without autograd
        tracking.
    """
    # Load at 16 kHz and hand the same rate to the processor so the two
    # always agree.
    audio_input, sample_rate = librosa.load(audio_path, sr=16000)

    input_values = self.processor(
        audio_input, sampling_rate=sample_rate,
        return_tensors="pt").input_values

    # Inference only: skip autograd so the graph and activations behind every
    # requested hidden state are not retained.
    with torch.no_grad():
        res = self.model(
            input_values.to(self.model.device), output_hidden_states=True)

    if return_all_layers:
        # NOTE(review): concatenation is along dim 0, so this presumably
        # relies on a batch size of 1 to yield one slice per layer - confirm.
        feat = torch.cat(res.hidden_states)
    else:
        feat = res.hidden_states[-1]

    # NOTE(review): input_fps=50 is presumably the model's hidden-state frame
    # rate for 16 kHz audio - confirm against the model in use.
    feat = linear_interpolation(
        feat, input_fps=50, output_fps=self.video_rate)

    return feat.to(dtype)
90
91 def get_audio_embed_bucket(self,
92 audio_embed,

Callers 1

encode_audioMethod · 0.80

Calls 2

linear_interpolationFunction · 0.85
toMethod · 0.80

Tested by

no test coverage detected