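Extract mean-normalized 80-bin fbank features from each waveform and pad them into one batch. Args: audio: iterable of waveform tensors, one per utterance.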
(audio)
| 117 | |
| 118 | |
def extract_feature(audio):
    """Compute mean-normalized fbank features per waveform and pad them into a batch.

    Args:
        audio: Iterable of waveform tensors. Each item is passed through
            ``unsqueeze(0)`` before ``Kaldi.fbank``, so items are presumably
            1-D sample tensors -- TODO confirm against callers.

    Returns:
        A tuple ``(features_padded, feature_lengths, feature_times)``:
            features_padded: result of ``pad_list`` over the per-waveform
                features, padded with 0.
            feature_lengths: list with ``shape[0]`` of each unpadded feature
                (presumably the frame count -- verify against ``Kaldi.fbank``).
            feature_times: list with ``shape[0]`` of each input waveform.
    """
    normalized = []
    sample_counts = []
    for waveform in audio:
        fbank = Kaldi.fbank(waveform.unsqueeze(0), num_mel_bins=80)
        # Remove the per-bin mean taken along dim 0 of this waveform's features.
        normalized.append(fbank - fbank.mean(dim=0, keepdim=True))
        sample_counts.append(waveform.shape[0])

    # Record the unpadded sizes so callers can recover valid regions later.
    frame_counts = [feat.shape[0] for feat in normalized]

    # Pad to a common size so the batch can be run in one inference call.
    batch = pad_list(normalized, pad_value=0)
    return batch, frame_counts, sample_counts
| 138 | |
| 139 | |
| 140 | def postprocess( |