Split an audio signal into overlapping frames. Args: audio: The input audio signal. sample_rate: The sample rate of the audio signal. hop_length: The hop between consecutive frames, in milliseconds (default is 20 ms). ftt_size: The size of the FFT window, in samples (default is 1024). Returns: An array of overlapping frames.
(
audio: np.ndarray,
sample_rate: int,
hop_length: int = 20,
ftt_size: int = 1024,
)
| 172 | |
| 173 | |
def audio_frames(
    audio: np.ndarray,
    sample_rate: int,
    hop_length: int = 20,
    ftt_size: int = 1024,
) -> np.ndarray:
    """
    Split an audio signal into overlapping frames.

    The signal is reflect-padded by ``ftt_size // 2`` samples on each side,
    then cut into windows of ``ftt_size`` samples whose start positions are
    ``hop_size`` samples apart, where
    ``hop_size = round(sample_rate * hop_length / 1000)``.

    Args:
        audio: The input audio signal (the doctest below uses a 1-D array).
        sample_rate: The sample rate of the audio signal.
        hop_length: The hop between consecutive frames in milliseconds
            (default is 20ms).
        ftt_size: The size of the FFT window in samples (default is 1024).

    Returns:
        A float64 array of shape ``(frame_count, ftt_size)`` holding the
        overlapping frames, one frame per row.

    Raises:
        ValueError: If ``ftt_size`` is not positive, or if the hop rounds to
            less than one sample (e.g. ``hop_length=0`` or a very low
            ``sample_rate``).

    Examples:
        >>> audio = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]*1000)
        >>> sample_rate = 8000
        >>> frames = audio_frames(audio, sample_rate, hop_length=10, ftt_size=512)
        >>> frames.shape
        (126, 512)
    """
    if ftt_size < 1:
        raise ValueError(f"ftt_size must be a positive integer, got {ftt_size}")

    # Convert the hop from milliseconds to a whole number of samples.
    hop_size = int(np.round(sample_rate * hop_length / 1000))
    if hop_size < 1:
        # A zero hop would otherwise surface as a division by zero when the
        # number of frames is computed.
        raise ValueError(
            "hop_length and sample_rate must yield a hop of at least one "
            f"sample, got {hop_size}"
        )

    # Pad the audio signal to handle edge cases
    padded = np.pad(audio, int(ftt_size / 2), mode="reflect")

    # Every length-ftt_size window of the padded signal, as a zero-copy view;
    # keeping each hop_size-th one yields
    # (len(padded) - ftt_size) // hop_size + 1 frames.
    windows = np.lib.stride_tricks.sliding_window_view(padded, ftt_size)

    # astype copies, so the caller gets an independent float64 array.
    return windows[::hop_size].astype(float)
| 216 | |
| 217 | |
| 218 | def calculate_fft(audio_windowed: np.ndarray, ftt_size: int = 1024) -> np.ndarray: |