(self, waveforms)
| 17 | } |
| 18 | |
def __call__(self, waveforms):
    """Build one mixture per batch item, using the other items as noise.

    For every index ``n`` along the first axis of ``waveforms`` the item is
    cloned as the target ``segment``. The items that follow it in the batch
    (wrapping around) are each passed through ``dynamic_loudnorm`` with the
    target as reference and summed into a noise signal; that sum is passed
    through ``dynamic_loudnorm`` once more and added to the target.

    Args:
        waveforms: Batch of waveforms indexed along its first axis.
            NOTE(review): presumably a ``torch.Tensor`` laid out as
            (batch, ..., samples) -- confirm against the caller.

    Returns:
        A ``(mixture, segment)`` tuple, each stacked along dim 0 in batch
        order. When a mixture's absolute peak exceeds 1, the mixture and
        its target segment are scaled by the same factor so that the
        mixture peaks at 0.9.

    Raises:
        ValueError: Raised by ``random.randint`` when
            ``self.max_mix_num`` is smaller than 2.
    """
    batch_size = waveforms.shape[0]

    segments = []
    mixtures = []

    for n in range(batch_size):
        # Clone so the in-place declipping below never touches the input.
        segment = waveforms[n].clone()

        # Zero tensor used as the accumulator for the background noise.
        noise = torch.zeros_like(segment)

        # randint is inclusive on both ends, so the result is always >= 2;
        # no separate assertion is needed.
        mix_num = random.randint(2, self.max_mix_num)

        # Use at most the (batch_size - 1) *other* items. Without this cap
        # the modulo index below wraps back onto ``n`` and the target
        # leaks into its own noise (and other items are added twice).
        num_interferers = min(mix_num, batch_size) - 1

        for i in range(1, num_interferers + 1):
            next_segment = waveforms[(n + i) % batch_size]
            noise += dynamic_loudnorm(
                audio=next_segment, reference=segment, **self.loudness_param
            )

        if num_interferers > 0:
            # Re-normalise the summed background against the target.
            # NOTE(review): the gain is presumably drawn at random inside
            # dynamic_loudnorm from self.loudness_param -- confirm there.
            noise = dynamic_loudnorm(
                audio=noise, reference=segment, **self.loudness_param
            )
        # Otherwise nothing was accumulated (single-item batch): the noise
        # stays at zero and the mixture is just a copy of the target.

        # Create the audio mixture.
        mixture = segment + noise

        # Declip if need be: scale target and mixture by the same factor
        # so they stay consistent with each other.
        max_value = torch.max(torch.abs(mixture))
        if max_value > 1:
            scale = 0.9 / max_value
            segment *= scale
            mixture *= scale

        segments.append(segment)
        mixtures.append(mixture)

    return torch.stack(mixtures, dim=0), torch.stack(segments, dim=0)
| 63 | |
| 64 | |
| 65 | def rescale_to_match_energy(segment1, segment2): |
nothing calls this directly
no test coverage detected