Implementation of SpecAug. Reference: Daniel S. Park et al., "SpecAugment: A Simple Data Augmentation Method for Automatic Speech Recognition". Warning: when using CUDA mode, time_warp is not reproducible due to `torch.nn.functional.interpolate`.
| 15 | |
| 16 | @tables.register("specaug_classes", "SpecAug") |
| 17 | class SpecAug(nn.Module): |
| 18 | """Implementation of SpecAug. |
| 19 | |
| 20 | Reference: |
| 21 | Daniel S. Park et al. |
| 22 | "SpecAugment: A Simple Data |
| 23 | Augmentation Method for Automatic Speech Recognition" |
| 24 | |
| 25 | .. warning:: |
| 26 | When using cuda mode, time_warp doesn't have reproducibility |
| 27 | due to `torch.nn.functional.interpolate`. |
| 28 | |
| 29 | """ |
| 30 | |
| 31 | def __init__( |
| 32 | self, |
| 33 | apply_time_warp: bool = True, |
| 34 | time_warp_window: int = 5, |
| 35 | time_warp_mode: str = "bicubic", |
| 36 | apply_freq_mask: bool = True, |
| 37 | freq_mask_width_range: Union[int, Sequence[int]] = (0, 20), |
| 38 | num_freq_mask: int = 2, |
| 39 | apply_time_mask: bool = True, |
| 40 | time_mask_width_range: Optional[Union[int, Sequence[int]]] = None, |
| 41 | time_mask_width_ratio_range: Optional[Union[float, Sequence[float]]] = None, |
| 42 | num_time_mask: int = 2, |
| 43 | ): |
| 44 | """Initialize SpecAug. |
| 45 | |
| 46 | Args: |
| 47 | apply_time_warp: TODO. |
| 48 | time_warp_window: TODO. |
| 49 | time_warp_mode: TODO. |
| 50 | apply_freq_mask: TODO. |
| 51 | freq_mask_width_range: TODO. |
| 52 | num_freq_mask: TODO. |
| 53 | apply_time_mask: TODO. |
| 54 | time_mask_width_range: TODO. |
| 55 | time_mask_width_ratio_range: TODO. |
| 56 | num_time_mask: TODO. |
| 57 | """ |
| 58 | if not apply_time_warp and not apply_time_mask and not apply_freq_mask: |
| 59 | raise ValueError("Either one of time_warp, time_mask, or freq_mask should be applied") |
| 60 | if ( |
| 61 | apply_time_mask |
| 62 | and (time_mask_width_range is not None) |
| 63 | and (time_mask_width_ratio_range is not None) |
| 64 | ): |
| 65 | raise ValueError( |
| 66 | 'Either one of "time_mask_width_range" or ' |
| 67 | '"time_mask_width_ratio_range" can be used' |
| 68 | ) |
| 69 | super().__init__() |
| 70 | self.apply_time_warp = apply_time_warp |
| 71 | self.apply_freq_mask = apply_freq_mask |
| 72 | self.apply_time_mask = apply_time_mask |
| 73 | |
| 74 | if apply_time_warp: |