Returns all possible augmentations of ``text`` according to ``self.transformation``.
(self, text)
| 109 | return transformed_texts |
| 110 | |
| 111 | def augment(self, text): |
| 112 | """Returns all possible augmentations of ``text`` according to |
| 113 | ``self.transformation``.""" |
| 114 | attacked_text = AttackedText(text) |
| 115 | original_text = attacked_text |
| 116 | all_transformed_texts = set() |
| 117 | num_words_to_swap = max( |
| 118 | int(self.pct_words_to_swap * len(attacked_text.words)), 1 |
| 119 | ) |
| 120 | augmentation_results = [] |
| 121 | for _ in range(self.transformations_per_example): |
| 122 | current_text = attacked_text |
| 123 | words_swapped = len(current_text.attack_attrs["modified_indices"]) |
| 124 | |
| 125 | while words_swapped < num_words_to_swap: |
| 126 | transformed_texts = self.transformation( |
| 127 | current_text, self.pre_transformation_constraints |
| 128 | ) |
| 129 | |
| 130 | # Get rid of transformations we already have |
| 131 | transformed_texts = [ |
| 132 | t for t in transformed_texts if t not in all_transformed_texts |
| 133 | ] |
| 134 | |
| 135 | # Filter out transformations that don't match the constraints. |
| 136 | transformed_texts = self._filter_transformations( |
| 137 | transformed_texts, current_text, original_text |
| 138 | ) |
| 139 | |
| 140 | # if there are no transformed texts left after filtering, terminate |
| 141 | if not len(transformed_texts): |
| 142 | break |
| 143 | |
| 144 | # look for all transformed_texts that have enough words swapped |
| 145 | if self.high_yield or self.fast_augment: |
| 146 | ready_texts = [ |
| 147 | text |
| 148 | for text in transformed_texts |
| 149 | if len(text.attack_attrs["modified_indices"]) |
| 150 | >= num_words_to_swap |
| 151 | ] |
| 152 | for text in ready_texts: |
| 153 | all_transformed_texts.add(text) |
| 154 | unfinished_texts = [ |
| 155 | text for text in transformed_texts if text not in ready_texts |
| 156 | ] |
| 157 | |
| 158 | if len(unfinished_texts): |
| 159 | current_text = random.choice(unfinished_texts) |
| 160 | else: |
| 161 | # no need for further augmentations if all of transformed_texts meet `num_words_to_swap` |
| 162 | break |
| 163 | else: |
| 164 | current_text = random.choice(transformed_texts) |
| 165 | |
| 166 | # update words_swapped based on modified indices |
| 167 | words_swapped = max( |
| 168 | len(current_text.attack_attrs["modified_indices"]), |