MCPcopy
hub / github.com/QData/TextAttack / augment

Method augment

textattack/augmentation/augmenter.py:111–196  ·  view source on GitHub ↗

Returns all possible augmentations of ``text`` according to ``self.transformation``.

(self, text)

Source from the content-addressed store, hash-verified

109 return transformed_texts
110
111 def augment(self, text):
112 """Returns all possible augmentations of ``text`` according to
113 ``self.transformation``."""
114 attacked_text = AttackedText(text)
115 original_text = attacked_text
116 all_transformed_texts = set()
117 num_words_to_swap = max(
118 int(self.pct_words_to_swap * len(attacked_text.words)), 1
119 )
120 augmentation_results = []
121 for _ in range(self.transformations_per_example):
122 current_text = attacked_text
123 words_swapped = len(current_text.attack_attrs["modified_indices"])
124
125 while words_swapped < num_words_to_swap:
126 transformed_texts = self.transformation(
127 current_text, self.pre_transformation_constraints
128 )
129
130 # Get rid of transformations we already have
131 transformed_texts = [
132 t for t in transformed_texts if t not in all_transformed_texts
133 ]
134
135 # Filter out transformations that don't match the constraints.
136 transformed_texts = self._filter_transformations(
137 transformed_texts, current_text, original_text
138 )
139
140 # if there's no more transformed texts after filter, terminate
141 if not len(transformed_texts):
142 break
143
144 # look for all transformed_texts that has enough words swapped
145 if self.high_yield or self.fast_augment:
146 ready_texts = [
147 text
148 for text in transformed_texts
149 if len(text.attack_attrs["modified_indices"])
150 >= num_words_to_swap
151 ]
152 for text in ready_texts:
153 all_transformed_texts.add(text)
154 unfinished_texts = [
155 text for text in transformed_texts if text not in ready_texts
156 ]
157
158 if len(unfinished_texts):
159 current_text = random.choice(unfinished_texts)
160 else:
161 # no need for further augmentations if all of transformed_texts meet `num_words_to_swap`
162 break
163 else:
164 current_text = random.choice(transformed_texts)
165
166 # update words_swapped based on modified indices
167 words_swapped = max(
168 len(current_text.attack_attrs["modified_indices"]),

Calls 7

AttackedText · Class · 0.90
Perplexity · Class · 0.90
USEMetric · Class · 0.90
AugmentationResult · Class · 0.85
printable_text · Method · 0.80
calculate · Method · 0.45