MCPcopy
hub / github.com/QData/TextAttack / run

Method run

textattack/commands/peek_dataset_command.py:31–74  ·  view source on GitHub ↗
(self, args)

Source from the content-addressed store, hash-verified

29 """
30
def run(self, args):
    """Log summary statistics and sample contents for a dataset.

    Builds a ``textattack.DatasetArgs`` from the attributes of ``args``,
    creates the dataset from it, and then reports:

    * the number of samples,
    * total / mean / std / min / max of the per-sample word counts,
    * whether no sample contains an ASCII uppercase letter,
    * the first and last samples,
    * the number of distinct outputs (and the outputs themselves when
      there are fewer than 20 distinct values),
    * the 20 most common outputs with their counts.

    Args:
        args: Parsed command-line namespace; its attributes are passed
            as keyword arguments to ``textattack.DatasetArgs``.
    """
    # ASCII-only pattern: non-ASCII capital letters are not detected.
    UPPERCASE_LETTERS_REGEX = re.compile(r"[A-Z]")

    dataset_args = textattack.DatasetArgs(**vars(args))
    dataset = textattack.DatasetArgs._create_dataset_from_args(dataset_args)

    num_words = []
    attacked_texts = []
    data_all_lowercased = True
    outputs = []
    for inputs, output in dataset:
        at = textattack.shared.AttackedText(inputs)
        # A single uppercase letter settles the question, so stop scanning
        # later samples once the flag has been cleared.
        if data_all_lowercased and UPPERCASE_LETTERS_REGEX.search(at.text):
            data_all_lowercased = False
        attacked_texts.append(at)
        num_words.append(len(at.words))
        outputs.append(output)

    logger.info(f"Number of samples: {_cb(len(attacked_texts))}")
    if not attacked_texts:
        # min()/max() on an empty array and attacked_texts[0] would both
        # raise, so there is nothing further to report.
        logger.warning("Dataset is empty; nothing to peek at.")
        return

    logger.info("Number of words per input:")
    num_words = np.array(num_words)
    logger.info(f'\t{("total:").ljust(8)} {_cb(num_words.sum())}')
    mean_words = f"{num_words.mean():.2f}"
    logger.info(f'\t{("mean:").ljust(8)} {_cb(mean_words)}')
    std_words = f"{num_words.std():.2f}"
    logger.info(f'\t{("std:").ljust(8)} {_cb(std_words)}')
    logger.info(f'\t{("min:").ljust(8)} {_cb(num_words.min())}')
    logger.info(f'\t{("max:").ljust(8)} {_cb(num_words.max())}')
    logger.info(f"Dataset lowercased: {_cb(data_all_lowercased)}")

    logger.info("First sample:")
    print(attacked_texts[0].printable_text(), "\n")
    logger.info("Last sample:")
    print(attacked_texts[-1].printable_text(), "\n")

    distinct_outputs = set(outputs)
    logger.info(f"Found {len(distinct_outputs)} distinct outputs.")
    # Gate on the number of *distinct* values: the full list is only
    # readable when the label set is small, regardless of dataset size.
    if len(distinct_outputs) < 20:
        print(sorted(distinct_outputs))

    logger.info("Most common outputs:")
    for key, value in collections.Counter(outputs).most_common(20):
        # Keys are truncated/padded to 5 characters to keep columns aligned.
        print("\t", str(key)[:5].ljust(5), f" ({value})")
75
76 @staticmethod
77 def register_subcommand(main_parser: ArgumentParser):

Callers

nothing calls this directly

Calls 3

printable_textMethod · 0.80
_cbFunction · 0.70

Tested by

no test coverage detected