MCPcopy
hub / github.com/facebookresearch/mmf / test_vocab_from_text

Method test_vocab_from_text

tests/utils/test_text_utils.py:59–102  ·  view source on GitHub ↗
(self)

Source from the content-addressed store, hash-verified

57 self.assertEqual(list(ngrams), expected_ngrams)
58
def test_vocab_from_text(self):
    """Exercise ``VocabFromText`` under several constructor options.

    Every vocabulary is built from ``self.VOCAB_EXAMPLE_SENTENCES``. The
    default build is checked for its size, unknown-token index and a sample
    of ``itos``/``stoi`` lookups; the remaining builds vary ``min_count``,
    ``only_unk_extra``, ``keep`` and ``remove`` and check the resulting size.
    """
    sentences = self.VOCAB_EXAMPLE_SENTENCES

    # Default arguments: size is reported consistently by get_size() and len().
    default_vocab = text_utils.VocabFromText(sentences)
    self.assertEqual(default_vocab.get_size(), 41)
    self.assertEqual(len(default_vocab), 41)
    self.assertEqual(default_vocab.get_unk_index(), 1)

    # Index -> token lookups; slot 0 must hold the first default token.
    self.assertEqual(default_vocab.itos[0], default_vocab.DEFAULT_TOKENS[0])
    expected_itos = (
        (34, "that"),
        (31, "cube"),
        (25, "cyan"),
        (20, "the"),
        (10, "than"),
    )
    for index, token in expected_itos:
        self.assertEqual(default_vocab.itos[index], token)

    # Token -> index lookups.
    expected_stoi = (("sphere", 30), ("shape", 22))
    for token, index in expected_stoi:
        self.assertEqual(default_vocab.stoi[token], index)

    # min_count=10: five entries remain and the last one is "the".
    frequent_vocab = text_utils.VocabFromText(sentences, min_count=10)
    self.assertEqual(frequent_vocab.get_size(), 5)
    last_index = frequent_vocab.get_size() - 1
    self.assertEqual(frequent_vocab.itos[last_index], "the")

    # min_count=11: one entry fewer than with min_count=10.
    sparse_vocab = text_utils.VocabFromText(sentences, min_count=11)
    self.assertEqual(sparse_vocab.get_size(), 4)

    # min_count=11 with only_unk_extra=True: a single "<unk>" entry.
    unk_only_vocab = text_utils.VocabFromText(
        sentences, min_count=11, only_unk_extra=True
    )
    self.assertEqual(unk_only_vocab.get_size(), 1)
    last_index = unk_only_vocab.get_size() - 1
    self.assertEqual(unk_only_vocab.itos[last_index], "<unk>")

    # min_count=1 combined with different keep/remove settings.
    removal_cases = (
        ({"remove": [";"]}, 40),
        ({"remove": [";", ",", "?"]}, 38),
        # ``remove`` is given as a bare string here rather than a list.
        ({"keep": ["?"], "remove": ";"}, 40),
    )
    for extra_kwargs, expected_size in removal_cases:
        filtered_vocab = text_utils.VocabFromText(
            sentences, min_count=1, **extra_kwargs
        )
        self.assertEqual(filtered_vocab.get_size(), expected_size)
103

Callers

nothing calls this directly

Calls 2

get_sizeMethod · 0.45
get_unk_indexMethod · 0.45

Tested by

no test coverage detected