(self)
| 57 | self.assertEqual(list(ngrams), expected_ngrams) |
| 58 | |
def test_vocab_from_text(self):
    """Exercise ``text_utils.VocabFromText`` on ``VOCAB_EXAMPLE_SENTENCES``.

    Checks the vocabulary size, a sample of index <-> token lookups, and
    how the expected size changes with the ``min_count``,
    ``only_unk_extra``, ``keep`` and ``remove`` arguments.
    """
    sentences = self.VOCAB_EXAMPLE_SENTENCES

    # --- Default construction -------------------------------------------
    full_vocab = text_utils.VocabFromText(sentences)

    self.assertEqual(full_vocab.get_size(), 41)
    self.assertEqual(len(full_vocab), 41)
    self.assertEqual(full_vocab.get_unk_index(), 1)

    # Index 0 must hold the first of the vocabulary's default tokens.
    self.assertEqual(full_vocab.itos[0], full_vocab.DEFAULT_TOKENS[0])

    # Spot-check index -> token lookups.
    expected_itos = (
        (34, "that"),
        (31, "cube"),
        (25, "cyan"),
        (20, "the"),
        (10, "than"),
    )
    for index, token in expected_itos:
        self.assertEqual(full_vocab.itos[index], token)

    # Spot-check token -> index lookups.
    expected_stoi = (
        ("sphere", 30),
        ("shape", 22),
    )
    for token, index in expected_stoi:
        self.assertEqual(full_vocab.stoi[token], index)

    # --- min_count thresholds -------------------------------------------
    # With min_count=10 the expected size is 5 and the last entry is "the".
    frequent_vocab = text_utils.VocabFromText(sentences, min_count=10)
    self.assertEqual(frequent_vocab.get_size(), 5)
    last_index = frequent_vocab.get_size() - 1
    self.assertEqual(frequent_vocab.itos[last_index], "the")

    # Raising the threshold to 11 is expected to drop one more entry.
    stricter_vocab = text_utils.VocabFromText(sentences, min_count=11)
    self.assertEqual(stricter_vocab.get_size(), 4)

    # --- only_unk_extra ---------------------------------------------------
    # Same threshold with only_unk_extra=True: a single entry, "<unk>".
    unk_only_vocab = text_utils.VocabFromText(
        sentences, min_count=11, only_unk_extra=True
    )
    self.assertEqual(unk_only_vocab.get_size(), 1)
    last_index = unk_only_vocab.get_size() - 1
    self.assertEqual(unk_only_vocab.itos[last_index], "<unk>")

    # --- keep / remove filters (all with min_count=1) --------------------
    # NOTE(review): the last case passes ``remove`` as a bare string rather
    # than a list, exactly as the test was originally written -- presumably
    # to cover string input; confirm against VocabFromText.
    filter_cases = (
        ({"remove": [";"]}, 40),
        ({"remove": [";", ",", "?"]}, 38),
        ({"keep": ["?"], "remove": ";"}, 40),
    )
    for filter_kwargs, expected_size in filter_cases:
        filtered_vocab = text_utils.VocabFromText(
            sentences, min_count=1, **filter_kwargs
        )
        self.assertEqual(filtered_vocab.get_size(), expected_size)
| 103 |
nothing calls this directly
no test coverage detected