(self)
| 637 | ) |
| 638 | |
def test_span_infilling(self):
    """Smoke-test BARTNoising configured for Poisson span infilling.

    Verifies that a span-length distribution is created, that word starts
    are detected from the subword marker, and that ``apply`` runs on a
    copy of the tokens. The result of ``apply`` is not asserted.
    """
    # insert_ratio and random_ratio are deliberately not set here.
    # full_stop_token is left unset as well; its default is reportedly
    # [".", "?", "!"] -- confirm against BARTNoising.
    noise_config = {
        "vocab": [self.FAKE_VOCAB],
        "mask_tok": self.MASK_TOK,
        "mask_ratio": 0.5,
        "mask_length": "span-poisson",
        "poisson_lambda": 3.0,
        "is_joiner": True,
        "replace_length": 1,
    }
    noiser = BARTNoising(**noise_config)
    self.assertIsNotNone(noiser.mask_span_distribution)

    pieces = ["H■", "ell■", "o", "world", ".", "An■", "other", "!"]
    # Word starts are identified using the subword marker: in this
    # fixture a piece that follows a marker-terminated piece is a
    # continuation, every other piece opens a new word.
    expected_starts = [True, False, False, True, True, True, False, True]
    self.assertEqual(noiser._is_word_start(pieces), expected_starts)

    # Run on a shallow copy so ``pieces`` itself is never handed to apply.
    noiser.apply(copy.copy(pieces))
    # NOTE(review): the output is currently unchecked. A possible follow-up
    # is to compare the number of masked words against
    # math.ceil(sum(expected_starts) * noiser.mask_ratio) -- confirm that
    # this matches what apply actually guarantees before asserting it.
| 662 | |
| 663 | |
| 664 | class TestFeaturesTransform(unittest.TestCase): |
nothing calls this directly
no test coverage detected