Print random text, generated using a trigram language model. See also `help(nltk.lm)`. :param length: The length of text to generate (default=100) :type length: int :param text_seed: Generation can be conditioned on preceding context. :type text_see
(self, length=100, text_seed=None, random_seed=42)
| 567 | return model |
| 568 | |
def generate(self, length=100, text_seed=None, random_seed=42):
    """
    Print random text, generated using a trigram language model.
    See also `help(nltk.lm)`.

    The trigram model is trained from ``self.tokens`` on the first call and
    cached on the instance as ``_trigram_model``; later calls reuse it.

    :param length: The length of text to generate; must be greater
        than 0 (default=100)
    :type length: int

    :param text_seed: Generation can be conditioned on preceding context.
    :type text_seed: list(str)

    :param random_seed: A random seed or an instance of `random.Random`. If provided,
        makes the random sampling part of generation reproducible. (default=42)
    :type random_seed: int or random.Random

    :return: The printed text: the ``text_seed`` tokens joined by spaces
        (if any) followed by the first ``length`` generated tokens as
        formatted by ``tokenwrap``.
    :rtype: str
    """
    # Local import: only needed to recognise/construct `random.Random`.
    import random

    # Create the model when using it the first time. Sentence tokenization
    # only feeds model training, so it is done here rather than on every call.
    if not hasattr(self, "_trigram_model"):
        print("Building ngram index...", file=sys.stderr)
        self._tokenized_sents = [
            sent.split(" ") for sent in sent_tokenize(" ".join(self.tokens))
        ]
        self._trigram_model = self._train_default_ngram_lm(
            self._tokenized_sents, n=3
        )

    assert length > 0, "The `length` must be more than 0."

    # Numeric seeds are bumped by one after each pass so that a pass which
    # ended early (on "</s>") is not replayed identically. Any other seed
    # (None, str, bytes, ...) is wrapped in a single `random.Random` whose
    # state advances on its own; an existing `random.Random` is used as-is.
    # Previously `random_seed += 1` raised TypeError for None and for
    # `random.Random` instances.
    bump_seed_each_pass = isinstance(random_seed, (int, float))
    if not bump_seed_each_pass and not isinstance(random_seed, random.Random):
        random_seed = random.Random(random_seed)

    generated_tokens = []
    while len(generated_tokens) < length:
        for token in self._trigram_model.generate(
            length, text_seed=text_seed, random_seed=random_seed
        ):
            if token == "<s>":
                # Sentence-start marker: not part of the output text.
                continue
            if token == "</s>":
                # Sentence-end marker: stop this pass and start a new one.
                break
            generated_tokens.append(token)
        if bump_seed_each_pass:
            random_seed += 1

    prefix = " ".join(text_seed) + " " if text_seed else ""
    # A pass may overshoot, so trim to exactly `length` tokens.
    output_str = prefix + tokenwrap(generated_tokens[:length])
    print(output_str)
    return output_str
| 614 | |
| 615 | def plot(self, *args): |
| 616 | """ |
nothing calls this directly
no test coverage detected