MCPcopy
hub / github.com/nltk/nltk / generate

Method generate

nltk/text.py:569–613  ·  view source on GitHub ↗

Print random text, generated using a trigram language model. See also `help(nltk.lm)`. :param length: The length of text to generate (default=100) :type length: int :param text_seed: Generation can be conditioned on preceding context. :type text_seed: list(str)

(self, length=100, text_seed=None, random_seed=42)

Source from the content-addressed store, hash-verified

567 return model
568
def generate(self, length=100, text_seed=None, random_seed=42):
    """
    Print random text, generated using a trigram language model.
    See also `help(nltk.lm)`.

    :param length: The length of text to generate (default=100)
    :type length: int

    :param text_seed: Generation can be conditioned on preceding context.
    :type text_seed: list(str)

    :param random_seed: A random seed or an instance of `random.Random`. If provided,
        makes the random sampling part of generation reproducible. (default=42)
    :type random_seed: int or random.Random

    :return: The generated text (the same string that is printed): at most
        ``length`` generated tokens, prefixed with the space-joined
        ``text_seed`` when one is given.
    :rtype: str
    """
    # Local import: only needed to recognise a `random.Random` argument.
    import random

    # Validate first so that an invalid `length` fails before the (slow)
    # model build below.
    assert length > 0, "The `length` must be more than 0."

    # Create the model when using it the first time. The sentence
    # tokenization only feeds the training step, so it is done here as well
    # instead of being repeated on every call.
    if not hasattr(self, "_trigram_model"):
        print("Building ngram index...", file=sys.stderr)
        self._tokenized_sents = [
            sent.split(" ") for sent in sent_tokenize(" ".join(self.tokens))
        ]
        self._trigram_model = self._train_default_ngram_lm(
            self._tokenized_sents, n=3
        )

    # Only a plain seed value can be bumped between passes. A `random.Random`
    # instance (documented as accepted above) does not support `+= 1`, and
    # neither does None; both are passed to the model unchanged.
    # NOTE(review): assumes a `random.Random` instance advances its own state
    # as the model samples from it, so successive passes still differ.
    bump_seed = random_seed is not None and not isinstance(
        random_seed, random.Random
    )

    generated_tokens = []

    # Each pass stops at the first end-of-sentence marker, so keep sampling
    # further passes until enough tokens have been collected.
    while len(generated_tokens) < length:
        for token in self._trigram_model.generate(
            length, text_seed=text_seed, random_seed=random_seed
        ):
            if token == "<s>":
                continue
            if token == "</s>":
                break
            generated_tokens.append(token)
        if bump_seed:
            # Use a different seed for the next pass; re-using the same one
            # would request the same sampling again.
            random_seed += 1

    prefix = " ".join(text_seed) + " " if text_seed else ""
    # The last pass may overshoot, so trim to the requested length.
    output_str = prefix + tokenwrap(generated_tokens[:length])
    print(output_str)
    return output_str
614
615 def plot(self, *args):
616 """

Callers

nothing calls this directly

Calls 5

sent_tokenizeFunction · 0.90
tokenwrapFunction · 0.90
joinMethod · 0.45
appendMethod · 0.45

Tested by

no test coverage detected