Use the `N`-gram language model to generate sentences. Parameters ---------- N : int The gram-size of the model to generate from seed_words : list of strs A list of seed words to use to condition the initial sentence gener
(self, N, seed_words=["<bol>"], n_sentences=5)
| 133 | return probs |
| 134 | |
def generate(self, N, seed_words=["<bol>"], n_sentences=5):
    """
    Use the `N`-gram language model to generate sentences.

    Sentences are grown one token at a time: the candidate continuations
    returned by ``self.completions(words, N)`` are exponentiated,
    renormalized, and sampled from with ``np.random.choice``. A sentence is
    finished as soon as ``"<eol>"`` is drawn, at which point it is printed
    (wrapped to 90 columns, ``"<bol>"`` tokens omitted) and a new sentence
    is started from a fresh copy of `seed_words`.

    Parameters
    ----------
    N : int
        The gram-size of the model to generate from
    seed_words : list of strs
        A list of seed words to use to condition the initial sentence
        generation. Default is ``["<bol>"]``. The list is copied before
        use and is never modified.
    n_sentences : int
        The number of sentences to generate from the `N`-gram model.
        Default is 5.

    Returns
    -------
    sentences : list of lists of strs
        One entry per generated sentence. Each entry is the full token
        list for that sentence: the seed words, followed by the sampled
        words, followed by the terminating ``"<eol>"`` token.
    """
    sentences = []
    # Work on a copy so neither the caller's list nor the shared default
    # argument is ever mutated.
    words = seed_words.copy()

    while len(sentences) < n_sentences:
        nextw, scores = zip(*self.completions(words, N))

        # renormalize probs if smoothed (exponentiate once, then normalize)
        weights = np.exp(scores)
        probs = weights / weights.sum()
        next_word = np.random.choice(nextw, p=probs)

        if next_word == "<eol>":
            # End of sentence: echo the readable text, store the raw
            # tokens, and restart from the seed words.
            S = " ".join([w for w in words if w != "<bol>"])
            S = textwrap.fill(S, 90, initial_indent="", subsequent_indent=" ")
            print(S)
            words.append(next_word)
            sentences.append(words)
            words = seed_words.copy()
        else:
            words.append(next_word)

    return sentences
| 177 | |
| 178 | def perplexity(self, words, N): |
| 179 | r""" |
nothing calls this directly
no test coverage detected