Returns a list of n-grams (tuples of n successive words) from the given string. Punctuation marks are stripped from words.
(string, n=3, punctuation=PUNCTUATION, **kwargs)
# Characters stripped from both ends of every token by ngrams() (its default `punctuation`).
PUNCTUATION = ".,;:!?()[]{}`''\"@#$^&*+-|=~_"


def ngrams(string, n=3, punctuation=PUNCTUATION, **kwargs):
    """ Returns a list of n-grams (tuples of n successive words) from the given string.
        Punctuation marks are stripped from words.

        The string is split on whitespace after the marks ".", "?" and "!" have
        been detached from whatever precedes them, so "end.Next" yields the two
        words "end" and "Next" (and, as a side effect, "3.14" yields "3" and "14").
        Characters in `punctuation` are then stripped from both ends of each token;
        tokens that become empty are dropped.

        string      -- the text to tokenize.
        n           -- the number of words per tuple.
        punctuation -- the characters to strip from the edges of each word.
        **kwargs    -- accepted for call compatibility; not used by this function.

        Returns [] when n < 1 or when the string has fewer than n words.
    """
    # A window of zero or negative width has no meaningful n-grams; without this
    # guard the slicing below would produce empty or ragged tuples.
    if n < 1:
        return []
    # Put a space before sentence-ending marks so they become separate tokens
    # (which the punctuation stripping below then discards).
    for mark in (".", "?", "!"):
        string = string.replace(mark, " " + mark)
    words = [w.strip(punctuation) for w in string.split()]
    # Tokens that consisted solely of punctuation are now empty strings.
    words = [w for w in words if w]
    return [tuple(words[i:i + n]) for i in range(len(words) - n + 1)]
| 352 | |
| 353 | class Weight(float): |
| 354 | """ A float with a magic "assessments" property, |
no test coverage detected
searching dependent graphs…