()
| 83 | |
@pytest.fixture
def sented_doc():
    """Build a short doc whose sentence starts are assigned by hand.

    The fixed sample text is run through a fresh ``English`` pipeline, then
    every token receives an explicit ``is_sent_start`` value: ``True`` when
    its index is a multiple of three, ``False`` otherwise.

    NOTE(review): this presumably relies on each "word word ." sentence
    tokenizing into exactly three tokens, so that the marked indices line up
    with the real sentence boundaries -- confirm against the tokenizer.

    Returns:
        The annotated doc produced by the pipeline.
    """
    pipeline = English()
    parsed = pipeline("One sentence. Two sentences. Three sentences.")
    for position, token in enumerate(parsed):
        # Indices 0, 3, 6, ... open a sentence; all other tokens are
        # explicitly flagged as *not* starting one (rather than left unset).
        token.is_sent_start = position % 3 == 0
    return parsed
| 95 | |
| 96 | |
| 97 | def test_tokenization(sented_doc): |