(
nlp: Language, docs: List[Doc], batch_size: Optional[int]
)
| 105 | |
| 106 | |
def annotate(
    nlp: Language, docs: List[Doc], batch_size: Optional[int]
) -> numpy.ndarray:
    """Run ``docs`` through ``nlp.pipe`` and record per-batch throughput.

    The piped documents are drained in chunks. Each chunk's retrieval is
    timed with ``time_context`` and its token count (from ``count_tokens``)
    is divided by the measured ``elapsed`` value.

    nlp: Pipeline whose ``pipe`` method processes the documents.
    docs: Documents to process; wrapped in a ``tqdm`` progress bar.
    batch_size: Batch size passed to ``nlp.pipe`` and used as the chunk
        size; when falsy (``None`` or ``0``), ``nlp.batch_size`` is used
        for chunking instead.
    RETURNS: One throughput value per non-empty chunk (presumably tokens
        per second -- depends on the unit ``time_context`` reports).
    """
    # nlp.pipe yields results lazily, so the pipeline work for a chunk is
    # performed while that chunk is being pulled inside the timed region.
    stream = nlp.pipe(tqdm(docs, unit="doc", disable=None), batch_size=batch_size)
    throughputs = []
    while True:
        with time_context() as timer:
            chunk_size = batch_size if batch_size else nlp.batch_size
            chunk = list(islice(stream, chunk_size))
        # An empty chunk means the stream is exhausted.
        if not chunk:
            break
        throughputs.append(count_tokens(chunk) / timer.elapsed)

    return numpy.array(throughputs)
| 123 | |
| 124 | |
| 125 | def benchmark( |
no test coverage detected
searching dependent graphs…