Global `cut` function that supports parallel processing. Note that this only works with the module-level `dt` tokenizer; custom POSTokenizer instances are not supported.
(sentence, HMM=True, use_paddle=False)
| 271 | |
| 272 | |
def cut(sentence, HMM=True, use_paddle=False):
    """
    Global `cut` function that supports parallel processing.

    Note that this only works with the module-level `dt` tokenizer;
    custom POSTokenizer instances are not supported.

    This is a generator. Depending on the arguments and module state it
    takes one of three paths:

    * paddle: when `use_paddle` is true and `check_paddle_install`
      reports paddle as installed, yields `pair(word, tag)` for each
      result returned by `jieba.lac_small.predict.get_result`.
    * serial: when `jieba.pool` is None, yields the items produced by
      `dt.cut(sentence, HMM=HMM)`.
    * parallel: otherwise the decoded sentence is split into lines
      (line endings kept) and mapped over `jieba.pool`.

    :param sentence: text to segment; passed through `strdecode` on the
        paddle and parallel paths.
    :param HMM: forwarded to `dt.cut`, or selects `_lcut_internal` vs.
        `_lcut_internal_no_hmm` as the pool worker.
    :param use_paddle: request the paddle path; if paddle is not
        installed the call falls through to the `dt` paths.
    """
    is_paddle_installed = check_paddle_install['is_paddle_installed']
    if use_paddle and is_paddle_installed:
        # Paddle raises a core exception on empty input, so stop early.
        # A truthiness test (instead of comparing against "" only) also
        # catches empty bytes, which would otherwise be decoded to ""
        # and handed to paddle.
        if not sentence:
            return
        # Imported lazily so paddle is only loaded when actually used.
        import jieba.lac_small.predict as predict
        sents, tags = predict.get_result(strdecode(sentence))
        for i, sent in enumerate(sents):
            # Skip entries where either the word or its tag is missing.
            if sent is None or tags[i] is None:
                continue
            yield pair(sent, tags[i])
        return

    if jieba.pool is None:
        # No worker pool configured: tokenize in-process.
        for w in dt.cut(sentence, HMM=HMM):
            yield w
        return

    # Parallel mode: one task per line; keepends=True preserves the line
    # terminators so they are still part of the tokenized text.
    parts = strdecode(sentence).splitlines(True)
    worker = _lcut_internal if HMM else _lcut_internal_no_hmm
    for r in jieba.pool.map(worker, parts):
        for w in r:
            yield w
| 305 | |
| 306 | |
| 307 | def lcut(sentence, HMM=True, use_paddle=False): |