Function cut

jieba/posseg/__init__.py:273–304 · view source on GitHub ↗

Global `cut` function that supports parallel processing. Note that it always uses the global `dt` tokenizer; custom POSTokenizer instances are not supported.

(sentence, HMM=True, use_paddle=False)

Source from the content-addressed store, hash-verified

271
272
def cut(sentence, HMM=True, use_paddle=False):
    """
    Lazily yield the tagged tokens of `sentence` (module-level entry point).

    Tokens come from the global `dt` tokenizer, or from the workers behind
    `jieba.pool` when a pool has been set, which is what makes parallel
    processing possible. Custom POSTokenizer instances are therefore not
    supported by this function.

    If `use_paddle` is true and `check_paddle_install` reports paddle as
    installed, tagging is delegated to `jieba.lac_small.predict` instead;
    `HMM` is not consulted on that path.
    """
    paddle_available = check_paddle_install['is_paddle_installed']

    if use_paddle and paddle_available:
        # Paddle raises a core exception on null input, so produce nothing.
        if sentence is None or sentence == "" or sentence == u"":
            return
        import jieba.lac_small.predict as predict
        words, flags = predict.get_result(strdecode(sentence))
        for position, word in enumerate(words):
            # Skip entries where either the word or its tag is missing.
            if word is not None and flags[position] is not None:
                yield pair(word, flags[position])
        return

    # No worker pool: tokenize in-process with the shared tokenizer.
    if jieba.pool is None:
        for token in dt.cut(sentence, HMM=HMM):
            yield token
        return

    # Pool available: split into lines (line endings kept) and fan the lines
    # out to the pool, choosing the worker that matches the HMM setting.
    lines = strdecode(sentence).splitlines(True)
    worker = _lcut_internal if HMM else _lcut_internal_no_hmm
    for tagged_line in jieba.pool.map(worker, lines):
        for token in tagged_line:
            yield token
305
306
307	def lcut(sentence, HMM=True, use_paddle=False):

lcutFunction · 0.70

strdecodeFunction · 0.85

pairClass · 0.85

cutMethod · 0.45

no test coverage detected