Function _stop_words

lmdeploy/utils.py:223–243 · view source on GitHub ↗

Convert a list of stop words (token ids or strings) into a numpy.ndarray.

(stop_words: list[int | str], tokenizer: object)

Source from the content-addressed store, hash-verified

221
222	# TODO remove stop_word_offsets stuff and make it clean
223	def _stop_words(stop_words: list[int \| str], tokenizer: object):
224	"""Return list of stop-words to numpy.ndarray."""
225	import numpy as np
226	if stop_words is None:
227	return None
228	assert isinstance(stop_words, list) and \
229	all(isinstance(elem, (str, int)) for elem in stop_words), \
230	f'stop_words must be a list but got {type(stop_words)}'
231	stop_indexes = []
232	for stop_word in stop_words:
233	if isinstance(stop_word, str):
234	stop_indexes += tokenizer.indexes_containing_token(stop_word)
235	elif isinstance(stop_word, int):
236	stop_indexes.append(stop_word)
237	assert isinstance(stop_indexes, list) and all(isinstance(elem, int) for elem in stop_indexes), 'invalid stop_words'
238	# each id in stop_indexes represents a stop word
239	# refer to https://github.com/fauxpilot/fauxpilot/discussions/165 for
240	# detailed explanation about fastertransformer's stop_indexes
241	stop_word_offsets = range(1, len(stop_indexes) + 1)
242	stop_words = np.array([[stop_indexes, stop_word_offsets]]).astype(np.int32)
243	return stop_words
244
245
246	def get_hf_gen_cfg(path: str, trust_remote_code: bool = False):

__init__Method · 0.90

indexes_containing_tokenMethod · 0.45

appendMethod · 0.45

no test coverage detected