MCPcopy
hub / github.com/InternLM/lmdeploy / _stop_words

Function _stop_words

lmdeploy/utils.py:223–243  ·  view source on GitHub ↗

Convert a list of stop words (token ids or strings) into a numpy.ndarray.

(stop_words: list[int | str], tokenizer: object)

Source from the content-addressed store, hash-verified

221
222# TODO remove stop_word_offsets stuff and make it clean
223def _stop_words(stop_words: list[int | str], tokenizer: object):
224 """Return list of stop-words to numpy.ndarray."""
225 import numpy as np
226 if stop_words is None:
227 return None
228 assert isinstance(stop_words, list) and \
229 all(isinstance(elem, (str, int)) for elem in stop_words), \
230 f'stop_words must be a list but got {type(stop_words)}'
231 stop_indexes = []
232 for stop_word in stop_words:
233 if isinstance(stop_word, str):
234 stop_indexes += tokenizer.indexes_containing_token(stop_word)
235 elif isinstance(stop_word, int):
236 stop_indexes.append(stop_word)
237 assert isinstance(stop_indexes, list) and all(isinstance(elem, int) for elem in stop_indexes), 'invalid stop_words'
238 # each id in stop_indexes represents a stop word
239 # refer to https://github.com/fauxpilot/fauxpilot/discussions/165 for
240 # detailed explanation about fastertransformer's stop_indexes
241 stop_word_offsets = range(1, len(stop_indexes) + 1)
242 stop_words = np.array([[stop_indexes, stop_word_offsets]]).astype(np.int32)
243 return stop_words
244
245
246def get_hf_gen_cfg(path: str, trust_remote_code: bool = False):

Callers 1

__init__Method · 0.90

Calls 2

appendMethod · 0.45

Tested by

no test coverage detected