hub / github.com/ModelTC/LightLLM / SamplingParams

Class SamplingParams

lightllm/server/core/objs/sampling_params.py:272–486 · view source on GitHub ↗

Source from the content-addressed store, hash-verified

270
271
272	class SamplingParams(ctypes.Structure):
273	_pack_ = 4
274	_fields_ = [
275	("best_of", ctypes.c_int),
276	("n", ctypes.c_int),
277	("do_sample", ctypes.c_bool),
278	("presence_penalty", ctypes.c_float),
279	("frequency_penalty", ctypes.c_float),
280	("repetition_penalty", ctypes.c_float),
281	("temperature", ctypes.c_float),
282	("top_p", ctypes.c_float),
283	("top_k", ctypes.c_int),
284	("ignore_eos", ctypes.c_bool),
285	# the max number of image patches to be used in the internvl model, for the test
286	("image_max_patch_num", ctypes.c_int),
287	("max_new_tokens", ctypes.c_int),
288	("min_new_tokens", ctypes.c_int),
289	# Whether to count input tokens for presence_penalty, frequency_penalty and repetition_penalty
290	("input_penalty", ctypes.c_bool),
291	("regular_constraint", RegularConstraint),
292	("guided_grammar", GuidedGrammar),
293	("guided_json", GuidedJsonSchema),
294	# If provided, the engine will construct a logits processor
295	# that retains scores only for the given token ids. Defaults to None.
296	# allowed_token_ids can only be used when the server is started with "--output_constraint_mode outlines".
297	("allowed_token_ids", AllowedTokenIds),
298	("stop_sequences", StopSequenceGroups),
299	("exponential_decay_length_penalty", ExponentialDecayLengthPenalty),
300	("group_request_id", ctypes.c_int64), # p d mode used params
301	("suggested_dp_index", ctypes.c_int), # suggest dp index, deepseekv2 dp mode, use to suggest used dp_index
302	("move_kv_to_decode_node", DecodeNode), # move kv to decode node, only used in pd mode
303	("skip_special_tokens", ctypes.c_bool), # whether to skip special tokens when decoding
304	("add_special_tokens", ctypes.c_bool), # whether to add special tokens when encoding
305	(
306	"add_spaces_between_special_tokens",
307	ctypes.c_bool,
308	), # whether to add spaces between special tokens when decoding
309	("print_eos_token", ctypes.c_bool), # eos_id is always ignored unless this value is set to True
310	]
311
312	_do_sample: bool = False
313	_presence_penalty: float = 0.0
314	_frequency_penalty: float = 0.0
315	_repetition_penalty: float = 1.0
316	_temperature: float = 1.0
317	_top_p: float = 1.0
318	_top_k: int = -1 # -1 is for all
319
320	def init(self, tokenizer, **kwargs):
321	super().__init__()
322	self.best_of = kwargs.get("best_of", 1)
323	self.n = kwargs.get("n", self.best_of)
324	self.do_sample = kwargs.get("do_sample", SamplingParams._do_sample)
325	self.presence_penalty = kwargs.get("presence_penalty", SamplingParams._presence_penalty)
326	self.frequency_penalty = kwargs.get("frequency_penalty", SamplingParams._frequency_penalty)
327	self.repetition_penalty = kwargs.get("repetition_penalty", SamplingParams._repetition_penalty)
328	self.temperature = kwargs.get("temperature", SamplingParams._temperature)
329	self.top_p = kwargs.get("top_p", SamplingParams._top_p)

Callers 15

health_checkFunction · 0.90

tgi_generate_implFunction · 0.90

tgi_generate_stream_implFunction · 0.90

tokensFunction · 0.90

lightllm_get_scoreFunction · 0.90

lightllm_generateFunction · 0.90

lightllm_generate_streamFunction · 0.90

chat_completions_implFunction · 0.90

completions_implFunction · 0.90

process_single_promptFunction · 0.90

test_sampling_params_initializationFunction · 0.90

initMethod · 0.70

Calls

no outgoing calls

Tested by 1

test_sampling_params_initializationFunction · 0.72