MCPcopy
hub / github.com/ModelTC/LightLLM / SamplingParams

Class SamplingParams

lightllm/server/core/objs/sampling_params.py:272–486  ·  view source on GitHub ↗

Source from the content-addressed store, hash-verified

270
271
272class SamplingParams(ctypes.Structure):
273 _pack_ = 4
274 _fields_ = [
275 ("best_of", ctypes.c_int),
276 ("n", ctypes.c_int),
277 ("do_sample", ctypes.c_bool),
278 ("presence_penalty", ctypes.c_float),
279 ("frequency_penalty", ctypes.c_float),
280 ("repetition_penalty", ctypes.c_float),
281 ("temperature", ctypes.c_float),
282 ("top_p", ctypes.c_float),
283 ("top_k", ctypes.c_int),
284 ("ignore_eos", ctypes.c_bool),
285 # the max number of image patches to be used in the internvl model, for the test
286 ("image_max_patch_num", ctypes.c_int),
287 ("max_new_tokens", ctypes.c_int),
288 ("min_new_tokens", ctypes.c_int),
289 # Whether to count input tokens for presence_penalty, frequency_penalty and repetition_penalty
290 ("input_penalty", ctypes.c_bool),
291 ("regular_constraint", RegularConstraint),
292 ("guided_grammar", GuidedGrammar),
293 ("guided_json", GuidedJsonSchema),
294 # If provided, the engine will construct a logits
295 # processor which only retains scores for the given token ids. Defaults to None.
296 # allowed_token_ids can only be used when the server is started with "--output_constraint_mode outlines".
297 ("allowed_token_ids", AllowedTokenIds),
298 ("stop_sequences", StopSequenceGroups),
299 ("exponential_decay_length_penalty", ExponentialDecayLengthPenalty),
300 ("group_request_id", ctypes.c_int64), # p d mode used params
301 ("suggested_dp_index", ctypes.c_int), # suggest dp index, deepseekv2 dp mode, use to suggest used dp_index
302 ("move_kv_to_decode_node", DecodeNode), # move kv to decode node, only used in pd mode
303 ("skip_special_tokens", ctypes.c_bool), # whether to skip special tokens when decoding
304 ("add_special_tokens", ctypes.c_bool), # whether to add special tokens when encoding
305 (
306 "add_spaces_between_special_tokens",
307 ctypes.c_bool,
308 ), # whether to add spaces between special tokens when decoding
309 ("print_eos_token", ctypes.c_bool), # eos_id will be always ignored except the value is set to True
310 ]
311
312 _do_sample: bool = False
313 _presence_penalty: float = 0.0
314 _frequency_penalty: float = 0.0
315 _repetition_penalty: float = 1.0
316 _temperature: float = 1.0
317 _top_p: float = 1.0
318 _top_k: int = -1 # -1 is for all
319
320 def init(self, tokenizer, **kwargs):
321 super().__init__()
322 self.best_of = kwargs.get("best_of", 1)
323 self.n = kwargs.get("n", self.best_of)
324 self.do_sample = kwargs.get("do_sample", SamplingParams._do_sample)
325 self.presence_penalty = kwargs.get("presence_penalty", SamplingParams._presence_penalty)
326 self.frequency_penalty = kwargs.get("frequency_penalty", SamplingParams._frequency_penalty)
327 self.repetition_penalty = kwargs.get("repetition_penalty", SamplingParams._repetition_penalty)
328 self.temperature = kwargs.get("temperature", SamplingParams._temperature)
329 self.top_p = kwargs.get("top_p", SamplingParams._top_p)

Callers 15

health_checkFunction · 0.90
tgi_generate_implFunction · 0.90
tgi_generate_stream_implFunction · 0.90
tokensFunction · 0.90
lightllm_get_scoreFunction · 0.90
lightllm_generateFunction · 0.90
lightllm_generate_streamFunction · 0.90
chat_completions_implFunction · 0.90
completions_implFunction · 0.90
process_single_promptFunction · 0.90
initMethod · 0.70

Calls

no outgoing calls

Tested by 1