| 270 | |
| 271 | |
| 272 | class SamplingParams(ctypes.Structure): |
| 273 | _pack_ = 4 |
| 274 | _fields_ = [ |
| 275 | ("best_of", ctypes.c_int), |
| 276 | ("n", ctypes.c_int), |
| 277 | ("do_sample", ctypes.c_bool), |
| 278 | ("presence_penalty", ctypes.c_float), |
| 279 | ("frequency_penalty", ctypes.c_float), |
| 280 | ("repetition_penalty", ctypes.c_float), |
| 281 | ("temperature", ctypes.c_float), |
| 282 | ("top_p", ctypes.c_float), |
| 283 | ("top_k", ctypes.c_int), |
| 284 | ("ignore_eos", ctypes.c_bool), |
| 285 | # the max number of image patches to be used in the internvl model, for the test |
| 286 | ("image_max_patch_num", ctypes.c_int), |
| 287 | ("max_new_tokens", ctypes.c_int), |
| 288 | ("min_new_tokens", ctypes.c_int), |
| 289 | # Whether to count input tokens for presence_penalty, frequency_penalty and repetition_penalty |
| 290 | ("input_penalty", ctypes.c_bool), |
| 291 | ("regular_constraint", RegularConstraint), |
| 292 | ("guided_grammar", GuidedGrammar), |
| 293 | ("guided_json", GuidedJsonSchema), |
| 294 | # If provided, the engine will construct a logits processor |
| 295 | # which only retains scores for the given token ids. Defaults to None. |
| 296 | # allowed_token_ids can only be used when the server is started with "--output_constraint_mode outlines". |
| 297 | ("allowed_token_ids", AllowedTokenIds), |
| 298 | ("stop_sequences", StopSequenceGroups), |
| 299 | ("exponential_decay_length_penalty", ExponentialDecayLengthPenalty), |
| 300 | ("group_request_id", ctypes.c_int64), # p d mode used params |
| 301 | ("suggested_dp_index", ctypes.c_int), # suggest dp index, deepseekv2 dp mode, use to suggest used dp_index |
| 302 | ("move_kv_to_decode_node", DecodeNode), # move kv to decode node, only used in pd mode |
| 303 | ("skip_special_tokens", ctypes.c_bool), # whether to skip special tokens when decoding |
| 304 | ("add_special_tokens", ctypes.c_bool), # whether to add special tokens when encoding |
| 305 | ( |
| 306 | "add_spaces_between_special_tokens", |
| 307 | ctypes.c_bool, |
| 308 | ), # whether to add spaces between special tokens when decoding |
| 309 | ("print_eos_token", ctypes.c_bool), # eos_id is always ignored unless this value is set to True |
| 310 | ] |
| 311 | |
| 312 | _do_sample: bool = False |
| 313 | _presence_penalty: float = 0.0 |
| 314 | _frequency_penalty: float = 0.0 |
| 315 | _repetition_penalty: float = 1.0 |
| 316 | _temperature: float = 1.0 |
| 317 | _top_p: float = 1.0 |
| 318 | _top_k: int = -1 # -1 is for all |
| 319 | |
| 320 | def init(self, tokenizer, **kwargs): |
| 321 | super().__init__() |
| 322 | self.best_of = kwargs.get("best_of", 1) |
| 323 | self.n = kwargs.get("n", self.best_of) |
| 324 | self.do_sample = kwargs.get("do_sample", SamplingParams._do_sample) |
| 325 | self.presence_penalty = kwargs.get("presence_penalty", SamplingParams._presence_penalty) |
| 326 | self.frequency_penalty = kwargs.get("frequency_penalty", SamplingParams._frequency_penalty) |
| 327 | self.repetition_penalty = kwargs.get("repetition_penalty", SamplingParams._repetition_penalty) |
| 328 | self.temperature = kwargs.get("temperature", SamplingParams._temperature) |
| 329 | self.top_p = kwargs.get("top_p", SamplingParams._top_p) |
no outgoing calls