Generate responses. Args: messages (str | List): chat history or prompt session_id (int | Session): the session id or instance of Session gen_config (GenerationConfig | None): an instance of GenerationConfig. Defaults to None. st
(
self,
messages,
session_id: int | Session,
gen_config: GenerationConfig | None = None,
tools: list[object] | None = None,
reasoning_effort: Literal['low', 'medium', 'high'] | None = None,
stream_response: bool = True,
sequence_start: bool = True,
sequence_end: bool = True, # no interactive mode by default
step: int = 0,
do_preprocess: bool = True,
adapter_name: str | None = None,
rewind_stop_tokens: bool = False,
input_ids: list | None = None,
enable_thinking: bool | None = None,
chat_template_kwargs: dict | None = None,
media_io_kwargs: dict[str, Any] | None = None,
mm_processor_kwargs: dict[str, Any] | None = None,
**kwargs)
| 473 | metrics_processor.decrease_api_routed_requests() |
| 474 | |
| 475 | async def generate( |
| 476 | self, |
| 477 | messages, |
| 478 | session_id: int | Session, |
| 479 | gen_config: GenerationConfig | None = None, |
| 480 | tools: list[object] | None = None, |
| 481 | reasoning_effort: Literal['low', 'medium', 'high'] | None = None, |
| 482 | stream_response: bool = True, |
| 483 | sequence_start: bool = True, |
| 484 | sequence_end: bool = True, # no interactive mode by default |
| 485 | step: int = 0, |
| 486 | do_preprocess: bool = True, |
| 487 | adapter_name: str | None = None, |
| 488 | rewind_stop_tokens: bool = False, |
| 489 | input_ids: list | None = None, |
| 490 | enable_thinking: bool | None = None, |
| 491 | chat_template_kwargs: dict | None = None, |
| 492 | media_io_kwargs: dict[str, Any] | None = None, |
| 493 | mm_processor_kwargs: dict[str, Any] | None = None, |
| 494 | **kwargs): |
| 495 | """Generate responses. |
| 496 | |
| 497 | Args: |
| 498 | messages (str | List): chat history or prompt |
| 499 | session_id (int | Session): the session id or instance of Session |
| 500 | gen_config (GenerationConfig | None): a instance of |
| 501 | GenerationConfig. Default to None. |
| 502 | stream_response (bool): whether return responses streamingly |
| 503 | sequence_start (bool): indicator for starting a sequence |
| 504 | sequence_end (bool): indicator for ending a sequence |
| 505 | step (int): the offset of the k/v cache |
| 506 | do_preprocess (bool): whether pre-process the messages. Default to |
| 507 | True, which means chat_template will be applied. |
| 508 | """ |
| 509 | metrics_processor.increase_total_requests() |
| 510 | |
| 511 | if (messages is not None) ^ (input_ids is None): |
| 512 | raise ValueError('You must specify exactly one of messages or input_ids') |
| 513 | if isinstance(session_id, Session): |
| 514 | session = session_id |
| 515 | elif isinstance(session_id, int): |
| 516 | session = self.session_mgr.get(session_id, step=step) |
| 517 | else: |
| 518 | raise ValueError(f'Invalid session_id: {session_id}. It should be an instance of Session or an integer.') |
| 519 | session_id = session.session_id |
| 520 | session_removed = False |
| 521 | |
| 522 | def remove_session_once(): |
| 523 | nonlocal session_removed |
| 524 | if sequence_end and not session_removed: |
| 525 | self.session_mgr.remove(session) |
| 526 | session_removed = True |
| 527 | |
| 528 | chat_template_kwargs = chat_template_kwargs or {} |
| 529 | if enable_thinking is not None: |
| 530 | logger.warning('enable_thinking is deprecated, use chat_template_kwargs["enable_thinking"] instead') |
| 531 | if chat_template_kwargs.get('enable_thinking') is None: |
| 532 | chat_template_kwargs['enable_thinking'] = enable_thinking |
nothing calls this directly
no test coverage detected