MCPcopy
hub / github.com/InternLM/lmdeploy / generate

Method generate

lmdeploy/serve/core/async_engine.py:475–781  ·  view source on GitHub ↗

Generate responses. Args: messages (str | List): chat history or prompt; session_id (int | Session): the session id or an instance of Session; gen_config (GenerationConfig | None): an instance of GenerationConfig, defaults to None; …

(
            self,
            messages,
            session_id: int | Session,
            gen_config: GenerationConfig | None = None,
            tools: list[object] | None = None,
            reasoning_effort: Literal['low', 'medium', 'high'] | None = None,
            stream_response: bool = True,
            sequence_start: bool = True,
            sequence_end: bool = True,  # no interactive mode by default
            step: int = 0,
            do_preprocess: bool = True,
            adapter_name: str | None = None,
            rewind_stop_tokens: bool = False,
            input_ids: list | None = None,
            enable_thinking: bool | None = None,
            chat_template_kwargs: dict | None = None,
            media_io_kwargs: dict[str, Any] | None = None,
            mm_processor_kwargs: dict[str, Any] | None = None,
            **kwargs)

Source from the content-addressed store, hash-verified

473 metrics_processor.decrease_api_routed_requests()
474
475 async def generate(
476 self,
477 messages,
478 session_id: int | Session,
479 gen_config: GenerationConfig | None = None,
480 tools: list[object] | None = None,
481 reasoning_effort: Literal['low', 'medium', 'high'] | None = None,
482 stream_response: bool = True,
483 sequence_start: bool = True,
484 sequence_end: bool = True, # no interactive mode by default
485 step: int = 0,
486 do_preprocess: bool = True,
487 adapter_name: str | None = None,
488 rewind_stop_tokens: bool = False,
489 input_ids: list | None = None,
490 enable_thinking: bool | None = None,
491 chat_template_kwargs: dict | None = None,
492 media_io_kwargs: dict[str, Any] | None = None,
493 mm_processor_kwargs: dict[str, Any] | None = None,
494 **kwargs):
495 """Generate responses.
496
497 Args:
498 messages (str | List): chat history or prompt
499 session_id (int | Session): the session id or instance of Session
500 gen_config (GenerationConfig | None): a instance of
501 GenerationConfig. Default to None.
502 stream_response (bool): whether return responses streamingly
503 sequence_start (bool): indicator for starting a sequence
504 sequence_end (bool): indicator for ending a sequence
505 step (int): the offset of the k/v cache
506 do_preprocess (bool): whether pre-process the messages. Default to
507 True, which means chat_template will be applied.
508 """
509 metrics_processor.increase_total_requests()
510
511 if (messages is not None) ^ (input_ids is None):
512 raise ValueError('You must specify exactly one of messages or input_ids')
513 if isinstance(session_id, Session):
514 session = session_id
515 elif isinstance(session_id, int):
516 session = self.session_mgr.get(session_id, step=step)
517 else:
518 raise ValueError(f'Invalid session_id: {session_id}. It should be an instance of Session or an integer.')
519 session_id = session.session_id
520 session_removed = False
521
522 def remove_session_once():
523 nonlocal session_removed
524 if sequence_end and not session_removed:
525 self.session_mgr.remove(session)
526 session_removed = True
527
528 chat_template_kwargs = chat_template_kwargs or {}
529 if enable_thinking is not None:
530 logger.warning('enable_thinking is deprecated, use chat_template_kwargs["enable_thinking"] instead')
531 if chat_template_kwargs.get('enable_thinking') is None:
532 chat_template_kwargs['enable_thinking'] = enable_thinking

Callers

nothing calls this directly

Calls 15

_determine_gen_config · Method · 0.95
_if_session_stale · Method · 0.95
safe_run · Method · 0.95
DetokenizeState · Class · 0.90
RequestStats · Class · 0.90
EngineOutput · Class · 0.90
IterationStats · Class · 0.90
GenOut · Class · 0.85
log_inputs · Method · 0.80

Tested by

no test coverage detected