Method submit

tensorrt_llm/executor/base_worker.py:607–639 · view source on GitHub ↗

Low-level API to the executor. Returns a "future" GenerationResult that can be waited on.

(self, request: GenerationRequest)

Source from the content-addressed store, hash-verified

605	raise RequestError(str(e)) from e
606
def submit(self, request: GenerationRequest) -> GenerationResult:
    """Low-level API to the executor: enqueue a request, return its "future".

    Args:
        request: The generation request to submit. When ``request.id`` is
            ``None`` a new client id is obtained from
            ``self._get_next_client_id()`` and stored on the request via
            ``request.set_id``; otherwise the existing id is reused.

    Returns:
        The ``GenerationResult`` created for this request, which the caller
        can wait on. It is also registered in ``self._results`` under the
        request's client id.

    Raises:
        RuntimeError: If called on a rank other than 0.

    Any exception raised by ``self._enqueue_request`` or
    ``self._handle_background_error`` propagates unchanged. If the enqueue
    fails, the entry just added to ``self._results`` is removed first so no
    stale result is left behind.
    """
    self.start()

    if self.rank != 0:
        raise RuntimeError(
            "Only rank 0 can submit requests.\n"
            "To fix this, ensure that the llm.generate(...) method is "
            "guarded with the `if __name__ == '__main__':` block.")

    # Reuse the caller-provided id, or allocate one and record it on the
    # request so both sides agree on the key.
    if request.id is None:
        client_id = self._get_next_client_id()
        request.set_id(client_id)
    else:
        client_id = request.id

    logprob_params = self._get_logprob_params(request)

    result = GenerationResult(
        request,
        background_error_handler=self._handle_background_error,
        executor=self,
        disaggregated_params=request.disaggregated_params,
        logprob_params=logprob_params)

    # Registered before enqueueing (order kept from the original code);
    # NOTE(review): presumably so a response arriving right after the
    # enqueue can already find its result - confirm against the consumer.
    self._results[client_id] = result

    try:
        request_id = self._enqueue_request(request)
    except Exception:
        # The caller never receives `result` on this path, so drop the
        # registration instead of leaving an entry nobody will use.
        self._results.pop(client_id, None)
        raise

    # request_id returned from backend is necessary for the abort_request method.
    self._client_id_to_request_id[client_id] = request_id

    self._handle_background_error()

    return result
640
641	def shutdown(self):
642	if self.doing_shutdown:

launch_workersMethod · 0.45

worker_mainFunction · 0.45

_call_futureMethod · 0.45

startMethod · 0.95

_enqueue_requestMethod · 0.95

GenerationResultClass · 0.85

_get_next_client_idMethod · 0.80

set_idMethod · 0.80

_get_logprob_paramsMethod · 0.80

_handle_background_errorMethod · 0.80

no test coverage detected