hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

tensorrt_llm/executor/worker.py:38–74 · view source on GitHub ↗

(
        self,
        engine: Union[Path, Engine],
        executor_config: Optional[tllm.ExecutorConfig] = None,
        batched_logits_processor: Optional[BatchedLogitsProcessor] = None,
        postproc_worker_config: Optional[PostprocWorkerConfig] = None,
        is_llm_executor: Optional[bool] = None,
        hf_model_dir: Optional[Path] = None,
        tokenizer: Optional[TokenizerBase] = None,
        llm_args: Optional[BaseLlmArgs] = None,
        rpc_addr: Optional[str] = None,
        hmac_key: Optional[bytes] = None,
    )

Source from the content-addressed store, hash-verified

36	class GenerationExecutorWorker(RpcWorkerMixin, BaseWorker):
37
def __init__(
    self,
    engine: Union[Path, Engine],
    executor_config: Optional[tllm.ExecutorConfig] = None,
    batched_logits_processor: Optional[BatchedLogitsProcessor] = None,
    postproc_worker_config: Optional[PostprocWorkerConfig] = None,
    is_llm_executor: Optional[bool] = None,
    hf_model_dir: Optional[Path] = None,
    tokenizer: Optional[TokenizerBase] = None,
    llm_args: Optional[BaseLlmArgs] = None,
    rpc_addr: Optional[str] = None,
    hmac_key: Optional[bytes] = None,
) -> None:
    """Initialise the worker, set up its engine and prepare the response thread.

    The first eight arguments are forwarded unchanged, by keyword, to the
    parent initialiser. ``rpc_addr`` and ``hmac_key`` are consumed here and
    are only used when ``rpc_addr`` is not ``None``.

    Args:
        engine: Passed through to the parent initialiser.
        executor_config: Passed through to the parent initialiser.
        batched_logits_processor: Passed through to the parent initialiser.
        postproc_worker_config: Passed through to the parent initialiser.
        is_llm_executor: Passed through to the parent initialiser.
        hf_model_dir: Passed through to the parent initialiser.
        tokenizer: Passed through to the parent initialiser.
        llm_args: Passed through to the parent initialiser.
        rpc_addr: When not ``None``, stored on the instance and the RPC
            server is started (used for stats RPC support).
        hmac_key: Stored on the instance alongside ``rpc_addr``; ignored
            when ``rpc_addr`` is ``None``.
    """
    super().__init__(
        engine=engine,
        executor_config=executor_config,
        batched_logits_processor=batched_logits_processor,
        postproc_worker_config=postproc_worker_config,
        is_llm_executor=is_llm_executor,
        hf_model_dir=hf_model_dir,
        tokenizer=tokenizer,
        llm_args=llm_args,
    )

    # The engine is set up before any RPC server or thread object exists.
    self.setup_engine()

    # Stats RPC is opt-in: it is enabled only when an address is supplied.
    # init_rpc_worker is deliberately not called so the IPC response queue
    # is kept; only the server start-up from RpcWorkerMixin is reused.
    if rpc_addr is not None:
        self.rpc_addr = rpc_addr
        self.hmac_key = hmac_key
        self.start_rpc_server()

    # The thread object is only constructed here; this method does not
    # start it.
    response_thread = ManagedThread(
        self.await_response_task,
        error_queue=self._error_queue,
        name="await_response_thread",
    )
    self.await_response_thread = response_thread
75
76	def start_thread(self, thread: ManagedThread):
77	if self.engine.can_enqueue_requests() and not thread.is_alive():

Callers

nothing calls this directly

Calls 3

ManagedThreadClass · 0.85

start_rpc_serverMethod · 0.80

setup_engineMethod · 0.45

Tested by

no test coverage detected

Method __init__

Source from the content-addressed store, hash-verified

Callers

Calls 3

Tested by

Method __init__