MCPcopy
hub / github.com/NVIDIA/TensorRT-LLM / __init__

Method __init__

tensorrt_llm/executor/worker.py:38–74  ·  view source on GitHub ↗
(
        self,
        engine: Union[Path, Engine],
        executor_config: Optional[tllm.ExecutorConfig] = None,
        batched_logits_processor: Optional[BatchedLogitsProcessor] = None,
        postproc_worker_config: Optional[PostprocWorkerConfig] = None,
        is_llm_executor: Optional[bool] = None,
        hf_model_dir: Optional[Path] = None,
        tokenizer: Optional[TokenizerBase] = None,
        llm_args: Optional[BaseLlmArgs] = None,
        rpc_addr: Optional[str] = None,
        hmac_key: Optional[bytes] = None,
    )

Source from the content-addressed store, hash-verified

36class GenerationExecutorWorker(RpcWorkerMixin, BaseWorker):
37
def __init__(
    self,
    engine: Union[Path, Engine],
    executor_config: Optional[tllm.ExecutorConfig] = None,
    batched_logits_processor: Optional[BatchedLogitsProcessor] = None,
    postproc_worker_config: Optional[PostprocWorkerConfig] = None,
    is_llm_executor: Optional[bool] = None,
    hf_model_dir: Optional[Path] = None,
    tokenizer: Optional[TokenizerBase] = None,
    llm_args: Optional[BaseLlmArgs] = None,
    rpc_addr: Optional[str] = None,
    hmac_key: Optional[bytes] = None,
) -> None:
    """Initialise the worker, set up its engine and prepare the response thread.

    The first eight arguments (``engine`` through ``llm_args``) are handed
    to the parent constructor unchanged; this method does not inspect them.

    Args:
        engine: Forwarded to the parent constructor.
        executor_config: Forwarded to the parent constructor.
        batched_logits_processor: Forwarded to the parent constructor.
        postproc_worker_config: Forwarded to the parent constructor.
        is_llm_executor: Forwarded to the parent constructor.
        hf_model_dir: Forwarded to the parent constructor.
        tokenizer: Forwarded to the parent constructor.
        llm_args: Forwarded to the parent constructor.
        rpc_addr: When not ``None``, stored on the instance and the RPC
            server is started; when ``None`` no RPC server is started here.
        hmac_key: Stored on the instance together with ``rpc_addr``; it is
            ignored by this method when ``rpc_addr`` is ``None``.
    """
    # Everything the parent constructor needs, gathered in one place.
    parent_kwargs = {
        "engine": engine,
        "executor_config": executor_config,
        "batched_logits_processor": batched_logits_processor,
        "postproc_worker_config": postproc_worker_config,
        "is_llm_executor": is_llm_executor,
        "hf_model_dir": hf_model_dir,
        "tokenizer": tokenizer,
        "llm_args": llm_args,
    }
    super().__init__(**parent_kwargs)

    self.setup_engine()

    # The stats RPC server is optional and only brought up when an address
    # was supplied. init_rpc_worker is deliberately not called so the IPC
    # response queue is kept.
    wants_stats_rpc = rpc_addr is not None
    if wants_stats_rpc:
        self.rpc_addr = rpc_addr
        self.hmac_key = hmac_key
        # start_rpc_server is reused from RpcWorkerMixin.
        self.start_rpc_server()

    # The thread object is only constructed here; this method does not
    # start it.
    response_thread = ManagedThread(
        self.await_response_task,
        error_queue=self._error_queue,
        name="await_response_thread",
    )
    self.await_response_thread = response_thread
75
76 def start_thread(self, thread: ManagedThread):
77 if self.engine.can_enqueue_requests() and not thread.is_alive():

Callers

nothing calls this directly

Calls 3

ManagedThread · Class · 0.85
start_rpc_server · Method · 0.80
setup_engine · Method · 0.45

Tested by

no test coverage detected