(
self,
engine: Union[Path, Engine],
executor_config: Optional[tllm.ExecutorConfig] = None,
batched_logits_processor: Optional[BatchedLogitsProcessor] = None,
postproc_worker_config: Optional[PostprocWorkerConfig] = None,
is_llm_executor: Optional[bool] = None,
hf_model_dir: Optional[Path] = None,
tokenizer: Optional[TokenizerBase] = None,
llm_args: Optional[BaseLlmArgs] = None,
rpc_addr: Optional[str] = None,
hmac_key: Optional[bytes] = None,
)
| 36 | class GenerationExecutorWorker(RpcWorkerMixin, BaseWorker): |
| 37 | |
def __init__(
    self,
    engine: Union[Path, Engine],
    executor_config: Optional[tllm.ExecutorConfig] = None,
    batched_logits_processor: Optional[BatchedLogitsProcessor] = None,
    postproc_worker_config: Optional[PostprocWorkerConfig] = None,
    is_llm_executor: Optional[bool] = None,
    hf_model_dir: Optional[Path] = None,
    tokenizer: Optional[TokenizerBase] = None,
    llm_args: Optional[BaseLlmArgs] = None,
    rpc_addr: Optional[str] = None,
    hmac_key: Optional[bytes] = None,
) -> None:
    """Initialise the worker, its engine, the optional RPC server and the response thread.

    Steps, in order:

    1. Forward every argument except ``rpc_addr`` and ``hmac_key`` to the
       parent initialiser as keyword arguments.
    2. Call ``self.setup_engine()``.
    3. If ``rpc_addr`` is not ``None``, store ``rpc_addr`` and ``hmac_key``
       on the instance and call ``self.start_rpc_server()``.
    4. Build the ``ManagedThread`` that wraps ``self.await_response_task``
       and store it as ``self.await_response_thread``.

    Args:
        engine: Passed through to the parent initialiser.
        executor_config: Passed through to the parent initialiser.
        batched_logits_processor: Passed through to the parent initialiser.
        postproc_worker_config: Passed through to the parent initialiser.
        is_llm_executor: Passed through to the parent initialiser.
        hf_model_dir: Passed through to the parent initialiser.
        tokenizer: Passed through to the parent initialiser.
        llm_args: Passed through to the parent initialiser.
        rpc_addr: Address for the stats RPC server. When ``None`` the RPC
            server is not started and ``self.rpc_addr`` / ``self.hmac_key``
            are not assigned by this method.
        hmac_key: Stored as ``self.hmac_key`` only when ``rpc_addr`` is
            given.
    """
    # The RPC-specific arguments are deliberately left out of this mapping;
    # they are consumed further down.
    base_kwargs = {
        "engine": engine,
        "executor_config": executor_config,
        "batched_logits_processor": batched_logits_processor,
        "postproc_worker_config": postproc_worker_config,
        "is_llm_executor": is_llm_executor,
        "hf_model_dir": hf_model_dir,
        "tokenizer": tokenizer,
        "llm_args": llm_args,
    }
    super().__init__(**base_kwargs)

    self.setup_engine()

    # Stats RPC server: init_rpc_worker is skipped on purpose so the IPC
    # response queue is kept. Only started when an address was supplied.
    stats_rpc_requested = rpc_addr is not None
    if stats_rpc_requested:
        self.rpc_addr = rpc_addr
        self.hmac_key = hmac_key
        self.start_rpc_server()  # provided by RpcWorkerMixin

    # Errors are routed to the worker's error queue.
    response_thread = ManagedThread(
        self.await_response_task,
        error_queue=self._error_queue,
        name="await_response_thread",
    )
    self.await_response_thread = response_thread
| 75 | |
| 76 | def start_thread(self, thread: ManagedThread): |
| 77 | if self.engine.can_enqueue_requests() and not thread.is_alive(): |
nothing calls this directly
no test coverage detected