(self, model_path: str,
engine_config: PytorchEngineConfig | TurbomindEngineConfig,
speculative_config: SpeculativeConfig,
trust_remote_code: bool = False)
| 134 | class Engine: |
| 135 | |
def __init__(self, model_path: str,
             engine_config: PytorchEngineConfig | TurbomindEngineConfig,
             speculative_config: SpeculativeConfig,
             trust_remote_code: bool = False):
    """Create the tokenizer and the inference backend chosen by ``engine_config``.

    Args:
        model_path: Passed to ``Tokenizer`` and to the selected backend's
            ``from_pretrained``.
        engine_config: Its concrete type selects the backend. A
            ``TurbomindEngineConfig`` builds a ``TurboMind`` model and a
            ``PytorchEngineConfig`` builds a PyTorch ``Engine``.
        speculative_config: Forwarded to the PyTorch engine only; the
            TurboMind branch does not receive it.
        trust_remote_code: Forwarded to the selected backend's
            ``from_pretrained``.

    Raises:
        TypeError: If ``engine_config`` is neither a
            ``TurbomindEngineConfig`` nor a ``PytorchEngineConfig``.
    """
    self.tokenizer = Tokenizer(model_path)

    # Each backend import is local to its branch, so it only executes when
    # that backend is actually selected.
    if isinstance(engine_config, TurbomindEngineConfig):
        from lmdeploy.turbomind import TurboMind
        tm_model = TurboMind.from_pretrained(model_path,
                                             engine_config=engine_config,
                                             trust_remote_code=trust_remote_code)
        self.backend = 'turbomind'
    elif isinstance(engine_config, PytorchEngineConfig):
        from lmdeploy.pytorch.engine import Engine as PytorchEngine
        tm_model = PytorchEngine.from_pretrained(model_path,
                                                 engine_config=engine_config,
                                                 speculative_config=speculative_config,
                                                 trust_remote_code=trust_remote_code)
        self.backend = 'pytorch'
    else:
        # Without this branch an unsupported config would fall through and
        # fail below with an UnboundLocalError on ``tm_model``, which hides
        # the real cause from the caller.
        raise TypeError('engine_config must be a TurbomindEngineConfig or '
                        'PytorchEngineConfig, got '
                        f'{type(engine_config).__name__}')

    # NOTE: despite the ``tm_`` prefix, this attribute holds whichever
    # backend was built above (TurboMind or PyTorch).
    self.tm_model = tm_model
    # Starts unset; presumably assigned a progress bar elsewhere in the
    # class -- confirm against the code that uses it.
    self.pbar = None
| 156 | |
| 157 | async def _inference(self, req_queue: Queue, session_id: int, temperature: float, top_p: float, top_k: int, |
| 158 | stream_output: bool, skip_tokenize: bool, skip_detokenize: bool, concurrency: int): |
nothing calls this directly
no test coverage detected