MCPcopy
hub / github.com/InternLM/lmdeploy / __init__

Method __init__

benchmark/profile_throughput.py:136–155  ·  view source on GitHub ↗
(self, model_path: str,
                 engine_config: PytorchEngineConfig | TurbomindEngineConfig,
                 speculative_config: SpeculativeConfig,
                 trust_remote_code: bool = False)

Source from the content-addressed store, hash-verified

134class Engine:
135
def __init__(self, model_path: str,
             engine_config: PytorchEngineConfig | TurbomindEngineConfig,
             speculative_config: SpeculativeConfig,
             trust_remote_code: bool = False):
    """Create the tokenizer and the inference backend chosen by ``engine_config``.

    Args:
        model_path: Passed to ``Tokenizer`` and to the selected backend's
            ``from_pretrained``.
        engine_config: Selects the backend by its type. A
            ``TurbomindEngineConfig`` builds a TurboMind model; a
            ``PytorchEngineConfig`` builds a PyTorch engine.
        speculative_config: Forwarded only to the PyTorch engine; the
            TurboMind branch does not use it.
        trust_remote_code: Forwarded to the selected backend's
            ``from_pretrained``.

    Raises:
        TypeError: If ``engine_config`` is neither of the two supported
            config types.
    """
    self.tokenizer = Tokenizer(model_path)
    if isinstance(engine_config, TurbomindEngineConfig):
        # Imported inside the branch: only the selected backend's module
        # is imported by this constructor.
        from lmdeploy.turbomind import TurboMind
        tm_model = TurboMind.from_pretrained(model_path,
                                             engine_config=engine_config,
                                             trust_remote_code=trust_remote_code)
        self.backend = 'turbomind'
    elif isinstance(engine_config, PytorchEngineConfig):
        from lmdeploy.pytorch.engine import Engine as PytorchEngine
        tm_model = PytorchEngine.from_pretrained(model_path,
                                                 engine_config=engine_config,
                                                 speculative_config=speculative_config,
                                                 trust_remote_code=trust_remote_code)
        self.backend = 'pytorch'
    else:
        # Without this branch `tm_model` would be unbound below and the
        # caller would see an UnboundLocalError instead of the real cause.
        raise TypeError('engine_config must be a TurbomindEngineConfig or '
                        f'PytorchEngineConfig, got {type(engine_config).__name__}')

    # The attribute keeps the name `tm_model` for both backends.
    self.tm_model = tm_model
    # Starts unset; presumably a progress bar assigned elsewhere — TODO confirm.
    self.pbar = None
156
157 async def _inference(self, req_queue: Queue, session_id: int, temperature: float, top_p: float, top_k: int,
158 stream_output: bool, skip_tokenize: bool, skip_detokenize: bool, concurrency: int):

Callers

nothing calls this directly

Calls 2

TokenizerClass · 0.90
from_pretrainedMethod · 0.45

Tested by

no test coverage detected