MCPcopy Index your code
hub / github.com/InternLM/lmdeploy / process_request

Method process_request

benchmark/profile_throughput.py:205–240  ·  view source on GitHub ↗
(self, requests, profiler: Profiler, concurrency, temperature, top_p, top_k, stream_output,
                        skip_tokenize, skip_detokenize, cancel_rate)

Source from the content-addressed store, hash-verified

203 session_id += concurrency
204
def process_request(self, requests, profiler: Profiler, concurrency, temperature, top_p, top_k, stream_output,
                    skip_tokenize, skip_detokenize, cancel_rate):
    """Queue every request, run ``concurrency`` inference workers to completion and profile the run.

    Args:
        requests: Sized iterable of ``(prompt, input_len, output_len)`` triples.
        profiler: Supplies one session per request via ``new_session`` and is
            started/finished around the concurrent run.
        concurrency: Number of ``self._inference`` workers to launch; the same
            number of ``None`` entries is appended to the queue.
        temperature, top_p, top_k, stream_output, skip_detokenize: Forwarded
            unchanged to ``self._inference``.
        skip_tokenize: When truthy, each prompt is replaced by
            ``self.tokenizer.encode(prompt)`` before it is queued; the flag is
            also forwarded to ``self._inference``.
        cancel_rate: Probability (compared against ``random.random()``) that a
            request gets a randomly drawn ``cancel_after`` instead of the
            default ``output_len + 1``.

    Side effects:
        Sets ``self.pbar`` to a ``tqdm`` bar sized to ``len(requests)`` and
        closes it when the run ends, including when the run raises.
    """
    req_queue = Queue()

    # Feed every request into the queue before any worker runs.
    for prompt, input_len, output_len in requests:
        # Default is one past the requested output length.
        # NOTE(review): presumably this means "never cancel" -- confirm in _inference.
        cancel_after = output_len + 1
        # Short-circuit keeps the RNG untouched when cancellation is disabled.
        if cancel_rate > 0 and random.random() < cancel_rate:
            # randint is inclusive on both ends, so the draw spans [0, output_len + 1].
            cancel_after = random.randint(0, cancel_after)
        sess = profiler.new_session(input_len, output_len)
        if skip_tokenize:
            prompt = self.tokenizer.encode(prompt)
        req_queue.put([prompt, input_len, output_len, cancel_after, sess])

    # One None per worker.
    # NOTE(review): presumably a stop sentinel consumed by _inference -- confirm there.
    for _ in range(concurrency):
        req_queue.put(None)

    # Build one _inference call per worker index; nothing runs until they are
    # awaited together inside the event loop below.
    tasks = [
        self._inference(req_queue, i, temperature, top_p, top_k, stream_output, skip_tokenize, skip_detokenize,
                        concurrency) for i in range(concurrency)
    ]

    async def _gather_tasks(tasks):
        # Bracket only the concurrent execution with the profiler.
        profiler.start()
        ret = await asyncio.gather(*tasks)
        profiler.finish()
        return ret

    self.pbar = tqdm(total=len(requests))
    try:
        asyncio.run(_gather_tasks(tasks))
    finally:
        # Close the bar even if a worker raised or the run was interrupted.
        self.pbar.close()
241
242
243def parse_args():

Callers 1

mainFunction · 0.95

Calls 7

_inferenceMethod · 0.95
new_sessionMethod · 0.80
runMethod · 0.80
encodeMethod · 0.45
putMethod · 0.45
appendMethod · 0.45
closeMethod · 0.45

Tested by

no test coverage detected