MCPcopy
hub / github.com/InternLM/lmdeploy / process_request

Method process_request

benchmark/benchmark_guided.py:208–263  ·  view source on GitHub ↗
(self, requests, profiler: Profiler, temperature, top_p, top_k,
                        stream_output, ignore_eos, response_format=None)

Source from the content-addressed store, hash-verified

206 self.return_routed_experts = getattr(self.pipe.backend_config, 'enable_return_routed_experts', False)
207
def process_request(self, requests, profiler: Profiler, temperature, top_p, top_k,
                    stream_output, ignore_eos, response_format=None):
    """Run a batch of requests through the pipeline and record per-request progress.

    Args:
        requests: Sequence of 3-tuples ``(prompt, input_len, output_len)``. It is
            iterated several times and passed to ``len()``, so it must not be a
            one-shot iterator.
        profiler: Profiler that creates one session per request and is started
            before, and finished after, the inference loop.
        temperature: Forwarded to every ``GenerationConfig``.
        top_p: Forwarded to every ``GenerationConfig``.
        top_k: Forwarded to every ``GenerationConfig``.
        stream_output: When truthy, consume ``self.pipe.stream_infer`` and tick
            the session on every streamed output; otherwise call ``self.pipe``
            once and tick/finish each session from its output.
        ignore_eos: Forwarded to every ``GenerationConfig``.
        response_format: Forwarded to every ``GenerationConfig``; defaults to None.

    Returns:
        A list with one entry per request, in request order: the last value
        recorded in the session's ``ns`` when the session ended with
        ``Session.SUCCESS``, otherwise 0.
    """
    prompts = [prompt for prompt, _, _ in requests]
    # NOTE(review): do_sample is hard-coded to False while temperature/top_p/top_k
    # are still forwarded -- presumably intentional for this benchmark; confirm.
    gen_configs = [
        GenerationConfig(temperature=temperature,
                         top_p=top_p,
                         top_k=top_k,
                         ignore_eos=ignore_eos,
                         do_sample=False,
                         return_routed_experts=self.return_routed_experts,
                         response_format=response_format,
                         max_new_tokens=output_len) for _, _, output_len in requests
    ]

    # One profiler session per request, index-aligned with `prompts`.
    sess: list[Session] = [profiler.new_session(input_len, output_len) for _, input_len, output_len in requests]

    def _to_status(finish_reason):
        # Only 'length' and 'stop' count as success; anything else (including None) fails.
        if finish_reason in ('length', 'stop'):
            return Session.SUCCESS
        return Session.FAIL

    profiler.start()
    # Record a zero-token tick for every session right after the profiler starts.
    for s in sess:
        s.tick(0)

    if stream_output:
        # The context manager closes the progress bar even if inference raises.
        with tqdm(total=len(requests)) as pbar:
            for output in self.pipe.stream_infer(prompts, gen_config=gen_configs, do_preprocess=False):
                session = sess[output.index]
                session.tick(output.generate_token_len)
                finish_reason = output.finish_reason
                # A non-None finish_reason marks the last streamed output of a request.
                if finish_reason is not None:
                    session.finish(_to_status(finish_reason))
                    pbar.update(1)
    else:
        for output in self.pipe(prompts, gen_configs, do_preprocess=False, use_tqdm=True):
            session = sess[output.index]
            session.tick(output.generate_token_len)
            session.finish(_to_status(output.finish_reason))

    profiler.finish()

    # Collect actual per-request output lengths for detailed comparison
    actual_output_lens = []
    for i, s in enumerate(sess):
        succeeded = s.status == Session.SUCCESS
        actual_output_lens.append(s.ns[-1] if succeeded else 0)
        if not succeeded:
            logger.warning(f'Request {i}: {s.ns[-1]}/{s.req_output_len} tokens, finish != length/stop')
    return actual_output_lens
264
265

Callers 1

run_onceFunction · 0.45

Calls 10

GenerationConfigClass · 0.90
new_sessionMethod · 0.80
appendMethod · 0.45
startMethod · 0.45
tickMethod · 0.45
stream_inferMethod · 0.45
finishMethod · 0.45
updateMethod · 0.45
closeMethod · 0.45
pipeMethod · 0.45

Tested by

no test coverage detected