hub / github.com/InternLM/lmdeploy / process_request

Method process_request

benchmark/profile_pipeline_api.py:145–201 · view source on GitHub ↗

(self, requests, profiler: Profiler, temperature, top_p, top_k, stream_output)

Source from the content-addressed store, hash-verified

143	self.csv = csv
144
def process_request(self, requests, profiler: Profiler, temperature, top_p, top_k, stream_output):
    """Run all requests through ``self.pipe`` and record progress per request.

    One ``GenerationConfig`` and one profiler session are created for each
    request. Sessions are ticked with the generated-token count reported by
    each pipeline output and finished once a finish reason is available.
    After the run, the first request that looks failed is logged.

    Args:
        requests: Sized, re-iterable collection of
            ``(prompt, input_len, output_len)`` triples.
        profiler: Profiler that supplies the sessions via ``new_session``
            and is started/finished around the inference call.
        temperature: Passed to every ``GenerationConfig``.
        top_p: Passed to every ``GenerationConfig``.
        top_k: Passed to every ``GenerationConfig``.
        stream_output: If truthy, iterate ``self.pipe.stream_infer`` and
            show a progress bar; otherwise call ``self.pipe`` directly
            with ``use_tqdm=True``.

    Returns:
        None. Results are recorded on the profiler's sessions.
    """
    prompts = [prompt for prompt, _, _ in requests]
    gen_configs = [
        GenerationConfig(temperature=temperature,
                         top_p=top_p,
                         top_k=top_k,
                         ignore_eos=True,
                         do_sample=False,
                         return_routed_experts=self.return_routed_experts,
                         max_new_tokens=output_len) for _, _, output_len in requests
    ]

    # One session per request, in request order, so that ``output.index``
    # can be used to look up the matching session.
    sess: list[Session] = [profiler.new_session(input_len, output_len) for _, input_len, output_len in requests]

    def _to_status(finish_reason):
        # Only a run that stopped with 'length' is treated as successful.
        return Session.SUCCESS if finish_reason == 'length' else Session.FAIL

    profiler.start()

    # Record an initial zero-token tick on every session right after start.
    for s in sess:
        s.tick(0)

    if stream_output:
        # The context manager closes the progress bar even when
        # ``stream_infer`` raises part-way through.
        with tqdm(total=len(requests)) as pbar:
            for output in self.pipe.stream_infer(prompts, gen_config=gen_configs, do_preprocess=False):
                session = sess[output.index]
                n_token = output.generate_token_len
                finish_reason = output.finish_reason
                session.tick(n_token)
                # Intermediate chunks carry no finish reason; only the
                # chunk that has one completes the session.
                if finish_reason is not None:
                    session.finish(_to_status(finish_reason))
                    pbar.update(1)
    else:
        for output in self.pipe(prompts, gen_configs, do_preprocess=False, use_tqdm=True):
            session = sess[output.index]
            n_token = output.generate_token_len
            finish_reason = output.finish_reason
            session.tick(n_token)
            session.finish(_to_status(finish_reason))

    profiler.finish()

    # Report only the first failure: a session that is not SUCCESS, or
    # whose last ``ns`` entry is below the requested output length.
    for i, s in enumerate(sess):
        if s.status != Session.SUCCESS or s.ns[-1] < s.req_output_len:
            logger.error(f'Request {i} failed with {s.ns[-1]}/{s.req_output_len} tokens generated')
            logger.error(f'Prompt: {prompts[i]}')
            logger.warning('Got failed requests, metrics may be invalid')
            break
202

Callers 1

main · Function · 0.95

Calls 10

GenerationConfig · Class · 0.90

new_session · Method · 0.80

append · Method · 0.45

start · Method · 0.45

tick · Method · 0.45

stream_infer · Method · 0.45

finish · Method · 0.45

update · Method · 0.45

close · Method · 0.45

pipe · Method · 0.45

Tested by

no test coverage detected