hub / github.com/ModelTC/LightLLM / generate

Method generate

lightllm/server/httpserver/manager.py:256–357 · view source on GitHub ↗

(
        self,
        prompt: Union[str, List[int]],
        sampling_params: SamplingParams,
        multimodal_params: MultimodalParams,
        request: Request,
        is_health_req: bool = False,
    )

Source from the content-addressed store, hash-verified

254	return group_request_id
255
256	async def generate(
257	self,
258	prompt: Union[str, List[int]],
259	sampling_params: SamplingParams,
260	multimodal_params: MultimodalParams,
261	request: Request,
262	is_health_req: bool = False,
263	) -> Tuple[int, str, dict, FinishStatus]:
264	start_time = time.time()
265	request_headers = request.headers if request is not None else {}
266	group_request_id = self.alloc_req_id(sampling_params, is_health_req)
267
268	try:
269	original_multimodal_params = None
270	if self.is_multinode_tp_master:
271	original_multimodal_params = copy.deepcopy(multimodal_params)
272
273	if self.pd_mode.is_P_or_NORMAL():
274	await multimodal_params.verify_and_preload(request)
275
276	# 记录请求到达的相关信息
277	await self._log_req_header(request_headers, group_request_id)
278	# 监控
279
280	prompt_ids = await self._encode(prompt, multimodal_params, sampling_params)
281	prompt_tokens = len(prompt_ids)
282	# 监控
283	if group_request_id > 0:
284	self.metric_client.counter_inc("lightllm_request_count")
285	self.metric_client.histogram_observe("lightllm_request_input_length", prompt_tokens)
286	self.metric_client.histogram_observe("lightllm_request_max_new_tokens", sampling_params.max_new_tokens)
287	prompt_ids = await self._check_and_repair_length(prompt_ids, sampling_params)
288
289	# 申请资源并存储
290	alloced_req_indexes = []
291	while len(alloced_req_indexes) < sampling_params.n:
292	alloc_req_index = await self.shm_req_manager.async_alloc_req_index()
293	sleep_time = 0.1
294	while alloc_req_index is None:
295	await asyncio.sleep(sleep_time)
296	sleep_time *= 1.1
297	sleep_time = min(1, sleep_time)
298
299	alloc_req_index = await self.shm_req_manager.async_alloc_req_index()
300	alloced_req_indexes.append(alloc_req_index)
301	req_objs = []
302	for i, req_index in enumerate(alloced_req_indexes):
303	req_obj = await self.shm_req_manager.async_get_req_obj_by_index(req_index)
304	req_obj.init(
305	group_request_id + i,
306	prompt_ids,
307	sampling_params,
308	self.tokenizer,
309	chunked_prefill_size=self.args.chunked_prefill_size,
310	)
311	req_objs.append(req_obj)
312
313	req_status = ReqStatus(group_request_id, multimodal_params, req_objs, start_time)

Callers 11

loop_for_request · Method · 0.95

health_check · Function · 0.45

tgi_generate_impl · Function · 0.45

tgi_generate_stream_impl · Function · 0.45

lightllm_get_score · Function · 0.45

lightllm_generate · Function · 0.45

lightllm_generate_stream · Function · 0.45

chat_completions_impl · Function · 0.45

process_single_prompt · Function · 0.45

_handle_streaming_completion · Function · 0.45

_pd_process_generate · Function · 0.45

Calls 15

alloc_req_id · Method · 0.95

_log_req_header · Method · 0.95

_encode · Method · 0.95

_check_and_repair_length · Method · 0.95

transfer_to_next_module_or_node · Method · 0.95

_wait_to_token_package · Method · 0.95

_count_multimodal_tokens · Method · 0.95

_release_multimodal_resources · Method · 0.95

abort · Method · 0.95

is_P_or_NORMAL · Method · 0.80

verify_and_preload · Method · 0.80

async_alloc_req_index · Method · 0.80

Tested by

no test coverage detected