MCPcopy
hub / github.com/ModelTC/LightLLM / generate

Method generate

lightllm/server/httpserver/manager.py:256–357  ·  view source on GitHub ↗
(
        self,
        prompt: Union[str, List[int]],
        sampling_params: SamplingParams,
        multimodal_params: MultimodalParams,
        request: Request,
        is_health_req: bool = False,
    )

Source from the content-addressed store, hash-verified

254 return group_request_id
255
256 async def generate(
257 self,
258 prompt: Union[str, List[int]],
259 sampling_params: SamplingParams,
260 multimodal_params: MultimodalParams,
261 request: Request,
262 is_health_req: bool = False,
263 ) -> Tuple[int, str, dict, FinishStatus]:
264 start_time = time.time()
265 request_headers = request.headers if request is not None else {}
266 group_request_id = self.alloc_req_id(sampling_params, is_health_req)
267
268 try:
269 original_multimodal_params = None
270 if self.is_multinode_tp_master:
271 original_multimodal_params = copy.deepcopy(multimodal_params)
272
273 if self.pd_mode.is_P_or_NORMAL():
274 await multimodal_params.verify_and_preload(request)
275
276 # 记录请求到达的相关信息
277 await self._log_req_header(request_headers, group_request_id)
278 # 监控
279
280 prompt_ids = await self._encode(prompt, multimodal_params, sampling_params)
281 prompt_tokens = len(prompt_ids)
282 # 监控
283 if group_request_id > 0:
284 self.metric_client.counter_inc("lightllm_request_count")
285 self.metric_client.histogram_observe("lightllm_request_input_length", prompt_tokens)
286 self.metric_client.histogram_observe("lightllm_request_max_new_tokens", sampling_params.max_new_tokens)
287 prompt_ids = await self._check_and_repair_length(prompt_ids, sampling_params)
288
289 # 申请资源并存储
290 alloced_req_indexes = []
291 while len(alloced_req_indexes) < sampling_params.n:
292 alloc_req_index = await self.shm_req_manager.async_alloc_req_index()
293 sleep_time = 0.1
294 while alloc_req_index is None:
295 await asyncio.sleep(sleep_time)
296 sleep_time *= 1.1
297 sleep_time = min(1, sleep_time)
298
299 alloc_req_index = await self.shm_req_manager.async_alloc_req_index()
300 alloced_req_indexes.append(alloc_req_index)
301 req_objs = []
302 for i, req_index in enumerate(alloced_req_indexes):
303 req_obj = await self.shm_req_manager.async_get_req_obj_by_index(req_index)
304 req_obj.init(
305 group_request_id + i,
306 prompt_ids,
307 sampling_params,
308 self.tokenizer,
309 chunked_prefill_size=self.args.chunked_prefill_size,
310 )
311 req_objs.append(req_obj)
312
313 req_status = ReqStatus(group_request_id, multimodal_params, req_objs, start_time)

Callers 11

loop_for_request · Method · 0.95
health_check · Function · 0.45
tgi_generate_impl · Function · 0.45
tgi_generate_stream_impl · Function · 0.45
lightllm_get_score · Function · 0.45
lightllm_generate · Function · 0.45
lightllm_generate_stream · Function · 0.45
chat_completions_impl · Function · 0.45
process_single_prompt · Function · 0.45
_pd_process_generate · Function · 0.45

Calls 15

alloc_req_id · Method · 0.95
_log_req_header · Method · 0.95
_encode · Method · 0.95
abort · Method · 0.95
is_P_or_NORMAL · Method · 0.80
verify_and_preload · Method · 0.80
async_alloc_req_index · Method · 0.80

Tested by

no test coverage detected