(
self,
prompt: Union[str, List[int]],
sampling_params: SamplingParams,
multimodal_params: MultimodalParams,
request: Request,
is_health_req: bool = False,
)
| 254 | return group_request_id |
| 255 | |
| 256 | async def generate( |
| 257 | self, |
| 258 | prompt: Union[str, List[int]], |
| 259 | sampling_params: SamplingParams, |
| 260 | multimodal_params: MultimodalParams, |
| 261 | request: Request, |
| 262 | is_health_req: bool = False, |
| 263 | ) -> Tuple[int, str, dict, FinishStatus]: |
| 264 | start_time = time.time() |
| 265 | request_headers = request.headers if request is not None else {} |
| 266 | group_request_id = self.alloc_req_id(sampling_params, is_health_req) |
| 267 | |
| 268 | try: |
| 269 | original_multimodal_params = None |
| 270 | if self.is_multinode_tp_master: |
| 271 | original_multimodal_params = copy.deepcopy(multimodal_params) |
| 272 | |
| 273 | if self.pd_mode.is_P_or_NORMAL(): |
| 274 | await multimodal_params.verify_and_preload(request) |
| 275 | |
| 276 | # Record information about the request's arrival |
| 277 | await self._log_req_header(request_headers, group_request_id) |
| 278 | # Monitoring |
| 279 | |
| 280 | prompt_ids = await self._encode(prompt, multimodal_params, sampling_params) |
| 281 | prompt_tokens = len(prompt_ids) |
| 282 | # Monitoring |
| 283 | if group_request_id > 0: |
| 284 | self.metric_client.counter_inc("lightllm_request_count") |
| 285 | self.metric_client.histogram_observe("lightllm_request_input_length", prompt_tokens) |
| 286 | self.metric_client.histogram_observe("lightllm_request_max_new_tokens", sampling_params.max_new_tokens) |
| 287 | prompt_ids = await self._check_and_repair_length(prompt_ids, sampling_params) |
| 288 | |
| 289 | # Allocate resources and store them |
| 290 | alloced_req_indexes = [] |
| 291 | while len(alloced_req_indexes) < sampling_params.n: |
| 292 | alloc_req_index = await self.shm_req_manager.async_alloc_req_index() |
| 293 | sleep_time = 0.1 |
| 294 | while alloc_req_index is None: |
| 295 | await asyncio.sleep(sleep_time) |
| 296 | sleep_time *= 1.1 |
| 297 | sleep_time = min(1, sleep_time) |
| 298 | |
| 299 | alloc_req_index = await self.shm_req_manager.async_alloc_req_index() |
| 300 | alloced_req_indexes.append(alloc_req_index) |
| 301 | req_objs = [] |
| 302 | for i, req_index in enumerate(alloced_req_indexes): |
| 303 | req_obj = await self.shm_req_manager.async_get_req_obj_by_index(req_index) |
| 304 | req_obj.init( |
| 305 | group_request_id + i, |
| 306 | prompt_ids, |
| 307 | sampling_params, |
| 308 | self.tokenizer, |
| 309 | chunked_prefill_size=self.args.chunked_prefill_size, |
| 310 | ) |
| 311 | req_objs.append(req_obj) |
| 312 | |
| 313 | req_status = ReqStatus(group_request_id, multimodal_params, req_objs, start_time) |
no test coverage detected