(
manager: HttpServerManager, prompt, sampling_params, multimodal_params, forwarding_queue: AsyncQueue
)
| 161 | |
| 162 | # 触发推理的task |
async def _pd_process_generate(
    manager: HttpServerManager, prompt, sampling_params, multimodal_params, forwarding_queue: AsyncQueue
):
    """Run one generation request and push its outputs onto the forwarding queue.

    Iterates ``manager.generate(prompt, sampling_params, multimodal_params, None)``
    and enqueues each yielded ``(sub_req_id, request_output, metadata,
    finish_status)`` tuple on ``forwarding_queue``, except for tuples whose
    ``sub_req_id`` is negative.

    Ordinary errors raised while generating or enqueuing are logged and
    swallowed, so nothing propagates to whoever scheduled this coroutine.
    ``BaseException``-only signals (task cancellation on Python 3.8+,
    ``KeyboardInterrupt``, ``SystemExit``) are deliberately NOT caught, so a
    cancelled task really ends up in the cancelled state.
    """
    try:
        async for sub_req_id, request_output, metadata, finish_status in manager.generate(
            prompt, sampling_params, multimodal_params, None
        ):
            # In P/D mode the token data is put on the forwarding queue.
            # Requests whose id is below 0 are health-check probes and are
            # not forwarded.
            if sub_req_id < 0:
                continue
            await forwarding_queue.put((sub_req_id, request_output, metadata, finish_status))
    except Exception as e:
        # Best effort: one failing request is reported through the log only.
        # `Exception` (not `BaseException`) keeps cancellation and
        # interpreter-exit signals propagating.
        logger.error(str(e))
| 177 | |
| 178 | |
| 179 | # 转发token的task |
no test coverage detected