(self)
| 656 | return |
| 657 | |
| 658 | async def handle_loop(self): |
| 659 | self.recycle_event = asyncio.Event() |
| 660 | asyncio.create_task(self.recycle_resource_loop()) |
| 661 | |
| 662 | # A slave node in multi-node TP mode needs to start a coroutine task that receives |
| 663 | # the request objects forwarded by the master node. |
| 664 | if self.is_multinode_tp_slave: |
| 665 | asyncio.create_task(self.loop_for_request()) |
| 666 | |
| 667 | if self.pd_mode.is_P_or_D(): |
| 668 | from lightllm.server.httpserver.pd_loop import pd_handle_loop |
| 669 | |
| 670 | asyncio.create_task(pd_handle_loop(self)) |
| 671 | |
| 672 | while True: |
| 673 | try: |
| 674 | await asyncio.wait_for(self.recv_from_detokenization.recv_pyobj(), timeout=0.05) |
| 675 | except asyncio.TimeoutError: |
| 676 | pass |
| 677 | |
| 678 | try: |
| 679 | for group_req_id_ in list(self.req_id_to_out_inf.keys()): |
| 680 | req_status = self.req_id_to_out_inf.get(group_req_id_, None) |
| 681 | if req_status is None: |
| 682 | continue |
| 683 | |
| 684 | token_list = [] |
| 685 | for req in req_status.group_req_objs.shm_req_objs: |
| 686 | req_id = req.request_id |
| 687 | read_token_count = 1 |
| 688 | if req.out_tokens_queue.is_full(): |
| 689 | read_token_count = LIGHTLLM_OUT_TOKEN_QUEUE_SIZE |
| 690 | |
| 691 | for _ in range(read_token_count): |
| 692 | if not req.out_tokens_queue.is_empty(): |
| 693 | |
| 694 | text, src_index, special, count_output_tokens = req.out_tokens_queue.peek() |
| 695 | req.cumlogprob += float(req.shm_logprobs.arr[src_index]) |
| 696 | metadata = { |
| 697 | "id": int(req.shm_prompt_ids.arr[src_index]), |
| 698 | "logprob": float(req.shm_logprobs.arr[src_index]), |
| 699 | "cumlogprob": float(req.cumlogprob) / count_output_tokens, |
| 700 | "special": special, |
| 701 | "count_output_tokens": count_output_tokens, |
| 702 | "prompt_cache_len": req.prompt_cache_len, |
| 703 | "mtp_accepted_token_num": req.mtp_accepted_token_num, |
| 704 | } |
| 705 | if self.args.return_all_prompt_logprobs: |
| 706 | metadata.update(req.get_all_prompt_metadata()) |
| 707 | if self.args.use_reward_model: |
| 708 | metadata["score"] = float(req.reward_score) |
| 709 | |
| 710 | req.out_tokens_queue.pop_no_ret() |
| 711 | |
| 712 | finished_token_index = ( |
| 713 | req.stop_str_matched_token_index if req.stop_str_matched else req.finish_token_index |
| 714 | ) |
| 715 |
no test coverage detected