(args, httpserver_manager: HttpServerManager, request: Request)
| 61 | |
| 62 | |
async def health_check(args, httpserver_manager: HttpServerManager, request: Request):
    """Probe the server with a tiny generation request and report its health.

    A new probe is skipped (and the currently recorded state returned) when a
    check is already in progress, or when the server is marked healthy and
    ``health_obj.has_latest_inference()`` is true. Otherwise a short request is
    submitted through ``httpserver_manager.generate`` with
    ``is_health_req=True`` and its output stream is drained under
    ``health_obj.dynamic_timeout``.

    Args:
        args: Start-up arguments; only ``args.run_mode`` is read here.
        httpserver_manager: Manager supplying the tokenizer and ``generate``.
        request: The incoming HTTP request, forwarded to ``generate``.

    Returns:
        The value of ``health_obj.is_health()`` once the probe has finished
        (or been skipped).
    """
    # Same evaluation order as two separate guards: is_checking() first, then
    # is_health() and has_latest_inference() only if no check is running.
    if health_obj.is_checking() or (health_obj.is_health() and health_obj.has_latest_inference()):
        return health_obj.is_health()

    health_obj.begin_check()
    try:
        prompt = "你好!"
        # When run_mode is "prefill" the probe asks for a single new token;
        # otherwise it asks for two.
        probe_params = {
            "do_sample": True,
            "temperature": 0.8,
            "max_new_tokens": 1 if args.run_mode == "prefill" else 2,
        }

        sampling_params = SamplingParams()
        sampling_params.init(tokenizer=httpserver_manager.tokenizer, **probe_params)
        sampling_params.verify()

        # Since the id assigned by pd master needs to be passed to prefill and
        # decode nodes for inference, a normal request id is required there
        # instead of a negative id. Everywhere else, health monitor ids are
        # negative.
        is_pd_master = get_env_start_args().run_mode == "pd_master"
        health_req_id = _g_health_req_id_gen.generate_id()
        sampling_params.group_request_id = health_req_id if is_pd_master else -health_req_id

        # The probe request carries no multimodal payload.
        multimodal_params = MultimodalParams()
        token_stream = httpserver_manager.generate(
            prompt, sampling_params, multimodal_params, request, is_health_req=True
        )

        async def _drain(stream):
            # Consume every item; only completion within the timeout matters.
            async for _, _, _, _ in stream:
                pass

        try:
            await asyncio.wait_for(_drain(token_stream), timeout=health_obj.dynamic_timeout)
            health_obj.set_health()
        except asyncio.TimeoutError:
            health_obj.set_unhealth()
            logger.warning(f"Health check timeout! The failure count is: {str(health_obj._failure_count)}")
        return health_obj.is_health()
    except Exception as e:
        # Any other failure while building or running the probe marks the
        # server unhealthy.
        logger.exception(str(e))
        health_obj.set_unhealth()
        return health_obj.is_health()
    finally:
        # Always clear the "checking" flag, whichever path was taken.
        health_obj.end_check()
no test coverage detected