(request: Request, httpserver_manager: HttpServerManager)
| 107 | |
| 108 | |
async def lightllm_generate_stream(request: Request, httpserver_manager: HttpServerManager) -> Response:
    """Handle a streaming generate request and reply with server-sent events.

    The JSON body is read from ``request``. Its ``"inputs"`` entry is used as
    the prompt, ``"parameters"`` is forwarded as keyword arguments to
    ``SamplingParams.init`` (after ``"return_details"`` is removed), and the
    optional ``"multimodal_params"`` entry is forwarded to ``MultimodalParams``.

    Args:
        request: Incoming HTTP request whose body is a JSON object.
        httpserver_manager: Supplies the tokenizer and the ``generate`` call
            that produces the per-token results.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``; every
        item yielded by the generator becomes one ``data:<json>`` frame.

    Raises:
        Exception: If the parsed sampling parameters have ``best_of != 1``.
        KeyError: Presumably, when ``"inputs"`` or ``"parameters"`` is absent
            from the body (both are read without a default).
    """
    payload = await request.json()
    prompt = payload.pop("inputs")
    raw_params = payload["parameters"]
    # "return_details" is dropped here and never forwarded to SamplingParams.init.
    raw_params.pop("return_details", False)

    sampling_params = SamplingParams()
    sampling_params.init(tokenizer=httpserver_manager.tokenizer, **raw_params)
    sampling_params.verify()
    if sampling_params.best_of != 1:
        raise Exception("stream api only support best_of == 1")

    multimodal_params = MultimodalParams(**payload.get("multimodal_params", {}))
    token_stream = httpserver_manager.generate(prompt, sampling_params, multimodal_params, request=request)

    async def event_stream() -> AsyncGenerator[bytes, None]:
        """Yield one UTF-8 encoded ``data:`` frame per generated item."""
        # input_usage only appears in the first metadata, so it is captured
        # once and repeated in every subsequent frame.
        input_usage = None
        async for _, text_piece, metadata, finish_status in token_stream:
            if input_usage is None:
                input_usage = metadata.get("input_usage", None)

            token_info = {
                "id": metadata.get("id", None),
                "text": text_piece,
                "logprob": metadata.get("logprob", None),
                "special": metadata.get("special", False),
                "count_output_tokens": metadata.get("count_output_tokens", 0),
                "prompt_tokens": metadata.get("prompt_tokens", 0),
            }
            event = {
                "token": token_info,
                "generated_text": None,
                "finished": finish_status.is_finished(),
                "finish_reason": finish_status.get_finish_reason(),
                "details": None,
                "input_usage": input_usage,
            }

            frame = f"data:{json.dumps(event, ensure_ascii=False)}\n\n"
            yield frame.encode("utf-8")

    tasks = BackgroundTasks()
    return StreamingResponse(event_stream(), media_type="text/event-stream", background=tasks)
nothing calls this directly
no test coverage detected