MCPcopy
hub / github.com/ModelTC/LightLLM / lightllm_generate_stream

Function lightllm_generate_stream

lightllm/server/api_lightllm.py:109–152  ·  view source on GitHub ↗
(request: Request, httpserver_manager: HttpServerManager)

Source from the content-addressed store, hash-verified

107
108
async def lightllm_generate_stream(request: Request, httpserver_manager: HttpServerManager) -> Response:
    """Handle a generation request and stream the output as server-sent events.

    The JSON body of ``request`` supplies the prompt under ``"inputs"``, the
    sampling options under ``"parameters"`` and, optionally, keyword arguments
    for ``MultimodalParams`` under ``"multimodal_params"``.

    Args:
        request: Incoming HTTP request; its JSON body is read and it is also
            forwarded to ``httpserver_manager.generate``.
        httpserver_manager: Supplies the tokenizer used to initialise the
            sampling parameters and the ``generate`` async iterator.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``. Each
        item produced by the generator becomes one ``data:<json>`` event
        encoded as UTF-8.

    Raises:
        KeyError: If the body has no ``"inputs"`` or ``"parameters"`` entry
            (assuming the parsed body is a dict).
        Exception: If the resulting ``best_of`` is not 1. Anything raised by
            ``SamplingParams.init`` / ``verify`` also propagates.
    """
    body = await request.json()
    prompt = body.pop("inputs")
    raw_sampling_kwargs = body["parameters"]
    # "return_details" is removed and its value discarded, so it is never
    # forwarded to SamplingParams.init below.
    raw_sampling_kwargs.pop("return_details", False)

    sampling_params = SamplingParams()
    sampling_params.init(tokenizer=httpserver_manager.tokenizer, **raw_sampling_kwargs)
    sampling_params.verify()
    if sampling_params.best_of != 1:
        raise Exception("stream api only support best_of == 1")

    multimodal_kwargs = body.get("multimodal_params", {})
    multimodal_params = MultimodalParams(**multimodal_kwargs)
    results_generator = httpserver_manager.generate(prompt, sampling_params, multimodal_params, request=request)

    async def event_stream() -> AsyncGenerator[bytes, None]:
        """Yield one UTF-8 encoded ``data:`` event per generated item."""
        # Per the original author's note, input_usage only shows up in the
        # first metadata dict, so the first non-None value is remembered and
        # repeated in every later event.
        remembered_input_usage = None
        async for _, text_piece, meta, status in results_generator:
            if remembered_input_usage is None:
                remembered_input_usage = meta.get("input_usage", None)

            token_info = {
                "id": meta.get("id", None),
                "text": text_piece,
                "logprob": meta.get("logprob", None),
                "special": meta.get("special", False),
                "count_output_tokens": meta.get("count_output_tokens", 0),
                "prompt_tokens": meta.get("prompt_tokens", 0),
            }
            event = {
                "token": token_info,
                "generated_text": None,
                "finished": status.is_finished(),
                "finish_reason": status.get_finish_reason(),
                "details": None,
                "input_usage": remembered_input_usage,
            }

            event_json = json.dumps(event, ensure_ascii=False)
            yield f"data:{event_json}\n\n".encode("utf-8")

    # An empty BackgroundTasks container is attached to the response.
    tasks = BackgroundTasks()
    return StreamingResponse(event_stream(), media_type="text/event-stream", background=tasks)

Callers

nothing calls this directly

Calls 8

initMethod · 0.95
verifyMethod · 0.95
SamplingParamsClass · 0.90
MultimodalParamsClass · 0.85
popMethod · 0.80
stream_resultsFunction · 0.70
getMethod · 0.45
generateMethod · 0.45

Tested by

no test coverage detected