(request: ChatCompletionRequest)
| 224 | |
| 225 | @app.post("/v1/chat/completions", response_model=ChatCompletionResponse) |
| 226 | async def create_chat_completion(request: ChatCompletionRequest): |
| 227 | global model, tokenizer |
| 228 | |
| 229 | if len(request.messages) < 1 or request.messages[-1].role == "assistant": |
| 230 | raise HTTPException(status_code=400, detail="Invalid request") |
| 231 | |
| 232 | gen_params = dict( |
| 233 | messages=request.messages, |
| 234 | temperature=request.temperature, |
| 235 | top_p=request.top_p, |
| 236 | max_tokens=request.max_tokens or 1024, |
| 237 | echo=False, |
| 238 | stream=request.stream, |
| 239 | repetition_penalty=request.repetition_penalty, |
| 240 | tools=request.tools, |
| 241 | ) |
| 242 | logger.debug(f"==== request ====\n{gen_params}") |
| 243 | |
| 244 | if request.stream: |
| 245 | |
| 246 | # Use stream mode to read the first few characters; if they are not a function call, stream the output directly |
| 247 | predict_stream_generator = predict_stream(request.model, gen_params) |
| 248 | output = next(predict_stream_generator) |
| 249 | if not contains_custom_function(output): |
| 250 | return EventSourceResponse(predict_stream_generator, media_type="text/event-stream") |
| 251 | |
| 252 | # Obtain the result in one go and determine whether a tool needs to be called. |
| 253 | logger.debug(f"First result output:\n{output}") |
| 254 | |
| 255 | function_call = None |
| 256 | if output and request.tools: |
| 257 | try: |
| 258 | function_call = process_response(output, use_tool=True) |
| 259 | except: |
| 260 | logger.warning("Failed to parse tool call") |
| 261 | |
| 262 | # CallFunction |
| 263 | if isinstance(function_call, dict): |
| 264 | function_call = FunctionCallResponse(**function_call) |
| 265 | |
| 266 | """ |
| 267 | In this demo, we did not register any tools. |
| 268 | You can use the tools that have been implemented in our `tools_using_demo` and implement your own streaming tool implementation here. |
| 269 | Similar to the following method: |
| 270 | function_args = json.loads(function_call.arguments) |
| 271 | tool_response = dispatch_tool(tool_name: str, tool_params: dict) |
| 272 | """ |
| 273 | tool_response = "" |
| 274 | |
| 275 | if not gen_params.get("messages"): |
| 276 | gen_params["messages"] = [] |
| 277 | |
| 278 | gen_params["messages"].append(ChatMessage( |
| 279 | role="assistant", |
| 280 | content=output, |
| 281 | )) |
| 282 | gen_params["messages"].append(ChatMessage( |
| 283 | role="function", |
nothing calls this directly
no test coverage detected