(model_id: str, params: dict)
| 341 | |
| 342 | |
| 343 | async def predict(model_id: str, params: dict): |
| 344 | global model, tokenizer |
| 345 | |
| 346 | choice_data = ChatCompletionResponseStreamChoice( |
| 347 | index=0, |
| 348 | delta=DeltaMessage(role="assistant"), |
| 349 | finish_reason=None |
| 350 | ) |
| 351 | chunk = ChatCompletionResponse(model=model_id, id="", choices=[choice_data], object="chat.completion.chunk") |
| 352 | yield "{}".format(chunk.model_dump_json(exclude_unset=True)) |
| 353 | |
| 354 | previous_text = "" |
| 355 | for new_response in generate_stream_chatglm3(model, tokenizer, params): |
| 356 | decoded_unicode = new_response["text"] |
| 357 | delta_text = decoded_unicode[len(previous_text):] |
| 358 | previous_text = decoded_unicode |
| 359 | |
| 360 | finish_reason = new_response["finish_reason"] |
| 361 | if len(delta_text) == 0 and finish_reason != "function_call": |
| 362 | continue |
| 363 | |
| 364 | function_call = None |
| 365 | if finish_reason == "function_call": |
| 366 | try: |
| 367 | function_call = process_response(decoded_unicode, use_tool=True) |
| 368 | except: |
| 369 | logger.warning( |
| 370 | "Failed to parse tool call, maybe the response is not a tool call or have been answered.") |
| 371 | |
| 372 | if isinstance(function_call, dict): |
| 373 | function_call = FunctionCallResponse(**function_call) |
| 374 | |
| 375 | delta = DeltaMessage( |
| 376 | content=delta_text, |
| 377 | role="assistant", |
| 378 | function_call=function_call if isinstance(function_call, FunctionCallResponse) else None, |
| 379 | ) |
| 380 | |
| 381 | choice_data = ChatCompletionResponseStreamChoice( |
| 382 | index=0, |
| 383 | delta=delta, |
| 384 | finish_reason=finish_reason |
| 385 | ) |
| 386 | chunk = ChatCompletionResponse( |
| 387 | model=model_id, |
| 388 | id="", |
| 389 | choices=[choice_data], |
| 390 | object="chat.completion.chunk" |
| 391 | ) |
| 392 | yield "{}".format(chunk.model_dump_json(exclude_unset=True)) |
| 393 | |
| 394 | choice_data = ChatCompletionResponseStreamChoice( |
| 395 | index=0, |
| 396 | delta=DeltaMessage(), |
| 397 | finish_reason="stop" |
| 398 | ) |
| 399 | chunk = ChatCompletionResponse( |
| 400 | model=model_id, |
no test coverage detected