Stream model output while remaining compatible with function calls. Once the output is longer than seven characters, it is checked to determine whether it is a function call. If it is not a function call, the output is streamed directly. Otherwise, the complete text of the function call is returned. :param model_id: :param gen_params:
(model_id, gen_params)
| 407 | |
| 408 | |
| 409 | def predict_stream(model_id, gen_params): |
| 410 | """ |
| 411 | The function call is compatible with stream mode output. |
| 412 | |
| 413 | The first seven characters are determined. |
| 414 | If not a function call, the stream output is directly generated. |
| 415 | Otherwise, the complete character content of the function call is returned. |
| 416 | |
| 417 | :param model_id: |
| 418 | :param gen_params: |
| 419 | :return: |
| 420 | """ |
| 421 | output = "" |
| 422 | is_function_call = False |
| 423 | has_send_first_chunk = False |
| 424 | for new_response in generate_stream_chatglm3(model, tokenizer, gen_params): |
| 425 | decoded_unicode = new_response["text"] |
| 426 | delta_text = decoded_unicode[len(output):] |
| 427 | output = decoded_unicode |
| 428 | |
| 429 | # When it is not a function call and the character length is> 7, |
| 430 | # try to judge whether it is a function call according to the special function prefix |
| 431 | if not is_function_call and len(output) > 7: |
| 432 | |
| 433 | # Determine whether a function is called |
| 434 | is_function_call = contains_custom_function(output) |
| 435 | if is_function_call: |
| 436 | continue |
| 437 | |
| 438 | # Non-function call, direct stream output |
| 439 | finish_reason = new_response["finish_reason"] |
| 440 | |
| 441 | # Send an empty string first to avoid truncation by subsequent next() operations. |
| 442 | if not has_send_first_chunk: |
| 443 | message = DeltaMessage( |
| 444 | content="", |
| 445 | role="assistant", |
| 446 | function_call=None, |
| 447 | ) |
| 448 | choice_data = ChatCompletionResponseStreamChoice( |
| 449 | index=0, |
| 450 | delta=message, |
| 451 | finish_reason=finish_reason |
| 452 | ) |
| 453 | chunk = ChatCompletionResponse( |
| 454 | model=model_id, |
| 455 | id="", |
| 456 | choices=[choice_data], |
| 457 | created=int(time.time()), |
| 458 | object="chat.completion.chunk" |
| 459 | ) |
| 460 | yield "{}".format(chunk.model_dump_json(exclude_unset=True)) |
| 461 | |
| 462 | send_msg = delta_text if has_send_first_chunk else output |
| 463 | has_send_first_chunk = True |
| 464 | message = DeltaMessage( |
| 465 | content=send_msg, |
| 466 | role="assistant", |
no test coverage detected