MCPcopy
hub / github.com/QwenLM/Qwen / predict

Function predict

openai_api.py:467–533  ·  view source on GitHub ↗
(
    query: str,
    history: List[List[str]],
    model_id: str,
    stop_words: List[str],
    gen_kwargs: Dict,
    system: str,
)

Source from the content-addressed store, hash-verified

465
466
467async def predict(
468 query: str,
469 history: List[List[str]],
470 model_id: str,
471 stop_words: List[str],
472 gen_kwargs: Dict,
473 system: str,
474):
475 global model, tokenizer
476 choice_data = ChatCompletionResponseStreamChoice(
477 index=0, delta=DeltaMessage(role='assistant'), finish_reason=None)
478 chunk = ChatCompletionResponse(model=model_id,
479 choices=[choice_data],
480 object='chat.completion.chunk')
481 yield '{}'.format(_dump_json(chunk, exclude_unset=True))
482
483 current_length = 0
484 stop_words_ids = [tokenizer.encode(s)
485 for s in stop_words] if stop_words else None
486
487 delay_token_num = max([len(x) for x in stop_words]) if stop_words_ids else 0
488 response_generator = model.chat_stream(tokenizer,
489 query,
490 history=history,
491 stop_words_ids=stop_words_ids,
492 system=system,
493 **gen_kwargs)
494 for _new_response in response_generator:
495 if len(_new_response) <= delay_token_num:
496 continue
497 new_response = _new_response[:-delay_token_num] if delay_token_num else _new_response
498
499 if len(new_response) == current_length:
500 continue
501
502 new_text = new_response[current_length:]
503 current_length = len(new_response)
504
505 choice_data = ChatCompletionResponseStreamChoice(
506 index=0, delta=DeltaMessage(content=new_text), finish_reason=None)
507 chunk = ChatCompletionResponse(model=model_id,
508 choices=[choice_data],
509 object='chat.completion.chunk')
510 yield '{}'.format(_dump_json(chunk, exclude_unset=True))
511
512 if current_length != len(_new_response):
513 # Determine whether to print the delay tokens
514 delayed_text = _new_response[current_length:]
515 new_text = trim_stop_words(delayed_text, stop_words)
516 if len(new_text) > 0:
517 choice_data = ChatCompletionResponseStreamChoice(
518 index=0, delta=DeltaMessage(content=new_text), finish_reason=None)
519 chunk = ChatCompletionResponse(model=model_id,
520 choices=[choice_data],
521 object='chat.completion.chunk')
522 yield '{}'.format(_dump_json(chunk, exclude_unset=True))
523
524 choice_data = ChatCompletionResponseStreamChoice(index=0,

Callers 1

create_chat_completion · Function · 0.70

Calls 6

DeltaMessage · Class · 0.85
_dump_json · Function · 0.85
trim_stop_words · Function · 0.85
_gc · Function · 0.70

Tested by

no test coverage detected