hub / github.com/zai-org/ChatGLM3 / predict

Function predict

openai_api_demo/api_server.py:343–406 · view source on GitHub ↗

(model_id: str, params: dict)

Source from the content-addressed store, hash-verified

341
342
343	async def predict(model_id: str, params: dict):
344	global model, tokenizer
345
346	choice_data = ChatCompletionResponseStreamChoice(
347	index=0,
348	delta=DeltaMessage(role="assistant"),
349	finish_reason=None
350	)
351	chunk = ChatCompletionResponse(model=model_id, id="", choices=[choice_data], object="chat.completion.chunk")
352	yield "{}".format(chunk.model_dump_json(exclude_unset=True))
353
354	previous_text = ""
355	for new_response in generate_stream_chatglm3(model, tokenizer, params):
356	decoded_unicode = new_response["text"]
357	delta_text = decoded_unicode[len(previous_text):]
358	previous_text = decoded_unicode
359
360	finish_reason = new_response["finish_reason"]
361	if len(delta_text) == 0 and finish_reason != "function_call":
362	continue
363
364	function_call = None
365	if finish_reason == "function_call":
366	try:
367	function_call = process_response(decoded_unicode, use_tool=True)
368	except:
369	logger.warning(
370	"Failed to parse tool call, maybe the response is not a tool call or have been answered.")
371
372	if isinstance(function_call, dict):
373	function_call = FunctionCallResponse(**function_call)
374
375	delta = DeltaMessage(
376	content=delta_text,
377	role="assistant",
378	function_call=function_call if isinstance(function_call, FunctionCallResponse) else None,
379	)
380
381	choice_data = ChatCompletionResponseStreamChoice(
382	index=0,
383	delta=delta,
384	finish_reason=finish_reason
385	)
386	chunk = ChatCompletionResponse(
387	model=model_id,
388	id="",
389	choices=[choice_data],
390	object="chat.completion.chunk"
391	)
392	yield "{}".format(chunk.model_dump_json(exclude_unset=True))
393
394	choice_data = ChatCompletionResponseStreamChoice(
395	index=0,
396	delta=DeltaMessage(),
397	finish_reason="stop"
398	)
399	chunk = ChatCompletionResponse(
400	model=model_id,

Callers 1

create_chat_completion (Function) · 0.70

Calls 6

generate_stream_chatglm3 (Function) · 0.90

process_response (Function) · 0.90

ChatCompletionResponseStreamChoice (Class) · 0.70

DeltaMessage (Class) · 0.70

ChatCompletionResponse (Class) · 0.70

FunctionCallResponse (Class) · 0.70

Tested by

no test coverage detected